mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
Document for the C/C++ samples (#1442)
1. Document for the C/C++ samples. 2. Fix a null pointer error in the imagenet sample.
This commit is contained in:
parent
29de25c5a7
commit
9d67292c8c
11 changed files with 184 additions and 35 deletions
|
|
@ -12,12 +12,47 @@ else()
|
|||
string(APPEND CMAKE_C_FLAGS " -Wall -Wextra")
|
||||
endif()
|
||||
|
||||
message("${CMAKE_SYSTEM_INCLUDE_PATH}")
|
||||
#onnxruntime providers
|
||||
option(onnxruntime_USE_CUDA "Build with CUDA support" OFF)
|
||||
option(onnxruntime_USE_OPENVINO "Build with OpenVINO support" OFF)
|
||||
option(onnxruntime_USE_NNAPI "Build with DNNLibrary for Android NNAPI support" OFF)
|
||||
option(onnxruntime_USE_MKLDNN "Build with MKL-DNN support" OFF)
|
||||
option(onnxruntime_USE_NGRAPH "Build with nGraph support" OFF)
|
||||
option(onnxruntime_USE_NUPHAR "Build with Nuphar" OFF)
|
||||
option(onnxruntime_USE_BRAINSLICE "Build with BrainSlice" OFF)
|
||||
option(onnxruntime_USE_TENSORRT "Build with TensorRT support" OFF)
|
||||
|
||||
#if JPEG lib is available, we'll use it for image decoding, otherwise we'll use WIC
|
||||
find_package(JPEG)
|
||||
find_package(PNG)
|
||||
|
||||
|
||||
add_definitions(-DUSE_CUDA)
|
||||
if(onnxruntime_USE_CUDA)
|
||||
add_definitions(-DUSE_CUDA)
|
||||
endif()
|
||||
if(onnxruntime_USE_OPENVINO)
|
||||
add_definitions(-DUSE_OPENVINO)
|
||||
endif()
|
||||
if(onnxruntime_USE_NNAPI)
|
||||
add_definitions(-DUSE_NNAPI)
|
||||
endif()
|
||||
if(onnxruntime_USE_NNAPI)
|
||||
add_definitions(-DUSE_NNAPI)
|
||||
endif()
|
||||
if(onnxruntime_USE_MKLDNN)
|
||||
add_definitions(-DUSE_MKLDNN)
|
||||
endif()
|
||||
if(onnxruntime_USE_NGRAPH)
|
||||
add_definitions(-DUSE_NGRAPH)
|
||||
endif()
|
||||
if(onnxruntime_USE_NUPHAR)
|
||||
add_definitions(-DUSE_NUPHAR)
|
||||
endif()
|
||||
if(onnxruntime_USE_BRAINSLICE)
|
||||
add_definitions(-DUSE_BRAINSLICE)
|
||||
endif()
|
||||
if(onnxruntime_USE_TENSORRT)
|
||||
add_definitions(-DUSE_TENSORRT)
|
||||
endif()
|
||||
|
||||
#TODO: remove this part(need to talk to Ryan and find a solution for this)
|
||||
if(UNIX)
|
||||
|
|
|
|||
35
samples/c_cxx/README.md
Normal file
35
samples/c_cxx/README.md
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
This directory contains a few C/C++ sample applications for demoing onnxruntime usage:
|
||||
|
||||
1. fns_candy_style_transfer: A C application that uses the FNS-Candy style transfer model to re-style images.
|
||||
2. MNIST: A Windows GUI application for doing handwriting recognition
|
||||
3. imagenet: An end-to-end sample for the [ImageNet Large Scale Visual Recognition Challenge 2012](http://www.image-net.org/challenges/LSVRC/2012/)
|
||||
|
||||
# How to build
|
||||
|
||||
## Prerequisites
|
||||
1. Visual Studio 2015/2017/2019
|
||||
2. cmake(version >=3.13)
|
||||
|
||||
## Install ONNX Runtime
|
||||
You may either get a prebuilt onnxruntime from nuget.org, or build it from source by following the [BUILD.md document](../../../BUILD.md).
|
||||
If you build it by yourself, you must append the "--build_shared_lib" flag to your build command. Like:
|
||||
|
||||
```
|
||||
build.bat --config RelWithDebInfo --build_shared_lib --parallel
|
||||
```
|
||||
When the build is done, run Visual Studio as administrator and open the onnxruntime.sln file in your build directory.
|
||||

|
||||
|
||||
When the solution is loaded, change the build configuration to "RelWithDebInfo"(which must match your previous build command), then select the "INSTALL" project, and build it. It will install your onnxruntime to "C:\Program Files (x86)\onnxruntime"
|
||||
|
||||
## Build the samples
|
||||
Open cmd.exe, change your current directory to samples\c_cxx, then run
|
||||
```bat
|
||||
mkdir build
|
||||
cmake .. -A x64 -T host=x64
|
||||
```
|
||||
You may append "-Donnxruntime_USE_CUDA=ON" to the last command args if your onnxruntime binary was built with CUDA support.
|
||||
|
||||
Then you can open the onnxruntime_samples.sln file in the "build" directory and build the solution.
|
||||
|
||||
|
||||
66
samples/c_cxx/imagenet/README.md
Normal file
66
samples/c_cxx/imagenet/README.md
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
# Overview
|
||||
|
||||
|
||||
|
||||

|
||||
|
||||
WARNING: If you want to train the model by yourself, you need at least 500GB disk space and a powerful NVIDIA GPU.
|
||||
|
||||
# Install tensorflow
|
||||
Install Python 3.x from [python.org](https://www.python.org/), then execute
|
||||
```
|
||||
pip install --upgrade tensorflow
|
||||
```
|
||||
For more information, see [Install Tensorflow](https://www.tensorflow.org/install)
|
||||
|
||||
# Get the Imagenet dataset
|
||||
We need the [ILSVRC-2012-CLS](http://www.image-net.org/challenges/LSVRC/2012/) image classification dataset from http://www.image-net.org/.
|
||||
|
||||
If you're going to train the model by yourself, then you need the full dataset, which is about 500GB. Otherwise, you only need the
|
||||
validation data set, which is just about 3GB.
|
||||
|
||||
For how to get the data, see [ImageNet Download faq](http://image-net.org/download-faq). Once you get an account, visit http://www.image-net.org/download-images. You will find "Download links to ILSVRC2012 image data" on that page
|
||||
|
||||
And also, please download the "[imagenet_lsvrc_2015_synsets.txt](https://raw.githubusercontent.com/tensorflow/models/master/research/slim/datasets/imagenet_lsvrc_2015_synsets.txt)" and "[imagenet_2012_validation_synset_labels.txt](https://raw.githubusercontent.com/tensorflow/models/master/research/slim/datasets/imagenet_2012_validation_synset_labels.txt)" from tensorflow models repo.
|
||||
|
||||
# Get the model
|
||||
Please check [https://github.com/tensorflow/models/tree/master/research/slim/](https://github.com/tensorflow/models/tree/master/research/slim/).
|
||||
You may either train the model by yourself, or just download a pretrained model provided by Google.
|
||||
If you don't know which one to download and try, we suggest you choose the [Inception V4](http://download.tensorflow.org/models/inception_v4_2016_09_09.tar.gz) model as a starting point.
|
||||
|
||||
After downloading, please uncompress it.
|
||||
```
|
||||
tar -zxvf inception_v4_2016_09_09.tar.gz
|
||||
```
|
||||
|
||||
The Inception V4 archive only contains a single checkpoint file: inception_v4.ckpt. It can't be directly used for inferencing.
|
||||
You need to combine the network definition and the checkpoint. Please follow the steps below:
|
||||
|
||||
1. Export the graph
|
||||
Create a new folder. In it, execute
|
||||
```
|
||||
git clone https://github.com/tensorflow/models .
|
||||
python research\slim\export_inference_graph.py --model_name=inception_v4 --output_file=graph.pb
|
||||
```
|
||||
|
||||
2. Freeze the graph
|
||||
Run
|
||||
```
|
||||
freeze_graph.exe --input_graph=graph.pb --input_checkpoint=inception_v4.ckpt --output_graph=inception_v4.pb --output_node_names=InceptionV4/Logits/Predictions --input_binary=true
|
||||
```
|
||||
|
||||
# Convert the model to ONNX
|
||||
|
||||
```
|
||||
pip install --upgrade tf2onnx
|
||||
python -m tf2onnx.convert --input inception_v4.pb --inputs input:0 --outputs InceptionV4/Logits/Predictions:0 --opset 10 --output inception_v4.onnx
|
||||
```
|
||||
|
||||
# Run the inferencing
|
||||
In your build dir of onnxruntime_samples, search for "image_classifier.exe" and run
|
||||
```
|
||||
image_classifier.exe C:\tools\imagenet_validation_data inception_v4.onnx imagenet_lsvrc_2015_synsets.txt imagenet_2012_validation_synset_labels.txt 32
|
||||
```
|
||||
Please replace the file names with the corresponding file paths.
|
||||
|
||||
The last parameter is batch size, you may need to adjust it according to your GPU memory size.
|
||||
|
|
@ -18,14 +18,17 @@ class AsyncRingBuffer {
|
|||
(*(RunnableTask*)data)(pci);
|
||||
}
|
||||
|
||||
template <typename T=float>
|
||||
template <typename T = float>
|
||||
static size_t CalcItemSize(const std::vector<int64_t>& tensor_shape) {
|
||||
int64_t r = 1;
|
||||
for (int64_t i : tensor_shape) r *= i;
|
||||
return static_cast<size_t>(r) * sizeof(T);
|
||||
}
|
||||
|
||||
enum class BufferState { EMPTY, FILLING, FULL, TAKEN };
|
||||
enum class BufferState { EMPTY,
|
||||
FILLING,
|
||||
FULL,
|
||||
TAKEN };
|
||||
const size_t batch_size_;
|
||||
using InputType = typename InputIterator::value_type;
|
||||
DataProcessing* p_;
|
||||
|
|
@ -75,8 +78,8 @@ class AsyncRingBuffer {
|
|||
input_task_id_for_buffers_(capacity),
|
||||
buffer_(item_size_in_bytes * capacity) {}
|
||||
|
||||
size_t GetId(const uint8_t* p) const { return (p - buffer_.data()) / item_size_in_bytes_; }
|
||||
|
||||
size_t GetId(_In_ const uint8_t* p) const { return (p - buffer_.data()) / item_size_in_bytes_; }
|
||||
size_t GetItemSizeInBytes() const { return item_size_in_bytes_; }
|
||||
bool CompareAndSet(size_t i, BufferState old, BufferState new_state) {
|
||||
if (buffer_state[i] != old) return false;
|
||||
buffer_state[i] = new_state;
|
||||
|
|
@ -105,7 +108,7 @@ class AsyncRingBuffer {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool TakeAllRemain(_Out_ uint8_t** begin, std::vector<InputType>& task_id_list) {
|
||||
_Success_(return ) bool TakeAllRemain(_Out_ uint8_t** begin, std::vector<InputType>& task_id_list) {
|
||||
auto iter =
|
||||
std::find_if(buffer_state.begin(), buffer_state.end(), [](BufferState s) { return s == BufferState::FULL; });
|
||||
if (iter == buffer_state.end()) return false;
|
||||
|
|
@ -175,9 +178,9 @@ class AsyncRingBuffer {
|
|||
std::vector<InputType> task_id_list;
|
||||
buffer_id = tensor_id * batch_size_;
|
||||
if (buffer_.TakeRange(buffer_id, buffer_id + batch_size_, task_id_list)) {
|
||||
queue_.Put(tensor_id, [&task_id_list](QueueItem& i){
|
||||
i.taskid_list = task_id_list;
|
||||
});
|
||||
queue_.Put(tensor_id, [&task_id_list](QueueItem& i) {
|
||||
i.taskid_list = task_id_list;
|
||||
});
|
||||
input_tensor = queue_.Take();
|
||||
}
|
||||
}
|
||||
|
|
@ -203,7 +206,7 @@ class AsyncRingBuffer {
|
|||
}
|
||||
}
|
||||
|
||||
void Fail(_Inout_opt_ ONNXRUNTIME_CALLBACK_INSTANCE pci, const char* errmsg) {
|
||||
void Fail(_Inout_opt_ ONNXRUNTIME_CALLBACK_INSTANCE pci, const char* errmsg) {
|
||||
threadpool_.SetFailBit(pci, errmsg);
|
||||
}
|
||||
|
||||
|
|
@ -221,9 +224,9 @@ class AsyncRingBuffer {
|
|||
input_end_(input_end) {
|
||||
OrtAllocatorInfo* allocator_info;
|
||||
ORT_THROW_ON_ERROR(OrtCreateCpuAllocatorInfo(OrtArenaAllocator, OrtMemTypeDefault, &allocator_info));
|
||||
uint8_t* output_data = buffer_.Begin();
|
||||
uint8_t* output_data = buffer_.Begin();
|
||||
std::vector<int64_t> input_shape = p_->GetOutputShape(batch_size_);
|
||||
size_t off = CalcItemSize(input_shape);
|
||||
size_t off = CalcItemSize(input_shape);
|
||||
queue_.Init([allocator_info, off, &output_data, &input_shape](QueueItem& e) {
|
||||
ORT_THROW_ON_ERROR(OrtCreateTensorWithDataAsOrtValue(allocator_info, output_data, off, input_shape.data(),
|
||||
input_shape.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT,
|
||||
|
|
@ -255,7 +258,6 @@ class AsyncRingBuffer {
|
|||
OrtReleaseValue(input_tensor);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* call this function when a download task is just finished or any buffer became FREE.
|
||||
* \return 0 EOF. No more download task to schedule
|
||||
|
|
@ -276,15 +278,12 @@ class AsyncRingBuffer {
|
|||
uint8_t* d = dest;
|
||||
delete this;
|
||||
try {
|
||||
(*r->p_)(&s, d);
|
||||
(*r->p_)(&s, d, r->buffer_.GetItemSizeInBytes());
|
||||
r->OnDownloadFinished(pci, d);
|
||||
} catch (const std::exception& ex) {
|
||||
fprintf(stderr, "%s\n", ex.what());
|
||||
fprintf(stderr, "%s\n", ex.what());
|
||||
r->Fail(pci, ex.what());
|
||||
#ifdef _WIN32
|
||||
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -3,10 +3,11 @@
|
|||
|
||||
#pragma once
|
||||
#include <vector>
|
||||
#include <sal.h>
|
||||
|
||||
class DataProcessing {
|
||||
public:
|
||||
virtual void operator()(const void* input_data, void* output_data) const = 0;
|
||||
virtual void operator()(_In_ const void* input_data, _Out_writes_bytes_all_(output_len) void* output_data, size_t output_len) const = 0;
|
||||
virtual std::vector<int64_t> GetOutputShape(size_t batch_size) const = 0;
|
||||
virtual ~DataProcessing() = default;
|
||||
};
|
||||
|
|
@ -165,22 +165,25 @@ InceptionPreprocessing::InceptionPreprocessing(int out_height, int out_width, in
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
// see: https://github.com/tensorflow/models/blob/master/research/slim/preprocessing/inception_preprocessing.py
|
||||
// function: preprocess_for_eval
|
||||
void InceptionPreprocessing::operator()(const void* input_data, void* output_data) const {
|
||||
void InceptionPreprocessing::operator()(_In_ const void* input_data,
|
||||
_Out_writes_bytes_all_(output_len) void* output_data, size_t output_len) const {
|
||||
const TCharString& file_name = *reinterpret_cast<const TCharString*>(input_data);
|
||||
|
||||
size_t output_count = channels_ * out_height_ * out_width_;
|
||||
if (output_len < output_count * sizeof(float)) {
|
||||
throw std::runtime_error("buffer is too small");
|
||||
}
|
||||
float* float_file_data_pointer;
|
||||
int bbox_h_size, bbox_w_size;
|
||||
int channels = 3;
|
||||
ORT_THROW_ON_ERROR(LoadImageFromFileAndCrop(nullptr, file_name.c_str(), central_fraction_, &float_file_data_pointer,
|
||||
&bbox_w_size, &bbox_h_size));
|
||||
ORT_THROW_ON_ERROR(LoadImageFromFileAndCrop(image_loader_, file_name.c_str(), central_fraction_,
|
||||
&float_file_data_pointer, &bbox_w_size, &bbox_h_size));
|
||||
auto output_data_ = reinterpret_cast<float*>(output_data);
|
||||
ResizeImageInMemory(float_file_data_pointer, output_data_, bbox_h_size, bbox_w_size, out_height_, out_width_,
|
||||
channels);
|
||||
size_t output_data_len = channels_ * out_height_ * out_width_;
|
||||
for (size_t i = 0; i != output_data_len; ++i) {
|
||||
channels_);
|
||||
free(float_file_data_pointer);
|
||||
|
||||
for (size_t i = 0; i != output_count; ++i) {
|
||||
output_data_[i] = (output_data_[i] - 0.5f) * 2.f;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ class InceptionPreprocessing : public DataProcessing {
|
|||
public:
|
||||
InceptionPreprocessing(int out_height, int out_width, int channels);
|
||||
|
||||
void operator()(const void* input_data, void* output_data) const override;
|
||||
void operator()(_In_ const void* input_data, _Out_writes_bytes_all_(output_len) void* output_data, size_t output_len) const override;
|
||||
|
||||
// output data from this class is in NWHC format
|
||||
std::vector<int64_t> GetOutputShape(size_t batch_size) const override {
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ bool CreateImageLoader(void** out) {
|
|||
|
||||
void ReleaseImageLoader(void*) {}
|
||||
|
||||
OrtStatus* LoadImageFromFileAndCrop(void* , const ORTCHAR_T* filename, double central_crop_fraction, float** out,
|
||||
OrtStatus* LoadImageFromFileAndCrop(void*, const ORTCHAR_T* filename, double central_crop_fraction, float** out,
|
||||
int* out_width, int* out_height) {
|
||||
const int channels_ = 3;
|
||||
UncompressFlags flags;
|
||||
|
|
@ -59,6 +59,9 @@ OrtStatus* LoadImageFromFileAndCrop(void* , const ORTCHAR_T* filename, double ce
|
|||
int bbox_w_size = width - bbox_w_start * 2;
|
||||
const size_t ele_count = bbox_h_size * bbox_w_size * channels;
|
||||
float* float_file_data = (float*)malloc(ele_count * sizeof(float));
|
||||
if (float_file_data == nullptr) {
|
||||
return OrtCreateStatus(ORT_FAIL, "out of memory");
|
||||
}
|
||||
|
||||
{
|
||||
auto p = decompressed_image.get() + (bbox_h_start * width + bbox_w_start) * channels;
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
#ifdef _WIN32
|
||||
#include <atlbase.h>
|
||||
#endif
|
||||
using namespace std::chrono;
|
||||
|
||||
class Validator : public OutputCollector<TCharString> {
|
||||
private:
|
||||
|
|
@ -85,6 +86,7 @@ class Validator : public OutputCollector<TCharString> {
|
|||
char* output_name_ = nullptr;
|
||||
OrtEnv* const env_;
|
||||
const TCharString model_path_;
|
||||
system_clock::time_point start_time_;
|
||||
|
||||
public:
|
||||
int GetImageSize() const { return image_size_; }
|
||||
|
|
@ -152,6 +154,7 @@ class Validator : public OutputCollector<TCharString> {
|
|||
}
|
||||
|
||||
image_size_ = static_cast<int>(dims[1]);
|
||||
start_time_ = system_clock::now();
|
||||
}
|
||||
|
||||
void operator()(const std::vector<TCharString>& task_id_list, const OrtValue* input_tensor) override {
|
||||
|
|
@ -174,8 +177,12 @@ class Validator : public OutputCollector<TCharString> {
|
|||
}
|
||||
probs = end;
|
||||
}
|
||||
finished_count_ += static_cast<int>(remain);
|
||||
printf("%d\n", finished_count_.load());
|
||||
size_t finished = finished_count_ += static_cast<int>(remain);
|
||||
float progress = static_cast<float>(finished) / validation_data_.size();
|
||||
auto elapsed = system_clock::now() - start_time_;
|
||||
auto eta = progress > 0 ? duration_cast<minutes>(elapsed * (1 - progress) / progress).count() : 9999999;
|
||||
float accuracy = finished > 0 ? top_1_correct_count_ / static_cast<float>(finished) : 0;
|
||||
printf("accuracy = %.2f, progress %.2f%%, expect to be finished in %d minutes\n", accuracy, progress * 100, eta);
|
||||
OrtReleaseValue(output_tensor);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
BIN
samples/c_cxx/imagenet/taskflow.png
Normal file
BIN
samples/c_cxx/imagenet/taskflow.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 71 KiB |
BIN
samples/c_cxx/vs.png
Normal file
BIN
samples/c_cxx/vs.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 153 KiB |
Loading…
Reference in a new issue