Initial PR for VSINPU execution provider (#20903)

### Description
<!-- Describe your changes. -->
-It is an initial PR for VSINPU execution provider



### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
- For support VeriSilicon hardware
- TIM-VX(Tensor Interface Module)
(https://github.com/VeriSilicon/TIM-VX) is an integrated software
solution by Verisilicon for our hardware(A311D/i.MX 8M Plus etc.)
design, it is easy to use Verisilicon’s hardware by simply connecting
onnxruntime with the TIM-VX API by this VSINPU execution provider.
This commit is contained in:
Chen Feiyue 2024-06-29 12:48:34 +08:00 committed by GitHub
parent 9007ede102
commit 56b36a58ba
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
65 changed files with 5096 additions and 3 deletions

View file

@ -101,6 +101,7 @@ option(onnxruntime_BUILD_OBJC "Build Objective-C library" OFF)
option(onnxruntime_USE_PREINSTALLED_EIGEN "Use pre-installed EIGEN. Need to provide eigen_SOURCE_PATH if turn this on." OFF)
option(onnxruntime_BUILD_BENCHMARKS "Build ONNXRuntime micro-benchmarks" OFF)
option(onnxruntime_USE_LLVM "Build TVM with LLVM" OFF)
option(onnxruntime_USE_VSINPU "Build with VSINPU support" OFF)
cmake_dependent_option(onnxruntime_USE_FLASH_ATTENTION "Build flash attention kernel for scaled dot product attention" ON "onnxruntime_USE_CUDA" OFF)
option(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION "Build memory efficient attention kernel for scaled dot product attention" ON)
@ -797,6 +798,11 @@ if (onnxruntime_USE_RKNPU)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_RKNPU=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES rknpu)
endif()
if (onnxruntime_USE_VSINPU)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_VSINPU=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_VSINPU=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES vsinpu)
endif()
if (onnxruntime_USE_NNAPI_BUILTIN)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_NNAPI=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_NNAPI_BUILTIN=1)

View file

@ -189,6 +189,7 @@ set(onnxruntime_INTERNAL_LIBRARIES
${PROVIDERS_SNPE}
${PROVIDERS_TVM}
${PROVIDERS_RKNPU}
${PROVIDERS_VSINPU}
${PROVIDERS_XNNPACK}
${PROVIDERS_WEBNN}
${PROVIDERS_AZURE}

View file

@ -80,6 +80,9 @@ endif()
if(onnxruntime_USE_RKNPU)
set(PROVIDERS_RKNPU onnxruntime_providers_rknpu)
endif()
if(onnxruntime_USE_VSINPU)
set(PROVIDERS_VSINPU onnxruntime_providers_vsinpu)
endif()
if(onnxruntime_USE_DML)
set(PROVIDERS_DML onnxruntime_providers_dml)
endif()
@ -188,6 +191,35 @@ if (onnxruntime_USE_TVM)
include(onnxruntime_providers_tvm.cmake)
endif()
if (onnxruntime_USE_VSINPU)
add_definitions(-DUSE_VSINPU=1)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
file(GLOB_RECURSE onnxruntime_providers_vsinpu_srcs
"${ONNXRUNTIME_ROOT}/core/providers/vsinpu/builders/*.h"
"${ONNXRUNTIME_ROOT}/core/providers/vsinpu/builders/*.cc"
"${ONNXRUNTIME_ROOT}/core/providers/vsinpu/*.h"
"${ONNXRUNTIME_ROOT}/core/providers/vsinpu/*.cc"
"${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.h"
"${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.cc"
)
source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_vsinpu_srcs})
add_library(onnxruntime_providers_vsinpu ${onnxruntime_providers_vsinpu_srcs})
onnxruntime_add_include_to_target(onnxruntime_providers_vsinpu
onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf-lite flatbuffers Boost::mp11
safeint_interface nsync::nsync_cpp)
add_dependencies(onnxruntime_providers_vsinpu ${onnxruntime_EXTERNAL_DEPENDENCIES})
set_target_properties(onnxruntime_providers_vsinpu PROPERTIES FOLDER "ONNXRuntime" LINKER_LANGUAGE CXX)
target_include_directories(onnxruntime_providers_vsinpu PRIVATE ${ONNXRUNTIME_ROOT} $ENV{TIM_VX_INSTALL}/include)
find_library(TIMVX_LIBRARY NAMES tim-vx PATHS $ENV{TIM_VX_INSTALL}/lib NO_DEFAULT_PATH)
if(TIMVX_LIBRARY)
target_link_libraries(onnxruntime_providers_vsinpu PRIVATE ${TIMVX_LIBRARY})
else()
message(FATAL_ERROR "Cannot find TIM-VX library!")
endif()
endif()
if (onnxruntime_USE_XNNPACK)
include(onnxruntime_providers_xnnpack.cmake)
endif()

View file

@ -546,6 +546,10 @@ if(onnxruntime_USE_NNAPI_BUILTIN)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_nnapi)
endif()
if(onnxruntime_USE_VSINPU)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_vsinpu)
endif()
if(onnxruntime_USE_JSEP)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_js)
endif()
@ -589,6 +593,7 @@ set(ONNXRUNTIME_TEST_LIBS
${onnxruntime_libs}
# CUDA, ROCM, TENSORRT, MIGRAPHX, DNNL, and OpenVINO are dynamically loaded at runtime
${PROVIDERS_NNAPI}
${PROVIDERS_VSINPU}
${PROVIDERS_JS}
${PROVIDERS_QNN}
${PROVIDERS_SNPE}

View file

@ -52,6 +52,7 @@ constexpr const char* kXnnpackExecutionProvider = "XnnpackExecutionProvider";
constexpr const char* kWebNNExecutionProvider = "WebNNExecutionProvider";
constexpr const char* kCannExecutionProvider = "CANNExecutionProvider";
constexpr const char* kAzureExecutionProvider = "AzureExecutionProvider";
constexpr const char* kVSINPUExecutionProvider = "VSINPUExecutionProvider";
constexpr const char* kExecutionProviderSharedLibraryPath = "shared_lib_path";
constexpr const char* kExecutionProviderSharedLibraryEntry = "provider_factory_entry_point";

View file

@ -0,0 +1,34 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include "onnxruntime_c_api.h"
#ifdef __cplusplus
extern "C" {
#endif
ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_VSINPU, _In_ OrtSessionOptions* options);
#ifdef __cplusplus
}
#endif

View file

@ -285,7 +285,7 @@ void NodeUnit::InitForSingleNode() {
const auto& output_defs = target_node_.OutputDefs();
const auto& node_attrs = target_node_.GetAttributes();
auto qlinear_type = GetQLinearOpType(target_node_);
if (qlinear_type == QLinearOpType::Unknown || IsVariadicQLinearOp(qlinear_type)) { // TODO, add variadic support
if (qlinear_type == QLinearOpType::Unknown) {
// Not a Qlinear op, add all inputs / outputs
auto add_all_io = [](std::vector<NodeUnitIODef>& defs,
const ConstPointerContainer<std::vector<NodeArg*>>& node_defs) {
@ -351,6 +351,13 @@ void NodeUnit::InitForSingleNode() {
NodeUnitIODef::QuantParam{*input_defs[1],
input_defs.size() == 3 ? input_defs[2] : nullptr,
axis}});
} else if (IsVariadicQLinearOp(qlinear_type)) {
size_t input_num = (input_defs.size() - 2) / 3;
for (size_t i = 0; i < input_num; i++) {
inputs_.push_back(NodeUnitIODef{*input_defs[3 * i + 2], NodeUnitIODef::QuantParam{*input_defs[3 * i + 3],
input_defs[3 * i + 4]}});
}
outputs_.push_back(NodeUnitIODef{*output_defs[0], NodeUnitIODef::QuantParam{*input_defs[0], input_defs[1]}});
} else {
ORT_THROW("The QLinear op [", static_cast<uint8_t>(qlinear_type), "] is not supported");
}

View file

@ -61,6 +61,7 @@ bool ProviderIsCpuBased(const std::string& provider_type) {
provider_type == onnxruntime::kVitisAIExecutionProvider ||
provider_type == onnxruntime::kOpenVINOExecutionProvider ||
provider_type == onnxruntime::kNnapiExecutionProvider ||
provider_type == onnxruntime::kVSINPUExecutionProvider ||
provider_type == onnxruntime::kAclExecutionProvider ||
provider_type == onnxruntime::kArmNNExecutionProvider ||
provider_type == onnxruntime::kRknpuExecutionProvider ||

View file

@ -98,6 +98,14 @@ constexpr ProviderInfo kProvidersInPriorityOrder[] =
true,
#else
false,
#endif
},
{
kVSINPUExecutionProvider,
#ifdef USE_VSINPU
true,
#else
false,
#endif
},
{

View file

@ -46,6 +46,10 @@
#include "core/providers/nnapi/nnapi_provider_factory_creator.h"
#endif
#if defined(USE_VSINPU)
#include "core/providers/vsinpu/vsinpu_provider_factory_creator.h"
#endif
#if defined(USE_JSEP)
#include "core/providers/js/js_provider_factory_creator.h"
#endif

View file

@ -0,0 +1,130 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/shared/utils/utils.h"
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class ReluOpBuilder : public BaseOpBuilder {
public:
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating Relu Activation.";
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Relu>();
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
class SigmoidOpBuilder : public BaseOpBuilder {
public:
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating Sigmoid Activation.";
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Sigmoid>();
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
class TanhOpBuilder : public BaseOpBuilder {
public:
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating Tanh activation.";
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Tanh>();
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
class LeakyReluOpBuilder : public BaseOpBuilder {
public:
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating LeakyRelu activation.";
const auto& node = node_unit.GetNode();
NodeAttrHelper helper(node);
auto alpha = helper.Get("alpha", 1.0f);
auto op =
graph_ep->GetGraph()->CreateOperation<tim::vx::ops::LeakyRelu>(alpha);
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
class EluOpBuilder : public BaseOpBuilder {
public:
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating Elu activation.";
const auto& node = node_unit.GetNode();
NodeAttrHelper helper(node);
auto alpha = helper.Get("alpha", 1.0f);
auto op =
graph_ep->GetGraph()->CreateOperation<tim::vx::ops::LeakyRelu>(alpha);
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
class HardSigmoidOpBuilder : public BaseOpBuilder {
public:
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating HardSigmoid activation.";
const auto& node = node_unit.GetNode();
NodeAttrHelper helper(node);
auto alpha = helper.Get("alpha", 1.0f);
auto beta = helper.Get("beta", 1.0f);
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::HardSigmoid>(
alpha, beta);
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,205 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <string>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
bool BaseOpBuilder::IsSupported(const onnxruntime::GraphViewer& graph_viewer,
const NodeUnit& node_unit) const {
auto initializers = graph_viewer.GetAllInitializedTensors();
if (!HasSupportedOpSet(node_unit)) {
return false;
}
if (!HasSupportedInputOutputs(initializers, node_unit)) {
return false;
}
return IsOpSupported(graph_viewer, &node_unit.GetNode());
}
bool BaseOpBuilder::HasSupportedInputOutputs(const InitializedTensorSet& initializers,
const NodeUnit& node_unit) const {
// We do not support unknown(null) input shape
auto has_supported_shape = [](const NodeArg& node_arg, const std::string& name, const std::string& op_type) {
const auto* shape_proto = node_arg.Shape();
if (!shape_proto) {
LOGS_DEFAULT(WARNING) << "Node [" << name << "] type [" << op_type
<< "] Input [" << node_arg.Name() << "] has no shape";
return false;
}
// We do not support dynamic shape input yet, but resize op's second input can be empty
for (const auto& dim : shape_proto->dim()) {
if (!dim.has_dim_value()) {
LOGS_DEFAULT(WARNING) << "Dynamic shape is not supported for now, for input:" << node_arg.Name();
return false;
}
if (dim.dim_value() == 0 && op_type != "Resize") {
LOGS_DEFAULT(WARNING) << "Zero in shape is not supported for now, for input:" << node_arg.Name();
return false;
}
}
return true;
};
auto has_initialized_quant_param = [](const NodeArg& arg, const InitializedTensorSet& initializers) {
auto it = initializers.find(arg.Name());
if (it == initializers.end()) {
LOGS_DEFAULT(WARNING) << "The quantization param must be an initializer tensor";
return false;
}
return true;
};
for (const auto& input : node_unit.Inputs()) {
if (!input.node_arg.Exists()) {
continue;
}
if (!has_supported_shape(input.node_arg, node_unit.Name(), node_unit.OpType()))
return false;
if (input.quant_param.has_value()) {
if (!has_supported_shape(input.quant_param->scale, node_unit.Name(), node_unit.OpType()))
return false;
if (!has_initialized_quant_param(input.quant_param->scale, initializers))
return false;
// zero point is optional
if (input.quant_param->zero_point) {
if (!has_supported_shape(*input.quant_param->zero_point, node_unit.Name(), node_unit.OpType()))
return false;
if (!has_initialized_quant_param(*input.quant_param->zero_point, initializers))
return false;
if (input.quant_param->zero_point->Type() != input.node_arg.Type()) {
LOGS_DEFAULT(ERROR) << "Invalid input type because the data type mismatch with its' quant param type.";
return false;
}
}
}
}
for (const auto& output : node_unit.Outputs()) {
if (output.quant_param.has_value()) {
if (!has_supported_shape(output.quant_param->scale, node_unit.Name(), node_unit.OpType()))
return false;
if (!has_initialized_quant_param(output.quant_param->scale, initializers))
return false;
// zero point is optional
if (output.quant_param->zero_point) {
if (!has_supported_shape(*output.quant_param->zero_point, node_unit.Name(), node_unit.OpType()))
return false;
if (!has_initialized_quant_param(*output.quant_param->zero_point, initializers))
return false;
}
}
}
return HasSupportedInputOutputsImpl(initializers, node_unit);
}
bool BaseOpBuilder::HasSupportedInputOutputsImpl(
const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit) const {
// Check input/output data type, int64 is generally unsupported
// specific op builder can override this if the int64 input corresponds to VSINPU param
for (const auto& input : node_unit.Inputs()) {
auto input_type = input.node_arg.Type();
if (*input_type == "tensor(int64)" || !util::IsTypeSupported(&input.node_arg)) {
LOGS_DEFAULT(WARNING) << node_unit.OpType() << " has unsupported input type : "
<< *input_type;
return false;
}
}
for (const auto& output : node_unit.Outputs()) {
auto output_type = output.node_arg.Type();
if (*output_type == "tensor(int64)" || !util::IsTypeSupported(&output.node_arg)) {
LOGS_DEFAULT(WARNING) << node_unit.OpType() << " has unsupported output type : "
<< *output_type;
return false;
}
}
return true;
}
bool BaseOpBuilder::HasSupportedOpSet(const NodeUnit& node_unit) const {
auto since_version = node_unit.SinceVersion();
if (since_version < GetMinSupportedOpSet(node_unit) || since_version > GetMaxSupportedOpSet(node_unit)) {
LOGS_DEFAULT(VERBOSE) << node_unit.OpType() << " opset [" << since_version
<< "] is only supported for opset ["
<< GetMinSupportedOpSet(node_unit) << ", "
<< GetMaxSupportedOpSet(node_unit) << "]";
return false;
}
return true;
}
bool BaseOpBuilder::BuildOp(vsi::npu::GraphEP* graph_ep,
const onnxruntime::GraphViewer& graph_viewer,
const NodeUnit& node_unit) {
std::vector<std::shared_ptr<tim::vx::Tensor>> inputs;
std::vector<NodeUnitIODef> input_defs = node_unit.Inputs();
std::vector<NodeUnitIODef> output_defs = node_unit.Outputs();
for (const auto input_def : input_defs) {
auto it = std::find_if(
graph_ep->GetGraphInputs().begin(), graph_ep->GetGraphInputs().end(),
[input_def](const std::shared_ptr<GraphIOInfo>& info) {
return info->name == input_def.node_arg.Name();
});
tim::vx::TensorAttribute attr;
if (graph_viewer.IsConstantInitializer(input_def.node_arg.Name(), true)) {
attr = tim::vx::TensorAttribute::CONSTANT;
} else if (it == graph_ep->GetGraphInputs().end()) {
attr = tim::vx::TensorAttribute::TRANSIENT;
} else {
attr = tim::vx::TensorAttribute::INPUT;
}
auto tensor = graph_ep->MapTIMVXTensor(graph_ep->GetGraph(), input_def, node_unit,
&graph_viewer, attr);
inputs.push_back(tensor);
}
std::vector<std::shared_ptr<tim::vx::Tensor>> outputs;
for (auto output_def : output_defs) {
auto it = std::find_if(
graph_ep->GetGraphOutputs().begin(), graph_ep->GetGraphOutputs().end(),
[output_def](const std::shared_ptr<GraphIOInfo>& info) {
return info->name == output_def.node_arg.Name();
});
tim::vx::TensorAttribute attribute =
it == graph_ep->GetGraphOutputs().end()
? tim::vx::TensorAttribute::TRANSIENT
: tim::vx::TensorAttribute::OUTPUT;
auto tensor = graph_ep->MapTIMVXTensor(graph_ep->GetGraph(), output_def, node_unit,
&graph_viewer, attribute);
outputs.push_back(tensor);
}
return HandleBuildOp(graph_ep, inputs, outputs, node_unit);
}
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,75 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#pragma once
#include <memory>
#include <vector>
#include "core/providers/vsinpu/builders/op_builder.h"
#include "core/providers/vsinpu/vsinpu_ep_graph.h"
#include "core/providers/vsinpu/vsinpu_util.h"
#include "tim/vx/operation.h"
#include "tim/vx/ops.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class BaseOpBuilder : public IOpBuilder {
public:
virtual ~BaseOpBuilder() = default;
bool IsSupported(const onnxruntime::GraphViewer& graph_viewer,
const NodeUnit& node_unit) const override;
bool BuildOp(vsi::npu::GraphEP* graph_ep,
const onnxruntime::GraphViewer& graph_viewer, const NodeUnit& node_unit);
virtual bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
const Node* node) const {
return true;
}
virtual bool IsQuantizedOp(const NodeUnit& /* node_unit */) const { return false; }
virtual int GetMinSupportedOpSet(const NodeUnit& /* node_unit */) const { return 1; }
virtual int GetMaxSupportedOpSet(const NodeUnit& /* node_unit */) const { return 22; }
virtual bool HasSupportedInputOutputsImpl(
const InitializedTensorSet& initializers, const NodeUnit& node_unit) const;
// TODO(cfy): Check if this node_unit's type is supported
virtual bool IsNodeUnitTypeSupported(const NodeUnit& node_unit) const { return true; }
virtual bool HandleBuildOp(
vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) {
return true;
}
private:
bool HasSupportedOpSet(const NodeUnit& node_unit) const;
bool HasSupportedInputOutputs(const InitializedTensorSet& initializers,
const NodeUnit& node_unit) const;
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,47 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/shared/utils/utils.h"
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class CastOpBuilder : public BaseOpBuilder {
protected:
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep, std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs, const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating Cast Op.";
NodeAttrHelper helper(node_unit.GetNode());
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::DataConvert>();
(*op).BindInput(inputs[0]).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,115 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <limits>
#include <utility>
#include "core/providers/vsinpu/builders/impl/clip_op_builder.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
namespace clip_internal {
template <typename T>
struct LowMax {
constexpr static T low() {
return std::numeric_limits<T>::lowest();
}
constexpr static T max() {
return std::numeric_limits<T>::max();
}
};
} // namespace clip_internal
template <typename T>
struct ClipOpBuilder::ClipImpl {
ClipImpl(vsi::npu::GraphEP* graph_ep, std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs) {
T min_default = clip_internal::LowMax<T>::low();
T max_default = clip_internal::LowMax<T>::max();
T* min_data = &min_default;
T* max_data = &max_default;
std::shared_ptr<tim::vx::Tensor> min_tensor = nullptr;
std::shared_ptr<tim::vx::Tensor> max_tensor = nullptr;
if (inputs.size() > 1) {
min_tensor = inputs[1];
if (inputs.size() > 2) {
max_tensor = inputs[2];
}
}
if (min_tensor) {
min_tensor->CopyDataFromTensor(min_data);
}
if (max_tensor) {
max_tensor->CopyDataFromTensor(max_data);
}
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Clip>(
static_cast<float>(*min_data), static_cast<float>(*max_data));
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
}
};
bool ClipOpBuilder::HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) {
LOGS_DEFAULT(INFO) << "Creating Clip Op.";
if (node_unit.SinceVersion() <= 6) {
NodeAttrHelper helper(node_unit.GetNode());
auto min = helper.Get("min", -3.402e+38f);
auto max = helper.Get("max", 3.402e+38f);
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Clip>(min, max);
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
} else {
switch (inputs[0]->GetDataType()) {
case tim::vx::DataType::INT8:
ClipImpl<int8_t>(graph_ep, inputs, outputs);
break;
case tim::vx::DataType::UINT8:
ClipImpl<uint8_t>(graph_ep, inputs, outputs);
break;
case tim::vx::DataType::INT16:
ClipImpl<int16_t>(graph_ep, inputs, outputs);
break;
case tim::vx::DataType::INT32:
ClipImpl<int32_t>(graph_ep, inputs, outputs);
break;
case tim::vx::DataType::FLOAT16:
ClipImpl<Ort::Float16_t>(graph_ep, inputs, outputs);
break;
case tim::vx::DataType::FLOAT32:
default:
ClipImpl<float>(graph_ep, inputs, outputs);
break;
}
}
return true;
}
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,57 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
#include "core/providers/shared/utils/utils.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class ClipOpBuilder final : public BaseOpBuilder {
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
const Node* node) const override {
if (node->SinceVersion() > 6) {
if (node->InputDefs().size() > 1 &&
!Contains(graph_viewer.GetAllInitializedTensors(), node->InputDefs()[1]->Name())) {
LOGS_DEFAULT(WARNING) << "Min/Max value must be const input or attribute.";
return false;
}
}
return true;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override;
private:
template <typename T>
struct ClipImpl;
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,65 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
#include "core/providers/common.h"
#include "core/providers/shared/utils/utils.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class ConcatOpBuilder : public BaseOpBuilder {
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
const Node* node) const override {
NodeAttrHelper helper(*node);
auto axis = helper.Get("axis", 0);
auto input_defs = node->InputDefs();
auto input_shape = vsi::npu::util::GetTensorShape(*input_defs[0]);
int32_t rank = input_shape.NumDimensions();
if (axis >= rank || axis < -rank) {
LOGS_DEFAULT(ERROR) << "Axis is invalid in Concat.";
return false;
}
return true;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating Concat Op.";
NodeAttrHelper helper(node_unit.GetNode());
auto axis = helper.Get("axis", 0);
axis = util::ReverseAxis(axis, inputs[0]->GetShape().size());
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Concat>(static_cast<uint32_t>(axis), inputs.size());
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,162 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <string>
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/shared/utils/utils.h"
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class ConvOpBuilder : public BaseOpBuilder {
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
const Node* node) const override {
auto input_defs = node->InputDefs();
auto shape = vsi::npu::util::GetTensorShape(*input_defs[0]);
if (shape.NumDimensions() == 5) {
LOGS_DEFAULT(WARNING) << "Not support conv3d yet.";
return false;
}
return true;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
auto input_tensor = inputs[0];
auto weight_tensor = inputs[1];
auto OChannel_idx = weight_tensor->GetShape().size() - 1;
const bool is_1d_conv =
weight_tensor->GetShape().size() == 3 ? true : false;
NodeAttrHelper helper(node_unit.GetNode());
auto padtype = helper.Get("auto_pad", std::string(""));
auto group = helper.Get("group", static_cast<uint32_t>(1));
std::string op_type = (group != 1 && group == weight_tensor->GetShape()[OChannel_idx])
? "DepthwiseConv"
: (group != 1) ? "GroupConv"
: "Conv";
op_type += is_1d_conv ? "1D" : "2D";
std::string op_name = std::string("Creating ") + op_type + " Op";
LOGS_DEFAULT(INFO) << op_name;
uint32_t default_uint = 1;
std::vector<uint32_t> default_vec = {1, 1};
auto stride =
helper.Get("strides", is_1d_conv ? std::vector<uint32_t>{default_uint}
: default_vec);
auto dilation =
helper.Get("dilations", is_1d_conv ? std::vector<uint32_t>{default_uint}
: default_vec);
std::shared_ptr<tim::vx::Operation> op;
if (padtype != "NOTSET") { // array "pads" is not set
if (group != 1 && group != weight_tensor->GetShape()[OChannel_idx]) {
if (is_1d_conv) {
op = graph_ep->GetGraph()
->CreateOperation<tim::vx::ops::GroupedConv1d>(
vsi::npu::util::GetPadType(padtype), stride[0],
dilation[0], group, tim::vx::DataLayout::WCN,
tim::vx::DataLayout::WIcOc);
} else {
op = graph_ep->GetGraph()
->CreateOperation<tim::vx::ops::GroupedConv2d>(
vsi::npu::util::GetPadType(padtype),
/* W_stride, H_stride*/
std::array<uint32_t, 2>{stride[1], stride[0]},
/* W_dilation, H_dilation*/
std::array<uint32_t, 2>{dilation[1], dilation[0]}, group,
tim::vx::DataLayout::WHCN, tim::vx::DataLayout::WHIcOc);
}
} else {
int32_t multiplier = group == 1
? 0
: weight_tensor->GetShape()[OChannel_idx] / input_tensor->GetShape()[OChannel_idx - 1];
if (is_1d_conv) {
op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Conv1d>(
vsi::npu::util::GetPadType(padtype), stride[0], dilation[0], multiplier,
tim::vx::DataLayout::WCN, tim::vx::DataLayout::WIcOc);
} else {
op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Conv2d>(
vsi::npu::util::GetPadType(padtype),
/* W_stride, H_stride*/
std::array<uint32_t, 2>{stride[1], stride[0]},
/* W_dilation, H_dilation*/
std::array<uint32_t, 2>{dilation[1], dilation[0]}, multiplier,
tim::vx::DataLayout::WHCN, tim::vx::DataLayout::WHIcOc);
}
}
} else {
auto pads = helper.Get("pads", std::vector<uint32_t>{0U, 0U});
if (group != 1 && group != weight_tensor->GetShape()[OChannel_idx]) {
if (is_1d_conv) {
op = graph_ep->GetGraph()
->CreateOperation<tim::vx::ops::GroupedConv1d>(
vsi::npu::util::GetPadType(padtype),
std::array<uint32_t, 2>{pads[0], pads[1]}, stride[0],
dilation[0], group, tim::vx::DataLayout::WCN,
tim::vx::DataLayout::WIcOc);
} else {
op = graph_ep->GetGraph()
->CreateOperation<tim::vx::ops::GroupedConv2d>(
/* W_begin,W_end, H_begin,H_end*/ std::array<
uint32_t, 4>{pads[1], pads[3], pads[0], pads[2]},
/* W_stride, H_stide*/
std::array<uint32_t, 2>{stride[1], stride[0]},
/* W_dilation, H_dilation*/
std::array<uint32_t, 2>{dilation[1], dilation[0]}, group,
tim::vx::DataLayout::WHCN, tim::vx::DataLayout::WHIcOc);
}
} else {
int32_t multiplier = group == 1
? 0
: weight_tensor->GetShape()[OChannel_idx] / input_tensor->GetShape()[OChannel_idx - 1];
if (is_1d_conv) {
op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Conv1d>(
std::array<uint32_t, 2>{pads[0], pads[1]}, stride[0], dilation[0],
multiplier, tim::vx::DataLayout::WCN, tim::vx::DataLayout::WIcOc);
} else {
op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Conv2d>(
/* W_begin,W_end, H_begin,H_end*/ std::array<uint32_t, 4>{pads[1], pads[3],
pads[0], pads[2]},
/* W_stride, H_stride*/
std::array<uint32_t, 2>{stride[1], stride[0]},
/* W_dilation, H_dilation*/
std::array<uint32_t, 2>{dilation[1], dilation[0]}, multiplier,
tim::vx::DataLayout::WHCN, tim::vx::DataLayout::WHIcOc);
}
}
}
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,83 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
#include "core/providers/common.h"
#include "core/providers/shared/utils/utils.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class DequantizeLinearOpBuilder : public BaseOpBuilder {
enum DequantizeINPUTS {
input_tensor = 0,
scale_tensor = 1,
zero_point_tensor = 2
};
bool HasSupportedInputOutputsImpl(const InitializedTensorSet& initializers,
const NodeUnit& node_unit) const override {
auto input_type = node_unit.Inputs()[0].node_arg.Type();
if (*input_type == "tensor(int64)" || !util::IsTypeSupported(&node_unit.Inputs()[0].node_arg)) {
LOGS_DEFAULT(WARNING) << node_unit.OpType() << " has unsupported input type : "
<< *input_type;
return false;
}
if (!node_unit.Inputs()[0].quant_param.has_value()) {
LOGS_DEFAULT(WARNING) << "The quantization params must be known.";
return false;
}
if (node_unit.Inputs()[0].quant_param->scale.Shape()->dim_size() != 0 &&
node_unit.Inputs()[0].quant_param->scale.Shape()->dim(0).dim_value() != 1) {
LOGS_DEFAULT(WARNING) << "Per channel quantized input is not support in DequantizeLinear op.";
return false;
}
return true;
}
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
const Node* node) const override {
NodeAttrHelper helper(*node);
if (helper.HasAttr("block_size") && helper.Get("block_size", 0) != 0) {
LOGS_DEFAULT(WARNING) << "Not support block quantization yet.";
return false;
}
return true;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(INFO) << "Creating Dequantize Op.";
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::DataConvert>();
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,98 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
#define ELEMENTWISE_OP_BUILDER(onnx_op_type, vsinpu_op_kind) \
class onnx_op_type##OpBuilder : public BaseOpBuilder { \
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer, \
const Node* node) const override { \
for (auto input : node->InputDefs()) { \
if (*input->Type() == "tensor(int64)") { \
LOGS_DEFAULT(WARNING) << "Int64 type is not suppoted as elementwise operation input."; \
return false; \
} \
} \
return true; \
} \
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep, \
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs, \
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs, \
const NodeUnit& node_unit) override { \
LOGS_DEFAULT(INFO) << "Creating " << #onnx_op_type << " Op"; \
auto op = graph_ep->GetGraph() -> CreateOperation<tim::vx::ops::vsinpu_op_kind>(); \
(*op).BindInputs(inputs).BindOutputs(outputs); \
return true; \
; \
} \
};
ELEMENTWISE_OP_BUILDER(Add, Add);
ELEMENTWISE_OP_BUILDER(Sub, Sub);
ELEMENTWISE_OP_BUILDER(Mul, Multiply);
ELEMENTWISE_OP_BUILDER(Div, Div); // not consider zero
ELEMENTWISE_OP_BUILDER(Abs, Abs);
ELEMENTWISE_OP_BUILDER(Sqrt, Sqrt);
ELEMENTWISE_OP_BUILDER(Exp, Exp);
ELEMENTWISE_OP_BUILDER(Floor, Floor);
ELEMENTWISE_OP_BUILDER(Log, Log);
ELEMENTWISE_OP_BUILDER(Sin, Sin);
ELEMENTWISE_OP_BUILDER(HardSwish, HardSwish);
class PowOpBuilder : public BaseOpBuilder {
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
const Node* node) const override {
auto input0_type = *node->InputDefs()[0]->Type();
auto input1_type = *node->InputDefs()[1]->Type();
if (input0_type != input1_type) {
if ((input0_type == "tensor(float)" && input1_type == "tensor(int32)") ||
(input0_type == "tensor(int32)" && input1_type == "tensor(float)")) {
LOGS_DEFAULT(WARNING) << "Pow op does not support one of input is float32 while the other one is int32 type.";
return false;
}
}
return true;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(INFO) << "Creating Pow Op";
auto op = graph_ep->GetGraph()
->CreateOperation<tim::vx::ops::Pow>();
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,65 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
#include "core/providers/common.h"
#include "core/providers/shared/utils/utils.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class FlattenOpBuilder : public BaseOpBuilder {
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating Flatten Op.";
std::vector<uint32_t> reshape_param;
if (outputs[0]->GetShape().size() == 2) {
reshape_param = outputs[0]->GetShape();
} else {
auto input_shape = inputs[0]->GetShape();
NodeAttrHelper helper(node_unit.GetNode());
int64_t axis = helper.Get("axis", 1);
axis = util::ReverseAxis(static_cast<int32_t>(axis), input_shape.size());
uint32_t first_dim = 1;
for (int64_t i = 0; i < axis; i++) {
first_dim *= inputs[0]->GetShape()[i];
}
uint32_t second_dim = inputs[0]->GetSpec().GetElementNum() / first_dim;
reshape_param.push_back(first_dim);
reshape_param.push_back(second_dim);
}
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Reshape>(reshape_param);
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,86 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
#include "core/providers/shared/utils/utils.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class GatherOpBuilder : public BaseOpBuilder {
bool HasSupportedInputOutputsImpl(const InitializedTensorSet& initializers,
const NodeUnit& node_unit) const override {
auto input = node_unit.Inputs()[0];
auto indices = node_unit.Inputs()[1];
if (util::IsTypeSupported(&input.node_arg) && util::IsTypeSupported(&indices.node_arg)) {
if (*input.node_arg.Type() == "tensor(int64)") {
LOGS_DEFAULT(WARNING) << "Only support indices tensor to be int64 type in gather op.";
return false;
}
if (*indices.node_arg.Type() != "tensor(int64)" && *indices.node_arg.Type() != "tensor(int32)") {
LOGS_DEFAULT(WARNING) << "Unsupported indices tensor type in gather op.";
return false;
}
if (*indices.node_arg.Type() == "tensor(int64)" && !Contains(initializers, indices.node_arg.Name())) {
LOGS_DEFAULT(WARNING) << "Only support const attribute if indice tensor is in int64 type.";
return false;
}
return true;
}
return false;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating Gather Op.";
NodeAttrHelper helper(node_unit.GetNode());
auto axis = helper.Get("axis", 0);
axis = util::ReverseAxis(axis, inputs[0]->GetShape().size());
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Gather>(axis, 0);
bool is_i64_indices = inputs[1]->GetDataType() == tim::vx::DataType::INT64;
if (!is_i64_indices) {
(*op).BindInputs(inputs).BindOutputs(outputs);
} else {
std::vector<int64_t> origin_data(inputs[1]->GetSpec().GetElementNum());
inputs[1]->CopyDataFromTensor(origin_data.data());
std::vector<int32_t> transformed_data(origin_data.begin(), origin_data.end());
tim::vx::TensorSpec ts = inputs[1]->GetSpec().SetAttribute(tim::vx::TensorAttribute::INPUT);
ts.SetDataType(tim::vx::DataType::INT32);
auto transformed_indices = graph_ep->GetGraph()->CreateTensor(ts, transformed_data.data());
(*op).BindInput(inputs[0]).BindInput(transformed_indices).BindOutput(outputs[0]);
}
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,148 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/shared/utils/utils.h"
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class GemmOpBuilder : public BaseOpBuilder {
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
const Node* node) const override {
auto input_defs = node->InputDefs();
NodeAttrHelper helper(*node);
auto weight_units = helper.Get("transB", 0) == 1
? vsi::npu::util::GetTensorShape(*input_defs[1]).GetDims()[0]
: vsi::npu::util::GetTensorShape(*input_defs[1]).GetDims()[1];
if (input_defs.size() > 2) {
auto bias_shape = vsi::npu::util::GetTensorShape(*input_defs[2]);
if (bias_shape.NumDimensions() == 1 && bias_shape.GetDims()[0] != weight_units) {
LOGS_DEFAULT(WARNING) << "Not support to broadcast bias shape.";
return false;
} else if (bias_shape.NumDimensions() == 2 &&
(bias_shape.Size() != weight_units ||
(bias_shape.GetDims()[0] != 1 && bias_shape.GetDims()[1] != 1))) {
LOGS_DEFAULT(WARNING) << "Not support 2-dims bias shape.";
return false;
}
if (*input_defs[2]->Type() == "tensor(float16)" &&
!graph_viewer.IsConstantInitializer(input_defs[2]->Name(), true)) {
LOGS_DEFAULT(WARNING) << "Not support f16 bias with input attr.";
return false;
}
}
return true;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating Gemm Op.";
auto input_A = inputs[0];
auto input_B = inputs[1];
NodeAttrHelper helper(node_unit.GetNode());
auto trans_A = helper.Get("transA", 0);
auto trans_B = helper.Get("transB", 0);
const bool has_alpha = (helper.Get("alpha", 1.0f) != 1.0);
const bool has_beta = (helper.Get("beta", 1.0f) != 1.0);
const bool has_C = (inputs.size() == 3);
auto weight_units = helper.Get("transB", 0) == 1 ? inputs[1]->GetShape()[1] : inputs[1]->GetShape()[0];
tim::vx::TensorSpec coef_spec(tim::vx::DataType::FLOAT32, {1},
tim::vx::TensorAttribute::CONSTANT);
auto multiply_impl = [&](std::shared_ptr<tim::vx::Tensor> input,
std::shared_ptr<tim::vx::Tensor> coef,
std::shared_ptr<tim::vx::Tensor> output) {
auto multiply_op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Multiply>();
(*multiply_op).BindInput(input).BindInput(coef).BindOutput(output);
graph_ep->GetOps().push_back(multiply_op);
};
auto transpose_impl = [&](std::shared_ptr<tim::vx::Tensor> input,
std::shared_ptr<tim::vx::Tensor> output) {
std::vector<uint32_t> perm = {1U, 0U};
auto transpose_op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Transpose>(perm);
(*transpose_op).BindInput(input).BindOutput(output);
graph_ep->GetOps().push_back(std::move(transpose_op));
};
auto fc_impl = [&](std::vector<std::shared_ptr<tim::vx::Tensor>> inputs,
std::shared_ptr<tim::vx::Tensor> output) {
auto fc_op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::FullyConnected>(0, weight_units);
(*fc_op).BindInputs(inputs).BindOutput(output);
graph_ep->GetOps().push_back(std::move(fc_op));
};
auto alpha_A = input_A;
std::shared_ptr<tim::vx::Tensor> beta_C;
auto final_A = input_A;
auto final_B = input_B;
if (has_alpha) {
auto alpha_tensor = graph_ep->GetGraph()->CreateTensor(coef_spec);
auto alpha = helper.Get("alpha", 1.0f);
alpha_tensor->CopyDataToTensor(&alpha);
alpha_A = graph_ep->GetGraph()->CreateTensor(
input_A->GetSpec().AsTransientSpec());
multiply_impl(input_A, alpha_tensor, alpha_A);
final_A = alpha_A;
}
if (has_beta) {
auto beta_tensor = graph_ep->GetGraph()->CreateTensor(coef_spec);
auto beta = helper.Get("beta", 1.0f);
beta_tensor->CopyDataToTensor(&beta);
beta_C = graph_ep->GetGraph()->CreateTensor(
inputs[2]->GetSpec().AsTransientSpec());
multiply_impl(inputs[2], beta_tensor, beta_C);
} else if (has_C) {
beta_C = inputs[2];
}
if (trans_A) {
final_A = graph_ep->GetGraph()->CreateTensor(
input_A->GetSpec().AsTransientSpec());
transpose_impl(alpha_A, final_A);
}
if (!trans_B) {
final_B = graph_ep->GetGraph()->CreateTensor(
input_B->GetSpec().AsTransientSpec());
transpose_impl(input_B, final_B);
}
std::vector<std::shared_ptr<tim::vx::Tensor>> fc_inputs = {final_A, final_B};
if (has_C) fc_inputs.push_back(beta_C);
fc_impl(fc_inputs, outputs[0]);
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,56 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class MatMulOpBuilder : public BaseOpBuilder {
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
const Node* node) const override {
auto output_defs = node->OutputDefs();
if (output_defs[0]->Shape()->dim_size() == 0) {
LOGS_DEFAULT(WARNING) << "Inner product of 1-D tensor is not supported in MatMul op.";
return false;
}
return true;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating Matmul Op.";
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Matmul>();
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,86 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
#include "core/providers/shared/utils/utils.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class BatchNormOpBuilder : public BaseOpBuilder {
enum NormINPUTS {
input_tensor = 0,
scale_tensor = 1,
Bias_tensor = 2,
mean_tensor = 3,
var_tensor = 4
};
int GetMinSupportedOpSet(const NodeUnit& /* node_unit */) const override { return 9; }
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
const Node* node) const override {
auto input_defs = node->InputDefs();
NodeAttrHelper helper(*node);
auto training_mode = helper.Get("training_mode", 0);
if (training_mode) {
LOGS_DEFAULT(WARNING) << "Training is not supported in batch_norm op.";
return false;
}
if (helper.HasAttr("spatial")) {
LOGS_DEFAULT(WARNING) << "VSINPU does not support 'spatial' parameter.";
return false;
}
if (!graph_viewer.IsConstantInitializer(input_defs[NormINPUTS::scale_tensor]->Name(), true)) {
LOGS_DEFAULT(WARNING) << "Not support mean/var/gamma/beta set as dynamic input yet.";
return false;
}
return true;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(INFO) << "Creating BatchNorm Op.";
NodeAttrHelper helper(node_unit.GetNode());
auto epsilon = helper.Get("epsilon", 1e-5f);
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::BatchNorm>(epsilon);
std::vector<std::shared_ptr<tim::vx::Tensor>> reordered_inputs;
int indices[] = {NormINPUTS::input_tensor, NormINPUTS::mean_tensor, NormINPUTS::var_tensor,
NormINPUTS::scale_tensor, NormINPUTS::Bias_tensor};
for (int i : indices) {
reordered_inputs.push_back(inputs[i]);
}
(*op).BindInputs(reordered_inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,152 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
#include "core/providers/shared/utils/utils.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class BasePoolOpBuilder : public BaseOpBuilder {
public:
explicit BasePoolOpBuilder(tim::vx::PoolType pool_type) : pool_type_(pool_type) {}
protected:
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer, const Node* node) const override {
auto shape = vsi::npu::util::GetTensorShape(*node->InputDefs()[0]);
if (shape.NumDimensions() == 5) {
LOGS_DEFAULT(WARNING) << "3DPool is not supported yet.";
return false;
}
NodeAttrHelper helper(*node);
if (helper.HasAttr("dilations")) {
LOGS_DEFAULT(WARNING) << "NonMaxPool with Dilation parameter is not supported.";
return false;
}
return true;
}
bool CreatePoolingOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const std::array<uint32_t, 2>& kernel_size,
const std::array<uint32_t, 2>& strides,
const std::array<uint32_t, 4>& pads,
bool is_global,
const tim::vx::RoundType ceil_mode) {
const bool is_1d_pool = inputs[0]->GetShape().size() == 3;
std::shared_ptr<tim::vx::Operation> op;
// Create the appropriate pooling operation
if (is_global) {
if (is_1d_pool) {
op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Pool1d>(pool_type_, inputs[0]->GetShape()[0],
ceil_mode);
} else {
std::array<uint32_t, 2> input_size = {inputs[0]->GetShape()[0], inputs[0]->GetShape()[1]};
op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Pool2d>(pool_type_, input_size, ceil_mode);
}
} else {
if (is_1d_pool) {
std::array<uint32_t, 2> arr = {pads[2], pads[0]};
op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Pool1d>(pool_type_, arr,
kernel_size[1], strides[1], ceil_mode);
} else {
op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Pool2d>(pool_type_, pads, kernel_size,
strides, ceil_mode);
}
}
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
tim::vx::PoolType pool_type_;
};
class TraditionalPoolOpBuilder : public BasePoolOpBuilder {
public:
TraditionalPoolOpBuilder() : BasePoolOpBuilder(tim::vx::PoolType::MAX) {}
protected:
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
NodeAttrHelper helper(node_unit.GetNode());
auto ksize = helper.Get("kernel_shape", std::vector<uint32_t>{1U, 1U});
auto strides = helper.Get("strides", std::vector<uint32_t>{1U, 1U});
auto pads = helper.Get("pads", std::vector<uint32_t>{0U, 0U, 0U, 0U});
tim::vx::RoundType ceil_mode = helper.Get("ceil_mode", 0U) == 0
? tim::vx::RoundType::FLOOR
: tim::vx::RoundType::CEILING;
return CreatePoolingOp(graph_ep, inputs, outputs, {ksize[1], ksize[0]}, {strides[1], strides[0]},
{pads[1], pads[3], pads[0], pads[2]}, false, ceil_mode);
}
};
class GlobalPoolOpBuilder : public BasePoolOpBuilder {
public:
GlobalPoolOpBuilder() : BasePoolOpBuilder(tim::vx::PoolType::MAX) {}
protected:
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
NodeAttrHelper helper(node_unit.GetNode());
tim::vx::RoundType ceil_mode = helper.Get("ceil_mode", 0U) == 0
? tim::vx::RoundType::FLOOR
: tim::vx::RoundType::CEILING;
return CreatePoolingOp(graph_ep, inputs, outputs, {}, {}, {}, true, ceil_mode);
}
};
class GlobalAveragePoolOpBuilder : public GlobalPoolOpBuilder {
public:
GlobalAveragePoolOpBuilder() { pool_type_ = tim::vx::PoolType::AVG; }
};
class GlobalMaxPoolOpBuilder : public GlobalPoolOpBuilder {
public:
GlobalMaxPoolOpBuilder() { pool_type_ = tim::vx::PoolType::MAX; }
};
class AveragePoolOpBuilder : public TraditionalPoolOpBuilder {
public:
AveragePoolOpBuilder() { pool_type_ = tim::vx::PoolType::AVG; }
};
class MaxPoolOpBuilder : public TraditionalPoolOpBuilder {
public:
MaxPoolOpBuilder() { pool_type_ = tim::vx::PoolType::MAX; }
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,85 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class BaseQLinearOpBuilder : public BaseOpBuilder {
enum {
INPUT_A = 0,
INPUT_A_SCALE = 1,
INPUT_A_ZP = 2,
INPUT_B = 3,
INPUT_B_SCALE = 4,
INPUT_B_ZP = 5,
OUTPUT_SCALE = 6,
OUTPUT_ZP = 7,
};
protected:
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer, const Node* node) const override {
for (int i = 0; i < node->InputDefs().size(); i++) {
if (i == INPUT_A || i == INPUT_B) continue;
if (!graph_viewer.IsConstantInitializer(node->InputDefs()[i]->Name(), true)) {
LOGS_DEFAULT(WARNING) << "Only support const scale / zero point.";
return false;
}
}
return true;
}
};
class QLinearAddOpBuilder : public BaseQLinearOpBuilder {
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating QLinearAdd Op.";
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Add>();
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
class QLinearMulOpBuilder : public BaseQLinearOpBuilder {
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating QLinearMul Op.";
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Multiply>();
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,48 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/shared/utils/utils.h"
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class QLinearConcatOpBuilder : public BaseOpBuilder {
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep, std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs, const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating QLinearConcat Op.";
NodeAttrHelper helper(node_unit.GetNode());
int axis = helper.Get("axis", 0);
axis = util::ReverseAxis(axis, inputs[0]->GetShape().size());
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Concat>(axis, inputs.size());
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,151 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <string>
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/shared/utils/utils.h"
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
#include "core/framework/tensorprotoutils.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class QLinearConvOpBuilder : public BaseOpBuilder {
enum QLinearConvINPUTS {
INPUT_TENSOR = 0,
INPUT_TENSOR_SCALE = 1,
INPUT_TENSOR_ZP = 2,
WEIGHT_TENSOR = 3,
WEIGHT_TENSOR_SCALE = 4,
WEIGHT_TENSOR_ZP = 5,
OUTPUT_TENSOR_SCALE = 6,
OUTPUT_TENSOR_ZP = 7,
BIAS_TENSOR = 8,
};
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
const Node* node) const override {
auto input_defs = node->InputDefs();
auto input_shape = vsi::npu::util::GetTensorShape(*input_defs[QLinearConvINPUTS::INPUT_TENSOR]);
auto w_scale_shape = vsi::npu::util::GetTensorShape(*input_defs[QLinearConvINPUTS::WEIGHT_TENSOR_SCALE]);
auto w_shape_dims = vsi::npu::util::GetTensorShape(*input_defs[QLinearConvINPUTS::WEIGHT_TENSOR]).GetDims();
if (input_shape.NumDimensions() != 4) {
LOGS_DEFAULT(WARNING) << "Not support conv3d&& conv1d yet.";
return false;
}
if (!graph_viewer.IsConstantInitializer(input_defs[QLinearConvINPUTS::INPUT_TENSOR_SCALE]->Name(), true) ||
!graph_viewer.IsConstantInitializer(input_defs[WEIGHT_TENSOR]->Name(), true)) {
LOGS_DEFAULT(WARNING) << "Not support quantization definitions or weights that are not constant yet.";
return false;
}
if (w_shape_dims[2] > 15) {
LOGS_DEFAULT(WARNING) << "Not support weight kernel with height higher than 15.";
return false;
}
if (w_scale_shape.Size() != 1 && *input_defs[WEIGHT_TENSOR]->Type() == "tensor(int8)") {
const ONNX_NAMESPACE::TensorProto* tensor_proto =
graph_viewer.GetConstantInitializer(input_defs[QLinearConvINPUTS::WEIGHT_TENSOR_ZP]->Name(), true);
std::vector<int8_t> w_zp(tensor_proto->dims_size() == 0 ? 1 : tensor_proto->dims()[0]);
auto status = onnxruntime::utils::UnpackTensor(
*tensor_proto,
tensor_proto->has_raw_data() ? tensor_proto->raw_data().data() : nullptr,
tensor_proto->has_raw_data() ? tensor_proto->raw_data().size() : 0,
w_zp.data(), w_zp.size());
if (!status.IsOK()) {
LOGS_DEFAULT(ERROR) << "Failed to get data from weight zp tensor.";
return false;
}
if (std::any_of(w_zp.begin(), w_zp.end(), [](int i) { return i != 0; })) {
LOGS_DEFAULT(WARNING) << "Asymmetric perchannel quantization only allows uint8 datatype or int8 with all zero.";
return false;
}
}
return true;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating QLinearConv Op.";
NodeAttrHelper helper(node_unit.GetNode());
auto padtype = helper.Get("auto_pad", std::string(""));
auto group = helper.Get("group", static_cast<uint32_t>(1));
std::vector<uint32_t> default_vec = {1, 1, 1, 1};
auto stride =
helper.Get("strides", default_vec);
auto dilation =
helper.Get("dilations", default_vec);
std::shared_ptr<tim::vx::Operation> op;
if (padtype != "NOTSET") { // array "pads" is not set
if (group != 1 && group != inputs[1]->GetShape()[3]) {
op = graph_ep->GetGraph()
->CreateOperation<tim::vx::ops::GroupedConv2d>(
vsi::npu::util::GetPadType(padtype),
std::array<uint32_t, 2>{stride[1], stride[0]},
std::array<uint32_t, 2>{dilation[1], dilation[0]}, group,
tim::vx::DataLayout::WHCN, tim::vx::DataLayout::WHIcOc);
} else {
int32_t multiplier = group == 1 ? 0 : inputs[1]->GetShape()[3] / inputs[0]->GetShape()[2];
op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Conv2d>(
vsi::npu::util::GetPadType(padtype),
std::array<uint32_t, 2>{stride[1], stride[0]},
std::array<uint32_t, 2>{dilation[1], dilation[0]}, multiplier,
tim::vx::DataLayout::WHCN, tim::vx::DataLayout::WHIcOc);
}
} else {
std::vector<uint32_t> default_pads(4, 0);
auto pads = helper.Get("pads", default_pads);
if (group != 1 && group != inputs[1]->GetShape()[3]) {
op = graph_ep->GetGraph()
->CreateOperation<tim::vx::ops::GroupedConv2d>(
std::array<uint32_t, 4>{pads[1], pads[3], pads[0], pads[2]},
std::array<uint32_t, 2>{stride[1], stride[0]},
std::array<uint32_t, 2>{dilation[1], dilation[0]}, group,
tim::vx::DataLayout::WHCN, tim::vx::DataLayout::WHIcOc);
} else {
int32_t multiplier = group == 1 ? 0 : inputs[1]->GetShape()[3] / inputs[0]->GetShape()[2];
op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Conv2d>(
std::array<uint32_t, 4>{pads[1], pads[3],
pads[0], pads[2]},
std::array<uint32_t, 2>{stride[1], stride[0]},
std::array<uint32_t, 2>{dilation[1], dilation[0]}, multiplier,
tim::vx::DataLayout::WHCN, tim::vx::DataLayout::WHIcOc);
}
}
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,83 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class QLinearMatMulOpBuilder : public BaseOpBuilder {
enum {
matrixA = 0,
A_scale = 1,
A_zero_point = 2,
matrixB = 3,
B_scale = 4,
B_zero_point = 5,
out_scale = 6,
out_zero_point = 7
};
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
const Node* node) const override {
auto input_defs = node->InputDefs();
auto A_def = input_defs[matrixA];
auto B_def = input_defs[matrixB];
for (auto def : input_defs) {
if (def->Name() == A_def->Name() || def->Name() == B_def->Name()) {
continue;
} else {
if (!graph_viewer.IsConstantInitializer(def->Name(), true)) {
LOGS_DEFAULT(WARNING) << "Scale and zero point must be known before setting graph.";
return false;
}
}
}
int64_t A_elements = util::GetTensorShape(*input_defs[A_scale]).Size();
int64_t B_elements = util::GetTensorShape(*input_defs[B_scale]).Size();
int64_t Out_elements = util::GetTensorShape(*input_defs[out_scale]).Size();
if (A_elements > 1 || B_elements > 1 || Out_elements > 1) {
LOGS_DEFAULT(WARNING) << "Per channel quantized input/output is not supported in QLinearMatmul Op.";
return false;
}
return true;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(INFO) << "Creating QLinearMatmul Op.";
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Matmul>();
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,79 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
#include "core/providers/common.h"
#include "core/providers/shared/utils/utils.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class QuantizeLinearOpBuilder : public BaseOpBuilder {
enum QuantizeINPUTS {
input_tensor = 0,
scale_tensor = 1,
zero_point_tensor = 2
};
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
const Node* node) const override {
auto input_defs = node->InputDefs();
auto scale_shape = npu::util::GetTensorShape(*input_defs[QuantizeINPUTS::scale_tensor]);
NodeAttrHelper helper(*node);
if (helper.HasAttr("block_size") && helper.Get("block_size", 0) != 0) {
LOGS_DEFAULT(WARNING) << "Not support block quantization.";
return false;
}
if (!graph_viewer.IsConstantInitializer(input_defs[QuantizeINPUTS::scale_tensor]->Name(), true) ||
(input_defs.size() == 3 && !graph_viewer.IsConstantInitializer(
input_defs[QuantizeINPUTS::zero_point_tensor]->Name(), true))) {
LOGS_DEFAULT(WARNING) << "Only support const scale / zero point.";
return false;
}
if (scale_shape.Size() != 1) {
LOGS_DEFAULT(WARNING) << "Per channel quantized output is not supported in QuantizeLinearOp.";
return false;
}
return true;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(INFO) << "Creating Quantize Op.";
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::DataConvert>();
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,82 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
#include "core/providers/shared/utils/utils.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class ReduceMeanOpBuilder : public BaseOpBuilder {
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
const Node* node) const override {
auto input_defs = node->InputDefs();
if (*input_defs[0]->Type() == "tensor(int32)") {
LOGS_DEFAULT(WARNING) << "Not support int32 reduce mean yet.";
return false;
}
return true;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(INFO) << "Creating ReduceMean Op.";
NodeAttrHelper helper(node_unit.GetNode());
std::vector<int64_t> def_axes;
auto input_shape_size = inputs[0]->GetShape().size();
if (node_unit.SinceVersion() < 18 && helper.HasAttr("axes")) {
def_axes = helper.Get("axes", def_axes);
} else if (inputs.size() > 1) {
def_axes.resize(inputs[1]->GetSpec().GetElementNum());
inputs[1]->CopyDataFromTensor(def_axes.data());
} else {
for (int64_t i = 0; i < input_shape_size; ++i) {
def_axes.push_back(i);
}
}
std::vector<int32_t> axes(def_axes.begin(), def_axes.end());
axes = util::ReverseAxis(axes, input_shape_size);
if (helper.HasAttr("noop_with_empty_axes") && inputs.size() == 1 && helper.Get("noop_with_empty_axes", 0) == 1) {
outputs[0] = inputs[0];
return true;
}
bool keepdims = helper.Get("keepdims", 1) == 1;
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::ReduceMean>(axes, keepdims);
(*op).BindInput(inputs[0]).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,153 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
#include "core/providers/shared/utils/utils.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class ResizeOpBuilder : public BaseOpBuilder {
bool HasSupportedInputOutputsImpl(const InitializedTensorSet& initializers,
const NodeUnit& node_unit) const override {
auto input_type = node_unit.Inputs()[0].node_arg.Type();
if (*input_type == "tensor(int64)" || !util::IsTypeSupported(&node_unit.Inputs()[0].node_arg)) {
LOGS_DEFAULT(WARNING) << node_unit.OpType() << " has unsupported input type : "
<< *input_type;
return false;
}
if (node_unit.SinceVersion() > 10) {
if (node_unit.Inputs().size() > 2 && !Contains(initializers, node_unit.Inputs()[2].node_arg.Name())) {
LOGS_DEFAULT(WARNING) << "Scale tensor must be constant.";
return false;
}
if (node_unit.Inputs().size() > 3 && !Contains(initializers, node_unit.Inputs()[3].node_arg.Name())) {
LOGS_DEFAULT(WARNING) << "Size tensor must be constant.";
return false;
}
} else {
if (!Contains(initializers, node_unit.Inputs()[1].node_arg.Name())) {
LOGS_DEFAULT(WARNING) << "Scale tensor must be constant.";
return false;
}
}
return true;
}
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer, const Node* node) const override {
auto shape = vsi::npu::util::GetTensorShape(*node->InputDefs()[0]);
if (shape.NumDimensions() > 4) {
LOGS_DEFAULT(WARNING) << "3D or more dimesions resize is not supported.";
return false;
}
NodeAttrHelper helper(*node);
if (helper.Get("antialiax", 0) != 0) {
LOGS_DEFAULT(WARNING) << "Antialias attribute is not supported.";
return false;
}
auto& cooridinate = helper.Get("coordinate_transoformation_mode", "half_pixel");
if (cooridinate != "align_corners" && cooridinate != "half_pixel") {
LOGS_DEFAULT(WARNING) << "Only support half_pixel and align_corners attributes now.";
return false;
}
if (helper.Get("keep_aspect_ratio_policy", "stretch") != "stretch") {
LOGS_DEFAULT(WARNING) << "Not support to keep aspect ratio.";
return false;
}
if (helper.Get("mode", "nearest") == "cubic") {
LOGS_DEFAULT(WARNING) << "Not support the cubic resize type yet.";
return false;
}
if (helper.HasAttr("axes")) {
LOGS_DEFAULT(WARNING) << "Axes-specifying is not support.";
return false;
}
return true;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating Resize Op.";
auto inputs_num = inputs.size();
bool is_1dresize = inputs[0]->GetShape().size() == 1;
NodeAttrHelper helper(node_unit.GetNode());
auto onnx_mode = helper.Get("mode", "nearest");
auto coordinate_transformation = helper.Get("coordinate_transformation_mode", "half_pixel");
bool is_size_set = helper.HasAttr("size");
int32_t scale_index = node_unit.SinceVersion() > 10 ? 2 : 1;
auto resize_type = onnx_mode == "nearest" ? tim::vx::ResizeType::NEAREST_NEIGHBOR : tim::vx::ResizeType::BILINEAR;
bool align_corners = coordinate_transformation == "align_corners";
bool half_pixel_center = coordinate_transformation == "half_pixel";
std::shared_ptr<tim::vx::Operation> op = nullptr;
if (is_1dresize) {
int target_size;
if (is_size_set) {
int64_t onnx_size;
inputs[3]->CopyDataFromTensor(&onnx_size);
target_size = static_cast<int>(onnx_size);
op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Resize1d>(resize_type, 0.0f, align_corners,
half_pixel_center, target_size);
} else {
float scale;
inputs[scale_index]->CopyDataFromTensor(&scale);
op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Resize1d>(resize_type, scale, align_corners,
half_pixel_center, 0);
}
} else {
int target_h, target_w;
if (is_size_set) {
std::vector<int64_t> onnx_sizes(inputs[3]->GetShape().size());
inputs[3]->CopyDataFromTensor(onnx_sizes.data());
target_h = static_cast<int>(onnx_sizes[1]);
target_w = static_cast<int>(onnx_sizes[0]);
op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Resize>(resize_type, 0.0f, align_corners,
half_pixel_center, target_h, target_w);
} else {
auto input_shape = inputs[0]->GetShape();
std::vector<float> scales(input_shape.size());
std::vector<uint32_t> out_shape(input_shape.size());
inputs[scale_index]->CopyDataFromTensor(scales.data());
for (int i = 0; i < input_shape.size(); i++) {
out_shape[i] = input_shape[i] * scales[input_shape.size() - 1 - i];
}
op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Resize>(resize_type, 0, align_corners,
half_pixel_center, out_shape[1], out_shape[0]);
}
}
(*op).BindInput(inputs[0]).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,101 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include <functional>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
#include "core/providers/common.h"
#include "core/providers/shared/utils/utils.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class SoftmaxOpBuilder : public BaseOpBuilder {
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
const Node* node) const override {
NodeAttrHelper helper(*node);
auto axis = helper.Get("axis", -1);
auto input_defs = node->InputDefs();
auto input_shape = vsi::npu::util::GetTensorShape(*input_defs[0]);
int32_t rank = input_shape.NumDimensions();
if (axis >= rank || axis < -rank) {
LOGS_DEFAULT(ERROR) << "Axis is invalid in Softmax.";
return false;
}
return true;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating Softmax Op.";
NodeAttrHelper helper(node_unit.GetNode());
int32_t def_val = node_unit.SinceVersion() < 13 ? 1 : -1;
auto axis = helper.Get("axis", def_val);
if (def_val == 1) {
// In earlier opset version of softmax, input is coerced into 2D shape
// Attribute "axis" is to describe the axis of the inputs coerced to 2D but not take part in softmax computation
const bool is_2d_shape = inputs[0]->GetShape().size() == 2 ? true : false;
if (!is_2d_shape) {
axis = HandleNegativeAxis(axis, inputs[0]->GetShape().size());
auto it = inputs[0]->GetShape().end();
uint32_t last_dim = std::accumulate(it - axis, it, 1, std::multiplies<uint32_t>());
uint32_t first_dim = std::accumulate(inputs[0]->GetShape().begin(), it - axis, 1, std::multiplies<uint32_t>());
auto reshaped_spec = inputs[0]->GetSpec().AsTransientSpec().SetShape(
std::vector<uint32_t>{first_dim, last_dim});
auto reshaped_input = graph_ep->GetGraph()->CreateTensor(reshaped_spec);
auto reshaped_output = graph_ep->GetGraph()->CreateTensor(inputs[0]->GetSpec().AsTransientSpec());
auto reshape_input_op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Reshape>(
std::vector<uint32_t>{first_dim, last_dim});
auto softmax_op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Softmax>(1, 0);
auto reshaped_output_op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Reshape>(inputs[0]->GetShape());
(*reshape_input_op).BindInputs(inputs).BindOutput(reshaped_input);
(*softmax_op).BindInput(reshaped_input).BindOutput(reshaped_output);
(*reshaped_output_op).BindInput(reshaped_output).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(reshape_input_op));
graph_ep->GetOps().push_back(std::move(softmax_op));
graph_ep->GetOps().push_back(std::move(reshaped_output_op));
} else {
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Softmax>(1, 0);
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
}
} else {
axis = util::ReverseAxis(axis, inputs[0]->GetShape().size());
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Softmax>(1, static_cast<uint32_t>(axis));
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
}
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,88 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
#include "core/providers/shared/utils/utils.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class SqueezeOpBuilder : public BaseOpBuilder {
bool HasSupportedInputOutputsImpl(const InitializedTensorSet& initializers,
const NodeUnit& node_unit) const override {
auto input_type = node_unit.Inputs()[0].node_arg.Type();
if (*input_type == "tensor(int64)" || !util::IsTypeSupported(&node_unit.Inputs()[0].node_arg)) {
LOGS_DEFAULT(WARNING) << node_unit.OpType() << " has unsupported input type : "
<< *input_type;
return false;
}
if (node_unit.SinceVersion() > 11) {
if (node_unit.Inputs().size() > 1 && !Contains(initializers, node_unit.Inputs()[1].node_arg.Name())) {
LOGS_DEFAULT(WARNING) << "Only support const axes in Squeeze op.";
return false;
}
}
return true;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(INFO) << "Creating Squeeze Op.";
NodeAttrHelper helper(node_unit.GetNode());
std::vector<int64_t> def_axes;
auto input_shape_size = inputs[0]->GetShape().size();
if (node_unit.SinceVersion() < 13 && helper.HasAttr("axes")) {
def_axes = helper.Get("axes", def_axes);
} else if (inputs.size() > 1) {
def_axes.resize(inputs[1]->GetSpec().GetElementNum());
inputs[1]->CopyDataFromTensor(def_axes.data());
} else { // if axes is empty from onnx, check input shape to determine
for (int64_t i = 0; i < input_shape_size; ++i) {
if (inputs[0]->GetShape()[i] == 1) {
def_axes.push_back(i);
}
}
}
std::vector<int32_t> axes(def_axes.begin(), def_axes.end());
axes = util::ReverseAxis(axes, input_shape_size);
std::vector<uint32_t> timvx_axes(axes.begin(), axes.end());
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Squeeze>(timvx_axes);
(*op).BindInput(inputs[0]).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,142 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
#include "core/providers/shared/utils/utils.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class ReshapeOpBuilder : public BaseOpBuilder {
int GetMinSupportedOpSet(const NodeUnit& /* node_unit */) const override { return 5; }
bool HasSupportedInputOutputsImpl(const InitializedTensorSet& initializers,
const NodeUnit& node_unit) const override {
auto input = node_unit.Inputs()[0];
auto shape = node_unit.Inputs()[1];
if (initializers.end() == initializers.find(shape.node_arg.Name())) {
LOGS_DEFAULT(VERBOSE) << "Target shape of reshape op must be known.";
return false;
}
if (util::IsTypeSupported(&input.node_arg) && util::IsTypeSupported(&shape.node_arg)) {
if (*input.node_arg.Type() != "tensor(int64)") {
return true;
}
}
return false;
}
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
const Node* node) const override {
auto input_defs = node->InputDefs();
NodeAttrHelper helper(*node);
const bool allow_zero = helper.Get("allowzero", 0) == 1;
auto& perm_tensor_proto = *graph_viewer.GetConstantInitializer(input_defs[1]->Name(), true);
std::vector<int64_t> perm(perm_tensor_proto.dims()[0]);
auto status = onnxruntime::utils::UnpackTensor(
perm_tensor_proto,
perm_tensor_proto.has_raw_data() ? perm_tensor_proto.raw_data().data() : nullptr,
perm_tensor_proto.has_raw_data() ? perm_tensor_proto.raw_data().size() : 0,
perm.data(), perm.size());
// Check if perm has any 0's when allow zero is enabled.
if (allow_zero && std::find(perm.begin(), perm.end(), 0L) != perm.end()) {
LOGS_DEFAULT(VERBOSE) << "Reshape doesn't support 0 as dimension when allowzero is enabled";
return false;
}
return true;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating Reshape Op.";
std::vector<int64_t> new_shape(inputs[1]->GetShape()[0]);
inputs[1]->CopyDataFromTensor(new_shape.data());
for (size_t i = 0; i < new_shape.size(); i++) {
if (new_shape[i] == 0) {
new_shape[i] = inputs[0]->GetShape()[inputs[0]->GetShape().size() - i - 1];
}
}
int64_t element_count = std::accumulate(new_shape.begin(), new_shape.end(), static_cast<int64_t>(1),
[&](int64_t a, int64_t b) {
return b == -1 ? a : a * b;
});
auto negative_it = std::find(new_shape.begin(), new_shape.end(), -1);
if (negative_it != new_shape.end()) {
*negative_it = inputs[0]->GetSpec().GetElementNum() / element_count;
}
std::vector<uint32_t> new_shape_uint32(new_shape.begin(), new_shape.end());
std::reverse(new_shape_uint32.begin(), new_shape_uint32.end());
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Reshape>(new_shape_uint32);
(*op).BindInput(inputs[0]).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
class TransposeOpBuilder : public BaseOpBuilder {
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
const Node* node) const override {
auto input_defs = node->InputDefs();
auto shape_dim = vsi::npu::util::GetTensorShape(*input_defs[0]).NumDimensions();
NodeAttrHelper helper(*node);
auto perm = helper.Get("perm", std::vector<uint32_t>(shape_dim, 1));
if (perm.size() != shape_dim) {
LOGS_DEFAULT(VERBOSE) << "Size mismatch between perm vector and input shape.";
return false;
}
return true;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating Transpose Op.";
std::vector<int64_t> def_val(inputs[0]->GetShape().size());
for (int64_t i = 0; i < def_val.size(); i++) def_val[i] = def_val.size() - i - 1;
NodeAttrHelper helper(node_unit.GetNode());
def_val = helper.Get("perm", def_val);
std::vector<uint32_t> timvx_perm;
for (uint32_t i = 0; i < def_val.size(); i++) {
timvx_perm.push_back(def_val.size() - 1 - def_val[def_val.size() - i - 1]);
}
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Transpose>(timvx_perm);
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,71 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class TileOpBuilder : public BaseOpBuilder {
int GetMinSupportedOpSet(const NodeUnit& /* node_unit */) const override { return 6; }
bool HasSupportedInputOutputsImpl(const InitializedTensorSet& initializers,
const NodeUnit& node_unit) const override {
auto input = node_unit.Inputs()[0];
auto multipliers = node_unit.Inputs()[1];
if (initializers.end() == initializers.find(multipliers.node_arg.Name())) {
LOGS_DEFAULT(WARNING) << "Multipliers of tile op must be known.";
return false;
}
if (util::IsTypeSupported(&input.node_arg) && util::IsTypeSupported(&multipliers.node_arg)) {
if (*input.node_arg.Type() != "tensor(int64)") {
return true;
}
}
LOGS_DEFAULT(WARNING) << "Input type not supported.";
return false;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating Tile Op.";
std::vector<int64_t> multipliers(inputs[1]->GetShape()[0]);
inputs[1]->CopyDataFromTensor(multipliers.data());
std::reverse(multipliers.begin(), multipliers.end());
std::vector<int32_t> timvx_multipliers(multipliers.begin(), multipliers.end());
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Tile>(timvx_multipliers);
(*op).BindInput(inputs[0]).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,89 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
#include "core/providers/shared/utils/utils.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class UnsqueezeOpBuilder : public BaseOpBuilder {
bool HasSupportedInputOutputsImpl(const InitializedTensorSet& initializers,
const NodeUnit& node_unit) const override {
auto input_type = node_unit.Inputs()[0].node_arg.Type();
if (*input_type == "tensor(int64)" || !util::IsTypeSupported(&node_unit.Inputs()[0].node_arg)) {
LOGS_DEFAULT(WARNING) << node_unit.OpType() << " has unsupported input type : "
<< *input_type;
return false;
}
if (node_unit.SinceVersion() > 11 && !Contains(initializers, node_unit.Inputs()[1].node_arg.Name())) {
LOGS_DEFAULT(WARNING) << "Only support const axes in Unsqueeze op.";
return false;
}
return true;
}
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(INFO) << "Creating Unsqueeze Op.";
NodeAttrHelper helper(node_unit.GetNode());
std::vector<int64_t> def_axes;
auto input_shape_size = inputs[0]->GetShape().size();
if (node_unit.SinceVersion() < 13 && helper.HasAttr("axes")) {
def_axes = helper.Get("axes", def_axes);
} else if (inputs.size() > 1) {
def_axes.resize(inputs[1]->GetSpec().GetElementNum());
inputs[1]->CopyDataFromTensor(def_axes.data());
} else { // if axes is empty from onnx, check input shape to determine
for (int64_t i = 0; i < input_shape_size; ++i) {
if (inputs[0]->GetShape()[i] == 1) {
def_axes.push_back(i);
}
}
}
std::vector<int32_t> axes(def_axes.begin(), def_axes.end());
axes = util::ReverseAxis(axes, input_shape_size + axes.size());
std::vector<uint32_t> timvx_axes(inputs[0]->GetShape().begin(), inputs[0]->GetShape().end());
for (int32_t dim : axes) {
timvx_axes.insert(timvx_axes.begin() + dim, 1);
}
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Reshape>(timvx_axes);
(*op).BindInput(inputs[0]).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,48 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#pragma once
#include "core/graph/graph_viewer.h"
#include "core/framework/node_unit.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
class GraphEP;
class IOpBuilder {
public:
IOpBuilder() {}
virtual ~IOpBuilder() {}
virtual bool IsSupported(const onnxruntime::GraphViewer& graph_viewer,
const NodeUnit& node_unit) const {
return true;
}
virtual bool BuildOp(GraphEP* graph_ep,
const onnxruntime::GraphViewer& graph_viewer,
const NodeUnit& node_unit) = 0;
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,133 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#pragma once
#include <string>
#include <memory>
#include <map>
#include <utility>
#include "impl/activation_op_builder.h"
#include "impl/conv_op_builder.h"
#include "impl/elementwise_op_builder.h"
#include "impl/gemm_op_builder.h"
#include "impl/pool_op_builder.h"
#include "impl/qlinearconv_op_builder.h"
#include "impl/flatten_op_builder.h"
#include "impl/matmul_op_builder.h"
#include "impl/tensor_op_builder.h"
#include "impl/concat_op_builder.h"
#include "impl/softmax_op_builder.h"
#include "impl/norm_op_builder.h"
#include "impl/clip_op_builder.h"
#include "impl/reduce_op_builder.h"
#include "impl/quantize_op_builder.h"
#include "impl/dequantize_op_builder.h"
#include "impl/qlinearmatmul_op_builder.h"
#include "impl/qlinear_binary_op_builder.h"
#include "impl/qlinearconcat_op_builder.h"
#include "impl/gather_op_builder.h"
#include "impl/tile_op_builder.h"
#include "impl/squeeze_op_builder.h"
#include "impl/unsqueeze_op_builder.h"
#include "impl/resize_op_builder.h"
#include "impl/cast_op_builder.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
using createIOpBuildItemFunc = std::function<std::unique_ptr<IOpBuilder>()>;
using OpBuildItemType = std::map<std::string, std::unique_ptr<IOpBuilder>>;
static const std::map<std::string, createIOpBuildItemFunc> reg = {
#define REGISTER_OP_BUILDER(ONNX_NODE_TYPE, BUILDER_TYPE) \
{ \
ONNX_NODE_TYPE, [] { return std::make_unique<BUILDER_TYPE>(); } \
}
REGISTER_OP_BUILDER("Add", AddOpBuilder),
REGISTER_OP_BUILDER("Sub", SubOpBuilder),
REGISTER_OP_BUILDER("Mul", MulOpBuilder),
REGISTER_OP_BUILDER("Div", DivOpBuilder),
REGISTER_OP_BUILDER("Abs", AbsOpBuilder),
REGISTER_OP_BUILDER("Pow", PowOpBuilder),
REGISTER_OP_BUILDER("Sqrt", SqrtOpBuilder),
REGISTER_OP_BUILDER("Exp", ExpOpBuilder),
REGISTER_OP_BUILDER("Floor", FloorOpBuilder),
REGISTER_OP_BUILDER("Log", LogOpBuilder),
REGISTER_OP_BUILDER("Sin", SinOpBuilder),
REGISTER_OP_BUILDER("Conv", ConvOpBuilder),
REGISTER_OP_BUILDER("Gemm", GemmOpBuilder),
REGISTER_OP_BUILDER("Relu", ReluOpBuilder),
REGISTER_OP_BUILDER("LeakyRelu", LeakyReluOpBuilder),
REGISTER_OP_BUILDER("Tanh", TanhOpBuilder),
REGISTER_OP_BUILDER("Sigmoid", SigmoidOpBuilder),
REGISTER_OP_BUILDER("HardSigmoid", HardSigmoidOpBuilder),
REGISTER_OP_BUILDER("HardSwish", HardSwishOpBuilder),
REGISTER_OP_BUILDER("GlobalAveragePool", GlobalAveragePoolOpBuilder),
REGISTER_OP_BUILDER("QLinearConv", QLinearConvOpBuilder),
REGISTER_OP_BUILDER("Flatten", FlattenOpBuilder),
REGISTER_OP_BUILDER("MatMul", MatMulOpBuilder),
REGISTER_OP_BUILDER("GlobalMaxPool", GlobalMaxPoolOpBuilder),
REGISTER_OP_BUILDER("AveragePool", AveragePoolOpBuilder),
REGISTER_OP_BUILDER("MaxPool", MaxPoolOpBuilder),
REGISTER_OP_BUILDER("Reshape", ReshapeOpBuilder),
REGISTER_OP_BUILDER("Concat", ConcatOpBuilder),
REGISTER_OP_BUILDER("Softmax", SoftmaxOpBuilder),
REGISTER_OP_BUILDER("Transpose", TransposeOpBuilder),
REGISTER_OP_BUILDER("BatchNormalization", BatchNormOpBuilder),
REGISTER_OP_BUILDER("Clip", ClipOpBuilder),
REGISTER_OP_BUILDER("ReduceMean", ReduceMeanOpBuilder),
REGISTER_OP_BUILDER("QuantizeLinear", QuantizeLinearOpBuilder),
REGISTER_OP_BUILDER("DequantizeLinear", DequantizeLinearOpBuilder),
REGISTER_OP_BUILDER("QLinearMatMul", QLinearMatMulOpBuilder),
REGISTER_OP_BUILDER("QLinearAdd", QLinearAddOpBuilder),
REGISTER_OP_BUILDER("QLinearMul", QLinearMulOpBuilder),
REGISTER_OP_BUILDER("QLinearConcat", QLinearConcatOpBuilder),
REGISTER_OP_BUILDER("Gather", GatherOpBuilder),
REGISTER_OP_BUILDER("Tile", TileOpBuilder),
REGISTER_OP_BUILDER("Squeeze", SqueezeOpBuilder),
REGISTER_OP_BUILDER("Unsqueeze", UnsqueezeOpBuilder),
REGISTER_OP_BUILDER("Resize", ResizeOpBuilder),
REGISTER_OP_BUILDER("Cast", CastOpBuilder),
#undef REGISTER_OP_BUILDER
};
template <typename T>
struct OpBuildConstructor {
T supported_builtins;
OpBuildConstructor(
const std::map<typename T::key_type, createIOpBuildItemFunc> reg) {
LOGS_DEFAULT(INFO) << "Initialize supported ops";
for (const auto& kv : reg) {
supported_builtins.insert(std::make_pair(kv.first, kv.second()));
}
}
};
inline const OpBuildItemType& SupportedBuiltinOps() {
static OpBuildConstructor<OpBuildItemType> c(reg);
return c.supported_builtins;
}
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,26 @@
diff --git a/onnxruntime/test/providers/checkers.cc b/onnxruntime/test/providers/checkers.cc
index 47c18c478d..93b44501cd 100644
--- a/onnxruntime/test/providers/checkers.cc
+++ b/onnxruntime/test/providers/checkers.cc
@@ -195,7 +195,7 @@ struct TensorCheck<uint8_t> {
// For any other EPs, we still expect an exact match for the results
// TODO: Verify if DML can possibly have a ROUNDING_MODE parameter and conform to the other EPs #41968513
if ((provider_type == kNnapiExecutionProvider || provider_type == kDmlExecutionProvider ||
- provider_type == kXnnpackExecutionProvider) &&
+ provider_type == kXnnpackExecutionProvider || provider_type == kVSINPUExecutionProvider) &&
(has_abs_err || has_rel_err)) {
double threshold = has_abs_err ? *(params.absolute_error)
: 0.0;
diff --git a/onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc b/onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc
index 2bc0df5e36..7beb78c2ff 100644
--- a/onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc
+++ b/onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc
@@ -498,7 +498,7 @@ class QLinearConvOpTester {
// NOTE, for now the tolerance will only apply if the NNAPI is actually used,
// if for any reason the execution falls back to CPU, we still expect an exact match
// See, 'void Check<uint8_t>(...' in onnxruntime/test/providers/provider_test_utils.cc
-#if defined(USE_NNAPI) || defined(USE_DML)
+#if defined(USE_NNAPI) || defined(USE_DML) || defined(USE_VSINPU)
// TODO: Verify if DML can possibly have a ROUNDING_MODE parameter and conform to the other EPs #41968513
abs_error = 1.0f;
#endif

View file

@ -0,0 +1,343 @@
diff --git a/onnxruntime/test/onnx/dataitem_request.cc b/onnxruntime/test/onnx/dataitem_request.cc
index 1ee302d5d5..5c2dd5ab00 100644
--- a/onnxruntime/test/onnx/dataitem_request.cc
+++ b/onnxruntime/test/onnx/dataitem_request.cc
@@ -135,6 +135,7 @@ std::pair<EXECUTE_RESULT, TIME_SPEC> DataTaskRequestContext::RunImpl() {
}
EXECUTE_RESULT res = EXECUTE_RESULT::SUCCESS;
+ int32_t out_idx = 0;
for (auto& output : expected_output_values) {
const std::string& output_name = output.first;
OrtValue* expected_output_value = output.second; // Automatic cast
@@ -170,7 +171,7 @@ std::pair<EXECUTE_RESULT, TIME_SPEC> DataTaskRequestContext::RunImpl() {
} else { // Both expect and actual OrtValues are not None, proceed with data checking
ret =
CompareOrtValue(*actual_output_value, *expected_output_value, per_sample_tolerance,
- relative_per_sample_tolerance, post_procesing);
+ relative_per_sample_tolerance, post_procesing, out_idx);
}
} else { // Expected output is None, ensure that the received output OrtValue is None as well
if (actual_output_value->IsAllocated()) {
@@ -223,9 +224,10 @@ std::pair<EXECUTE_RESULT, TIME_SPEC> DataTaskRequestContext::RunImpl() {
if (compare_result != COMPARE_RESULT::SUCCESS && !ret.second.empty()) {
LOGS_DEFAULT(ERROR) << test_case_.GetTestCaseName() << ":output=" << output_name << ":" << ret.second;
}
- if (compare_result != COMPARE_RESULT::SUCCESS) {
- break;
- }
+ // if (compare_result != COMPARE_RESULT::SUCCESS) {
+ // break;
+ // }
+ out_idx ++;
}
return std::make_pair(res, spent_time_);
}
diff --git a/onnxruntime/test/providers/checkers.cc b/onnxruntime/test/providers/checkers.cc
index f1a7240ea3..436031dfa8 100644
--- a/onnxruntime/test/providers/checkers.cc
+++ b/onnxruntime/test/providers/checkers.cc
@@ -154,6 +154,7 @@ struct TensorCheck<int8_t> {
}
const bool has_abs_err = params.absolute_error.has_value();
+ const int8_t default_abs_err = 1;
if (has_abs_err) {
double threshold = *(params.absolute_error);
@@ -162,7 +163,8 @@ struct TensorCheck<int8_t> {
}
} else {
for (int i = 0; i < size; ++i) {
- EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i;
+ // EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i;
+ EXPECT_NEAR(cur_expected[i], cur_actual[i], default_abs_err) << "i:" << i;
}
}
}
diff --git a/onnxruntime/test/util/compare_ortvalue.cc b/onnxruntime/test/util/compare_ortvalue.cc
index 3d53d4a3a0..8129af1820 100644
--- a/onnxruntime/test/util/compare_ortvalue.cc
+++ b/onnxruntime/test/util/compare_ortvalue.cc
@@ -138,11 +138,75 @@ std::pair<COMPARE_RESULT, std::string> CompareFloatResult(const Tensor& outvalue
return res;
}
+template <typename FLOAT_TYPE>
+std::pair<COMPARE_RESULT, std::string> CompareFloatResult(const Tensor& outvalue, const Tensor& expected_value,
+ double per_sample_tolerance,
+ double relative_per_sample_tolerance, bool post_processing, int32_t out_idx) {
+ const size_t size1 = static_cast<size_t>(expected_value.Shape().Size());
+ const FLOAT_TYPE* expected_output = expected_value.Data<FLOAT_TYPE>();
+ const FLOAT_TYPE* real_output = outvalue.Data<FLOAT_TYPE>();
+
+ std::string expected_name = "expected_res"+ std::to_string(out_idx)+ ".txt";
+ std::string npures_name = "npu_res"+ std::to_string(out_idx)+ ".txt";
+ std::ofstream expected_res(expected_name), npu_res(npures_name);
+ for(size_t i = 0 ; i < size1; i++){
+ expected_res << expected_output[i] << std::endl;
+ npu_res << real_output[i] << std::endl;
+ }
+ expected_res.close();
+ npu_res.close();
+
+ std::pair<COMPARE_RESULT, std::string> res = std::make_pair(COMPARE_RESULT::SUCCESS, "");
+ double max_diff = 0;
+ size_t diff_count = 0;
+ for (size_t di = 0; di != size1; ++di) {
+ const double real_value =
+ post_processing ? std::max<double>(0.0, std::min<double>(255.0, real_output[di])) : real_output[di];
+ const double diff = std::fabs(expected_output[di] - real_value);
+ const double tol = per_sample_tolerance + relative_per_sample_tolerance * std::fabs(expected_output[di]);
+ if (!IsResultCloselyMatch<double>(real_value, expected_output[di], diff, tol)) {
+ res.first = COMPARE_RESULT::RESULT_DIFFERS;
+ // update error message if this is a larger diff
+ if (diff > max_diff || (std::isnan(diff) && !std::isnan(max_diff))) {
+ int64_t expected_int = 0;
+ int64_t real_int = 0;
+ memcpy(&expected_int, &expected_output[di], sizeof(FLOAT_TYPE));
+ memcpy(&real_int, &real_output[di], sizeof(FLOAT_TYPE));
+
+ std::ostringstream oss;
+ oss << std::hex << "expected " << expected_output[di] << " (" << expected_int << "), got " << real_value << " ("
+ << real_int << ")"
+ << ", diff: " << diff << ", tol=" << tol << std::dec << " idx=" << di << ".";
+ res.second = oss.str();
+ max_diff = diff;
+ }
+ ++diff_count;
+ }
+ }
+
+ if (res.first == COMPARE_RESULT::SUCCESS) return res;
+
+ std::ostringstream oss;
+ oss << res.second << " " << diff_count << " of " << size1 << " differ";
+ res.second = oss.str();
+ return res;
+}
+
+
template <typename T>
-std::pair<COMPARE_RESULT, std::string> IsResultExactlyMatch(const Tensor& outvalue, const Tensor& expected_value) {
+std::pair<COMPARE_RESULT, std::string> IsResultExactlyMatch(const Tensor& outvalue, const Tensor& expected_value, int32_t out_idx) {
const size_t size1 = static_cast<size_t>(expected_value.Shape().Size());
const T* expected_output = expected_value.Data<T>();
const T* real_output = outvalue.Data<T>();
+ std::string expected_name = "expected_res"+ std::to_string(out_idx)+ ".txt";
+ std::string npures_name = "npu_res"+ std::to_string(out_idx)+ ".txt";
+ std::ofstream expected_res(expected_name), npu_res(npures_name);
+ for(size_t i = 0 ; i < size1; i++){
+ expected_res << expected_output[i] << std::endl;
+ npu_res << real_output[i] << std::endl;
+ }
+ expected_res.close();
+ npu_res.close();
for (size_t di = 0; di != size1; ++di) {
if (expected_output[di] != real_output[di]) {
std::ostringstream oss;
@@ -201,7 +265,7 @@ std::pair<COMPARE_RESULT, std::string> CompareBFloat16Result(const Tensor& outva
std::pair<COMPARE_RESULT, std::string> CompareTwoTensors(const Tensor& outvalue, const Tensor& expected_tensor,
double per_sample_tolerance,
- double relative_per_sample_tolerance, bool post_processing) {
+ double relative_per_sample_tolerance, bool post_processing, int32_t out_idx) {
if (expected_tensor.Shape() != outvalue.Shape()) {
std::ostringstream oss;
oss << "shape mismatch, expect " << expected_tensor.Shape().ToString() << " got " << outvalue.Shape().ToString();
@@ -209,30 +273,30 @@ std::pair<COMPARE_RESULT, std::string> CompareTwoTensors(const Tensor& outvalue,
}
if (outvalue.IsDataType<float>()) {
return CompareFloatResult<float>(outvalue, expected_tensor, per_sample_tolerance, relative_per_sample_tolerance,
- post_processing);
+ post_processing, out_idx);
} else if (outvalue.IsDataType<double>()) {
return CompareFloatResult<double>(outvalue, expected_tensor, per_sample_tolerance, relative_per_sample_tolerance,
- post_processing);
+ post_processing, out_idx);
} else if (outvalue.IsDataTypeString()) {
- return IsResultExactlyMatch<std::string>(outvalue, expected_tensor);
+ return IsResultExactlyMatch<std::string>(outvalue, expected_tensor, out_idx);
} else if (outvalue.IsDataType<uint8_t>()) {
- return IsResultExactlyMatch<uint8_t>(outvalue, expected_tensor);
+ return IsResultExactlyMatch<uint8_t>(outvalue, expected_tensor, out_idx);
} else if (outvalue.IsDataType<int8_t>()) {
- return IsResultExactlyMatch<int8_t>(outvalue, expected_tensor);
+ return IsResultExactlyMatch<int8_t>(outvalue, expected_tensor, out_idx);
} else if (outvalue.IsDataType<uint16_t>()) {
- return IsResultExactlyMatch<uint16_t>(outvalue, expected_tensor);
+ return IsResultExactlyMatch<uint16_t>(outvalue, expected_tensor, out_idx);
} else if (outvalue.IsDataType<int16_t>()) {
- return IsResultExactlyMatch<int16_t>(outvalue, expected_tensor);
+ return IsResultExactlyMatch<int16_t>(outvalue, expected_tensor, out_idx);
} else if (outvalue.IsDataType<uint32_t>()) {
- return IsResultExactlyMatch<uint32_t>(outvalue, expected_tensor);
+ return IsResultExactlyMatch<uint32_t>(outvalue, expected_tensor, out_idx);
} else if (outvalue.IsDataType<int32_t>()) {
- return IsResultExactlyMatch<int32_t>(outvalue, expected_tensor);
+ return IsResultExactlyMatch<int32_t>(outvalue, expected_tensor, out_idx);
} else if (outvalue.IsDataType<uint64_t>()) {
- return IsResultExactlyMatch<uint64_t>(outvalue, expected_tensor);
+ return IsResultExactlyMatch<uint64_t>(outvalue, expected_tensor, out_idx);
} else if (outvalue.IsDataType<int64_t>()) {
- return IsResultExactlyMatch<int64_t>(outvalue, expected_tensor);
+ return IsResultExactlyMatch<int64_t>(outvalue, expected_tensor, out_idx);
} else if (outvalue.IsDataType<bool>()) {
- return IsResultExactlyMatch<bool>(outvalue, expected_tensor);
+ return IsResultExactlyMatch<bool>(outvalue, expected_tensor, out_idx);
} else if (outvalue.IsDataType<MLFloat16>()) {
return CompareFloat16Result(outvalue, expected_tensor, per_sample_tolerance, relative_per_sample_tolerance,
post_processing);
@@ -300,7 +364,7 @@ std::pair<COMPARE_RESULT, std::string> CompareSparseTensors(const SparseTensor&
" actual: ", actual.Format());
TEST_RETURN_IF_ERROR(CompareTwoTensors(actual.Values(), expected.Values(),
- per_sample_tolerance, relative_per_sample_tolerance, post_processing),
+ per_sample_tolerance, relative_per_sample_tolerance, post_processing, 0),
"While comparing sparse values");
if (actual.Format() == SparseFormat::kCoo) {
@@ -308,16 +372,16 @@ std::pair<COMPARE_RESULT, std::string> CompareSparseTensors(const SparseTensor&
auto expected_view = expected.AsCoo();
TEST_RETURN_IF_ERROR(CompareTwoTensors(actual_view.Indices(), expected_view.Indices(),
- per_sample_tolerance, relative_per_sample_tolerance, post_processing),
+ per_sample_tolerance, relative_per_sample_tolerance, post_processing, 0),
"Comparing COO indices");
} else if (actual.Format() == SparseFormat::kCsrc) {
auto actual_view = actual.AsCsr();
auto expected_view = expected.AsCsr();
TEST_RETURN_IF_ERROR(CompareTwoTensors(actual_view.Inner(), expected_view.Inner(),
- per_sample_tolerance, relative_per_sample_tolerance, post_processing),
+ per_sample_tolerance, relative_per_sample_tolerance, post_processing, 0),
"Comparing Csr(c) inner indices");
TEST_RETURN_IF_ERROR(CompareTwoTensors(actual_view.Outer(), expected_view.Outer(),
- per_sample_tolerance, relative_per_sample_tolerance, post_processing),
+ per_sample_tolerance, relative_per_sample_tolerance, post_processing, 0),
"Comparing Csr(c) outer indices");
}
@@ -385,7 +449,83 @@ std::pair<COMPARE_RESULT, std::string> CompareOrtValue(const OrtValue& o, const
return std::make_pair(COMPARE_RESULT::TYPE_MISMATCH, oss.str());
}
return CompareTwoTensors(outvalue, expected_tensor, per_sample_tolerance, relative_per_sample_tolerance,
- post_processing);
+ post_processing, 0);
+ } else if (o.IsSparseTensor()) {
+#if !defined(DISABLE_SPARSE_TENSORS)
+ TEST_RETURN_IF_NOT(expected_mlvalue.IsSparseTensor(), COMPARE_RESULT::TYPE_MISMATCH,
+ "SparseTensor is not expected as output");
+ TEST_RETURN_IF_ERROR(CompareSparseTensors(o.Get<SparseTensor>(), expected_mlvalue.Get<SparseTensor>(),
+ per_sample_tolerance, relative_per_sample_tolerance,
+ post_processing),
+ "while comaring sparse tensors");
+#endif
+ return std::make_pair(COMPARE_RESULT::SUCCESS, "");
+ } else if (o.IsTensorSequence()) {
+ auto& expected_tensor_seq = expected_mlvalue.Get<TensorSeq>();
+ auto expected_tensor_count = expected_tensor_seq.Size();
+
+ auto& actual_tensor_seq = o.Get<TensorSeq>();
+ auto actual_tensor_count = actual_tensor_seq.Size();
+
+ if (expected_tensor_count != actual_tensor_count) {
+ std::ostringstream oss;
+ oss << "expected tensor count in the sequence: " << expected_tensor_count << " got "
+ << actual_tensor_count;
+ return std::make_pair(COMPARE_RESULT::RESULT_DIFFERS, oss.str());
+ }
+
+ if (!expected_tensor_seq.IsSameDataType(actual_tensor_seq)) {
+ std::ostringstream oss;
+ oss << "expected tensor type in the sequence: " << expected_tensor_seq.DataType() << " got "
+ << actual_tensor_seq.DataType();
+ return std::make_pair(COMPARE_RESULT::TYPE_MISMATCH, oss.str());
+ }
+
+ for (size_t i = 0; i < expected_tensor_count; ++i) {
+ auto res = CompareTwoTensors(actual_tensor_seq.Get(i), expected_tensor_seq.Get(i), per_sample_tolerance, relative_per_sample_tolerance,
+ post_processing,0);
+ if (res.first != COMPARE_RESULT::SUCCESS) {
+ return res;
+ }
+ }
+
+ return std::make_pair(COMPARE_RESULT::SUCCESS, "");
+
+ } else {
+ // Maps
+#if !defined(DISABLE_ML_OPS)
+ if (o.Type() == DataTypeImpl::GetType<VectorMapInt64ToFloat>()) {
+ return CompareSeqOfMapToFloat(o.Get<VectorMapInt64ToFloat>(), expected_mlvalue.Get<VectorMapInt64ToFloat>(),
+ per_sample_tolerance, relative_per_sample_tolerance, post_processing);
+ }
+ if (o.Type() == DataTypeImpl::GetType<VectorMapStringToFloat>()) {
+ return CompareSeqOfMapToFloat(o.Get<VectorMapStringToFloat>(), expected_mlvalue.Get<VectorMapStringToFloat>(),
+ per_sample_tolerance, relative_per_sample_tolerance, post_processing);
+ }
+ return std::make_pair(COMPARE_RESULT::NOT_SUPPORT, "");
+#else
+ return std::make_pair(COMPARE_RESULT::NOT_SUPPORT, "Map type is not supported in this build.");
+#endif
+ }
+}
+
+std::pair<COMPARE_RESULT, std::string> CompareOrtValue(const OrtValue& o, const OrtValue& expected_mlvalue,
+ double per_sample_tolerance,
+ double relative_per_sample_tolerance, bool post_processing, int32_t out_idx) {
+ if (o.Type() != expected_mlvalue.Type()) {
+ return std::make_pair(COMPARE_RESULT::TYPE_MISMATCH, "");
+ }
+ if (o.IsTensor()) {
+ const Tensor& outvalue = o.Get<Tensor>();
+ const Tensor& expected_tensor = expected_mlvalue.Get<Tensor>();
+ if (outvalue.DataType() != expected_tensor.DataType()) {
+ std::ostringstream oss;
+ oss << "expect " << ElementTypeToString(expected_tensor.DataType()) << " got "
+ << ElementTypeToString(outvalue.DataType());
+ return std::make_pair(COMPARE_RESULT::TYPE_MISMATCH, oss.str());
+ }
+ return CompareTwoTensors(outvalue, expected_tensor, per_sample_tolerance, relative_per_sample_tolerance,
+ post_processing, out_idx);
} else if (o.IsSparseTensor()) {
#if !defined(DISABLE_SPARSE_TENSORS)
TEST_RETURN_IF_NOT(expected_mlvalue.IsSparseTensor(), COMPARE_RESULT::TYPE_MISMATCH,
@@ -419,7 +559,7 @@ std::pair<COMPARE_RESULT, std::string> CompareOrtValue(const OrtValue& o, const
for (size_t i = 0; i < expected_tensor_count; ++i) {
auto res = CompareTwoTensors(actual_tensor_seq.Get(i), expected_tensor_seq.Get(i), per_sample_tolerance, relative_per_sample_tolerance,
- post_processing);
+ post_processing, out_idx);
if (res.first != COMPARE_RESULT::SUCCESS) {
return res;
}
diff --git a/onnxruntime/test/util/include/compare_ortvalue.h b/onnxruntime/test/util/include/compare_ortvalue.h
index 24b74b9002..8269346528 100644
--- a/onnxruntime/test/util/include/compare_ortvalue.h
+++ b/onnxruntime/test/util/include/compare_ortvalue.h
@@ -24,7 +24,9 @@ enum class COMPARE_RESULT { SUCCESS,
std::pair<COMPARE_RESULT, std::string> CompareOrtValue(const OrtValue& real, const OrtValue& expected,
double per_sample_tolerance,
double relative_per_sample_tolerance, bool post_processing);
-
+std::pair<COMPARE_RESULT, std::string> CompareOrtValue(const OrtValue& real, const OrtValue& expected,
+ double per_sample_tolerance,
+ double relative_per_sample_tolerance, bool post_processing, int32_t out_idx);
// verify if the 'value' matches the 'expected' ValueInfoProto. 'value' is a model output
std::pair<COMPARE_RESULT, std::string> VerifyValueInfo(const ONNX_NAMESPACE::ValueInfoProto& expected,
const OrtValue* value);
diff --git a/onnxruntime/test/util/include/test/compare_ortvalue.h b/onnxruntime/test/util/include/test/compare_ortvalue.h
index 545df706c9..170eb9dc4c 100644
--- a/onnxruntime/test/util/include/test/compare_ortvalue.h
+++ b/onnxruntime/test/util/include/test/compare_ortvalue.h
@@ -28,7 +28,9 @@ enum class COMPARE_RESULT {
std::pair<COMPARE_RESULT, std::string> CompareOrtValue(const OrtValue& real, const OrtValue& expected,
double per_sample_tolerance,
double relative_per_sample_tolerance, bool post_processing);
-
+std::pair<COMPARE_RESULT, std::string> CompareOrtValue(const OrtValue& real, const OrtValue& expected,
+ double per_sample_tolerance,
+ double relative_per_sample_tolerance, bool post_processing, int32_t out_idx);
// Compare two OrtValue numerically equal or not. The difference with CompareOrtValue is that this function
// will only check the numerical values of the OrtValue, and ignore the type, shape, etc.
//

View file

@ -0,0 +1,34 @@
diff --git a/cmake/onnxruntime_mlas.cmake b/cmake/onnxruntime_mlas.cmake
index e0ccc504d7..6c5aa6ea53 100644
--- a/cmake/onnxruntime_mlas.cmake
+++ b/cmake/onnxruntime_mlas.cmake
@@ -335,7 +335,7 @@ else()
${MLAS_SRC_DIR}/qgemm_kernel_udot.cpp
${MLAS_SRC_DIR}/qgemm_kernel_sdot.cpp
)
- if (NOT APPLE)
+ if (NOT APPLE AND NOT onnxruntime_USE_VSINPU)
set(mlas_platform_srcs
${mlas_platform_srcs}
${MLAS_SRC_DIR}/aarch64/HalfGemmKernelNeon.S
diff --git a/onnxruntime/core/mlas/inc/mlas.h b/onnxruntime/core/mlas/inc/mlas.h
index fd6b3df934..f81f1c42b6 100644
--- a/onnxruntime/core/mlas/inc/mlas.h
+++ b/onnxruntime/core/mlas/inc/mlas.h
@@ -79,6 +79,7 @@ Abstract:
#if (!defined(_MSC_VER)) || (_MSC_VER >= 1930)
#if defined(MLAS_TARGET_ARM64) || defined(MLAS_TARGET_ARM64EC)
+#if !defined(USE_VSINPU)
#if !defined(__APPLE__)
// Had to temporary disable fp16 under APPLE ARM64, as compiling
// the source files require a hardware specific compilation flag.
@@ -87,7 +88,8 @@ Abstract:
#define MLAS_F16VEC_INTRINSICS_SUPPORTED
-#endif //
+#endif
+#endif //
#endif // ARM64
#endif // Visual Studio 16 or earlier does not support fp16 intrinsic

View file

@ -0,0 +1,29 @@
import sys
import numpy as np
from numpy.linalg import norm
def read_values(filename):
with open(filename) as file:
values = np.array([float(line.strip()) for line in file])
return values
def cosine_similarity(vec1, vec2):
return np.dot(vec1, vec2) / (norm(vec1) * norm(vec2))
if __name__ == "__main__":
if len(sys.argv) != 3:
print("Usage: python cosine_similarity.py <file1> <file2>")
sys.exit(1)
file1 = sys.argv[1]
file2 = sys.argv[2]
vec1 = read_values(file1)
vec2 = read_values(file2)
similarity = cosine_similarity(vec1, vec2)
print(f"Cosine Similarity: {similarity}")

View file

@ -0,0 +1,34 @@
import sys
def read_values(filename):
with open(filename) as file:
values = [(float(line.strip()), i + 1) for i, line in enumerate(file)]
return values
def top_n(values, N):
return sorted(values, key=lambda x: x[0], reverse=True)[:N]
def compare_files(cpu_file, npu_file, N):
cpu_values = read_values(cpu_file)
npu_values = read_values(npu_file)
cpu_topn = top_n(cpu_values, N)
npu_topn = top_n(npu_values, N)
print(f"Top-{N} values in {cpu_file}: {cpu_topn}")
print(f"Top-{N} values in {npu_file}: {npu_topn}")
if __name__ == "__main__":
if len(sys.argv) != 4:
print("Usage: python compare_topn.py <N> <cpu_file> <npu_file>")
sys.exit(1)
N = int(sys.argv[1])
cpu_file = sys.argv[2]
npu_file = sys.argv[3]
compare_files(cpu_file, npu_file, N)

View file

@ -0,0 +1,23 @@
#!/bin/bash
res_file_dir=$1
output_num=$2
# specifying N value
N=5
for i in $(seq 0 $((output_num-1)));
do
# 构建文件名
golden_file="${res_file_dir}/expected_res${i}.txt"
npu_file="${res_file_dir}/npu_res${i}.txt"
echo "Comparing Top-${N} for the output_${i}"
python3 compare_topn.py $N $golden_file $npu_file
echo "--------------------------------"
echo "Comparing Cosine Similarity for output_${i}:"
python3 compare_cosine_sim.py $golden_file $npu_file
echo ""
done

View file

@ -0,0 +1 @@
OrtSessionOptionsAppendExecutionProvider_VSINPU

View file

@ -0,0 +1,296 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <algorithm>
#include "core/providers/vsinpu/vsinpu_ep_graph.h"
#include "core/providers/vsinpu/builders/op_builder_factory.h"
#include "core/providers/vsinpu/vsinpu_util.h"
#include "core/framework/node_unit.h"
#include "core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h"
#include "core/optimizer/qdq_transformer/selectors_actions/shared/utils.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
GraphEP::GraphEP(const onnxruntime::GraphViewer& graph_viewer) : graph_viewer_(graph_viewer) {
Prepare();
context_ = tim::vx::Context::Create();
graph_ = context_->CreateGraph();
compiled_ = false;
}
bool GraphEP::Prepare() {
std::tie(node_unit_holder_, node_unit_map_) = QDQ::GetAllNodeUnits(graph_viewer_);
for (const auto& node_unit : node_unit_holder_) {
auto quant_op_type = util::GetQuantizedOpType(*node_unit);
// Not a qlinear op or qdq node group
if (quant_op_type == util::QuantizedOpType::Unknown)
continue;
const auto add_quantized_input =
[&all_quantized_op_inputs = all_quantized_op_inputs_](const NodeUnit& node_unit, size_t input_idx) {
const auto& input_name = node_unit.Inputs()[input_idx].node_arg.Name();
all_quantized_op_inputs[input_name].push_back(&node_unit);
};
// All quantized ops EXCEPT QuantizeLinear has quantized input
if (quant_op_type != util::QuantizedOpType::QuantizeLinear) {
add_quantized_input(*node_unit, 0);
}
if (util::IsQuantizedBinaryOp(quant_op_type)) {
add_quantized_input(*node_unit, 1);
if (util::IsQuantizedConv(quant_op_type) && node_unit->Inputs().size() == 3) {
add_quantized_input(*node_unit, 2);
}
}
} // All quantized inputs is recorded
return true;
}
bool GraphEP::SupportedOp(const onnxruntime::GraphViewer& graph_viewer,
const NodeUnit& node_unit) {
const auto& supported_builtins = vsi::npu::SupportedBuiltinOps();
const auto& target_node = node_unit.GetNode();
const auto& it = supported_builtins.find(target_node.OpType());
if (supported_builtins.end() != it) {
return it->second->IsSupported(graph_viewer, node_unit);
}
LOGS_DEFAULT(WARNING) << "Fallback unsupported op (node_unit) " << node_unit.OpType()
<< " to cpu.";
return false;
}
bool GraphEP::IsNodeSupportedInGroup(const NodeUnit& node_unit, const GraphViewer& graph_viewer) {
return SupportedOp(graph_viewer, node_unit);
}
const NodeUnit& GraphEP::GetNodeUnit(const Node* node) const {
const auto node_unit_it = node_unit_map_.find(node);
ORT_ENFORCE(node_unit_it != node_unit_map_.end(), "Node does not have corresponding NodeUnit.");
return *node_unit_it->second;
}
void GraphEP::UpdateTensorMap(const std::string& name, const std::shared_ptr<tim::vx::Tensor>& dst_tensor) {
auto it = tensors_.find(name);
if (it != tensors_.end()) {
it->second = dst_tensor;
}
for (auto& IO : graph_inputs_) {
if (IO->name == name) {
IO->tensor = dst_tensor;
break;
}
}
for (auto& IO : graph_outputs_) {
if (IO->name == name) {
IO->tensor = dst_tensor;
break;
}
}
}
std::shared_ptr<NodeIOInfo> GraphEP::ConstructNodeIO(const std::shared_ptr<tim::vx::Operation>& op, std::vector<NodeArg*> input_arg, std::vector<NodeArg*> output_arg) {
auto info = std::make_shared<vsi::npu::NodeIOInfo>();
info->op_ = op;
std::vector<std::string> input_names, output_names;
if (input_arg.empty()) {
info->input_names_ = std::vector<std::string>();
} else {
input_names.reserve(input_arg.size());
std::transform(input_arg.begin(), input_arg.end(), std::back_inserter(input_names),
[](const NodeArg* node) -> std::string {
return node->Name();
});
info->input_names_ = input_names;
}
if (output_arg.empty()) {
info->output_names_ = std::vector<std::string>();
} else {
output_names.reserve(output_arg.size());
std::transform(output_arg.begin(), output_arg.end(), std::back_inserter(output_names),
[](const NodeArg* node) -> std::string {
return node->Name();
});
info->output_names_ = output_names;
}
return info;
}
bool GraphEP::BindTensors(const std::shared_ptr<NodeIOInfo>& nodeio_info) {
auto op = nodeio_info->op_;
auto input_names = nodeio_info->input_names_;
auto output_names = nodeio_info->output_names_;
if (!input_names.empty()) {
for (auto& name : input_names) {
if (tensors_.find(name) == tensors_.end() || tensors_[name] == nullptr) {
LOGS_DEFAULT(ERROR) << "Input tensor not defined or not found!";
return false;
}
(*op).BindInput(tensors_[name]);
}
}
if (!output_names.empty()) {
for (auto& name : output_names) {
if (tensors_.find(name) == tensors_.end() || tensors_[name] == nullptr) {
LOGS_DEFAULT(ERROR) << "Output tensor not defined or not found!";
return false;
}
(*op).BindOutput(tensors_[name]);
}
}
return true;
}
std::shared_ptr<tim::vx::Tensor> GraphEP::MapTIMVXTensor(
std::shared_ptr<tim::vx::Graph>& graph, const NodeUnitIODef nudef,
const NodeUnit& node_unit,
const GraphViewer* graph_viewer, tim::vx::TensorAttribute attribute) {
const auto& arg = nudef.node_arg;
if (tensors_.end() != tensors_.find(nudef.node_arg.Name())) {
// if (!quant_param.has_value() || quant_param.has_value() && tensors_[arg.Name()]->GetSpec().GetQuantization().Type() != tim::vx::QuantType::NONE)
return tensors_.find(arg.Name())->second;
}
auto shape = vsi::npu::util::OnnxShapeToTIMVXShape(vsi::npu::util::GetTensorShape(arg));
std::reverse(shape.begin(), shape.end());
tim::vx::DataType dt = vsi::npu::util::OnnxDtypeToTIMVXDtype(arg.Type());
tim::vx::TensorSpec spec = tim::vx::TensorSpec(dt, shape, attribute);
// Tensors have same name may not have same status of quant_param existence, such as QLinearConv->MaxPool->QLinearConv
// Maxpool output tensor is not set quantization at first pass
bool is_qtensor = nudef.quant_param.has_value() || Contains(all_quantized_op_inputs_, arg.Name());
if (is_qtensor) {
float scale = 0.0f;
int32_t zp = 0;
std::optional<std::vector<float>> scales;
std::optional<std::vector<int32_t>> zps;
if (nudef.quant_param.has_value()) {
util::GetQuantizationScaleAndZeroPoint(graph_viewer_.GetAllInitializedTensors(),
nudef, node_unit.ModelPath(),
scale, zp, scales, zps);
} else {
auto target_nodeunit = all_quantized_op_inputs_[arg.Name()][0];
auto qinput = all_quantized_op_inputs_[arg.Name()][0]->Inputs();
auto it = std::find_if(qinput.begin(), qinput.end(), [&arg](const NodeUnitIODef& nud) { return nud.node_arg.Name() == arg.Name(); });
bool is_conv_bias = std::distance(qinput.begin(), it) == 2;
if (!is_conv_bias || it->quant_param.has_value()) {
util::GetQuantizationScaleAndZeroPoint(graph_viewer_.GetAllInitializedTensors(),
*it, target_nodeunit->ModelPath(),
scale, zp, scales, zps);
} else if (!it->quant_param.has_value()) {
float in_scale, w_scale;
int32_t in_zp, w_zp;
std::optional<std::vector<float>> in_scales, w_scales;
std::optional<std::vector<int32_t>> in_zps, w_zps;
// onnx defines conv bias with non quantization, but it must be quantized in VSINPU support
// The bias scale is set as input_scale * weight_scale if per layer quantized, input_scale* weight_scale[i] if per channel quantized
util::GetQuantizationScaleAndZeroPoint(graph_viewer_.GetAllInitializedTensors(),
qinput[0], target_nodeunit->ModelPath(),
in_scale, in_zp, in_scales, in_zps);
util::GetQuantizationScaleAndZeroPoint(graph_viewer_.GetAllInitializedTensors(),
qinput[1], target_nodeunit->ModelPath(),
w_scale, w_zp, w_scales, w_zps);
scale = in_scale * w_scale;
zp = 0;
if (w_scales) {
std::vector<float> temp;
for (size_t i = 0; i < w_scales->size(); i++) {
temp.push_back(w_scales.value()[i] * in_scale);
}
scales = temp;
}
}
}
tim::vx::Quantization quant;
// per tensor quantization
if (!scales.has_value()) {
quant.SetType(tim::vx::QuantType::ASYMMETRIC);
quant.SetScales({scale});
quant.SetZeroPoints({zp});
} else { // per channel quantization
if (zps.has_value()) {
bool has_nonzero = std::find_if(zps->begin(), zps->end(), [](int elem) { return elem != 0; }) != zps->end();
if (has_nonzero && *arg.Type() == "tensor(uint8)") {
quant.SetType(tim::vx::QuantType::ASYMMETRIC_PER_CHANNEL);
} else {
quant.SetType(tim::vx::QuantType::SYMMETRIC_PER_CHANNEL);
}
quant.SetZeroPoints(zps.value());
} else {
if (*arg.Type() == "tensor(int32)" || zp == 0) {
// set bias quant type
quant.SetType(tim::vx::QuantType::SYMMETRIC_PER_CHANNEL);
} else {
quant.SetType(tim::vx::QuantType::ASYMMETRIC_PER_CHANNEL);
}
quant.SetZeroPoints({zp});
}
quant.SetScales(scales.value());
quant.SetChannelDim(shape.size() - 1);
}
spec.SetQuantization(quant);
}
std::shared_ptr<tim::vx::Tensor> tensor;
if (attribute ==
tim::vx::TensorAttribute::CONSTANT) { // create const tensor
const ONNX_NAMESPACE::TensorProto* tensor_proto =
graph_viewer_.GetConstantInitializer(arg.Name(), true);
std::shared_ptr<uint8_t> unpackedTensor =
vsi::npu::util::UnpackTensor(&arg, *tensor_proto);
const void* valueAddr =
reinterpret_cast<const void*>(unpackedTensor.get());
tensor = graph->CreateTensor(spec, valueAddr);
} else {
tensor = graph->CreateTensor(spec);
}
for (auto& input : graph_inputs_) {
if (input->name == arg.Name()) {
input->tensor = tensor;
input->shape = vsi::npu::util::GetTensorShape(arg);
break;
}
}
for (auto& output : graph_outputs_) {
if (output->name == arg.Name()) {
output->tensor = tensor;
output->shape = utils::GetTensorShapeFromTensorShapeProto(*arg.Shape());
break;
}
}
tensors_.insert({arg.Name(), tensor});
return tensor;
}
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,116 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#pragma once
#include <map>
#include <vector>
#include <string>
#include <memory>
#include <unordered_map>
#include "builders/op_builder.h"
#include "tim/vx/context.h"
#include "tim/vx/graph.h"
#include "tim/vx/tensor.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
struct GraphIOInfo {
std::string name;
bool is_initializer;
std::shared_ptr<tim::vx::Tensor> tensor;
TensorShape shape;
};
struct NodeIOInfo {
std::shared_ptr<tim::vx::Operation> op_;
std::vector<std::string> input_names_;
std::vector<std::string> output_names_;
};
class GraphEP {
public:
explicit GraphEP(const GraphViewer& graph_viewer);
~GraphEP() {}
bool Prepare();
static bool SupportedOp(const onnxruntime::GraphViewer& graph_viewer,
const NodeUnit& node_unit);
// If a node is supported by VSINPU in a partition node group
// `node_outputs_in_group` is the set of the output names of the nodes added to this group so far
static bool IsNodeSupportedInGroup(const NodeUnit& node_unit, const GraphViewer& graph_viewer);
const NodeUnit& GetNodeUnit(const Node* node) const;
bool& GetCompiled() { return compiled_; }
std::shared_ptr<tim::vx::Graph>& GetGraph() { return graph_; }
std::vector<std::shared_ptr<tim::vx::Operation>>& GetOps() { return ops_; }
std::map<std::string, std::shared_ptr<tim::vx::Tensor>>& GetTensors() {
return tensors_;
}
std::vector<std::shared_ptr<GraphIOInfo>>& GetGraphInputs() {
return graph_inputs_;
}
std::vector<std::shared_ptr<GraphIOInfo>>& GetGraphOutputs() {
return graph_outputs_;
}
void UpdateTensorMap(const std::string& name, const std::shared_ptr<tim::vx::Tensor>& dst_tensor);
std::shared_ptr<NodeIOInfo> ConstructNodeIO(const std::shared_ptr<tim::vx::Operation>& op, std::vector<NodeArg*> input_arg, std::vector<NodeArg*> output_arg);
bool BindTensors(const std::shared_ptr<NodeIOInfo>& nodeio_info);
std::shared_ptr<tim::vx::Tensor> MapTIMVXTensor(
std::shared_ptr<tim::vx::Graph>& graph, const NodeUnitIODef nudef,
const NodeUnit& nodeunit,
const GraphViewer* graph_viewer, tim::vx::TensorAttribute attribute);
private:
std::shared_ptr<tim::vx::Context> context_;
std::shared_ptr<tim::vx::Graph> graph_;
std::map<std::string, std::shared_ptr<tim::vx::Tensor>> tensors_;
std::vector<std::shared_ptr<tim::vx::Operation>> ops_;
std::vector<std::shared_ptr<GraphIOInfo>> graph_inputs_;
std::vector<std::shared_ptr<GraphIOInfo>> graph_outputs_;
// Contains all quantized operators' input and the NodeUnit(s) using the input
// In the form of {input_name, [NodeUnit(s) using the input]}
std::unordered_map<std::string, std::vector<const NodeUnit*>> all_quantized_op_inputs_;
const GraphViewer& graph_viewer_;
// Holder for the NodeUnits in the graph, this will guarantee the NodeUnits is
// valid throughout the lifetime of the ModelBuilder
std::vector<std::unique_ptr<NodeUnit>> node_unit_holder_;
std::unordered_map<const Node*, const NodeUnit*> node_unit_map_;
bool compiled_;
};
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,277 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <unordered_map>
#include <string>
#include <unordered_set>
#include "core/framework/compute_capability.h"
#include "core/providers/vsinpu/vsinpu_execution_provider.h"
#include "core/providers/vsinpu/vsinpu_ep_graph.h"
#include "core/providers/vsinpu/builders/op_builder.h"
#include "core/providers/vsinpu/builders/op_builder_factory.h"
#include "core/providers/vsinpu/vsinpu_util.h"
#include "core/framework/kernel_registry.h"
#include "core/framework/node_unit.h"
#include "core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h"
#include "core/optimizer/qdq_transformer/selectors_actions/shared/utils.h"
#include "core/providers/partitioning_utils.h"
namespace onnxruntime {
VSINPUExecutionProvider::VSINPUExecutionProvider(const VSINPUExecutionProviderInfo& info)
: IExecutionProvider{onnxruntime::kVSINPUExecutionProvider},
device_id_(info.device_id) {
AllocatorCreationInfo default_memory_info{
[](int) {
return std::make_unique<CPUAllocator>(
OrtMemoryInfo("VSINPU", OrtAllocatorType::OrtDeviceAllocator));
}};
CreateAllocator(default_memory_info);
AllocatorCreationInfo cpu_memory_info{
[](int) {
return std::make_unique<CPUAllocator>(
OrtMemoryInfo("VSINPU", OrtAllocatorType::OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeCPUOutput));
}};
CreateAllocator(cpu_memory_info);
}
VSINPUExecutionProvider::~VSINPUExecutionProvider() {}
std::vector<std::unique_ptr<ComputeCapability>>
VSINPUExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewer,
const IKernelLookup& /*kernel_lookup*/) const {
std::vector<std::unique_ptr<ComputeCapability>> result;
if (graph_viewer.IsSubgraph()) {
return result;
}
for (const auto& tensor : graph_viewer.GetAllInitializedTensors()) {
if (tensor.second->has_data_location()) {
LOGS_DEFAULT(VERBOSE) << "location:" << tensor.second->data_location();
if (tensor.second->data_location() ==
ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL) {
LOGS_DEFAULT(WARNING) << "VSINPU: Initializers with external data location are not "
"currently supported";
return result;
}
}
}
// Get all the NodeUnits in the graph_viewer
std::vector<std::unique_ptr<NodeUnit>> node_unit_holder;
std::unordered_map<const Node*, const NodeUnit*> node_unit_map;
std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(graph_viewer);
// This holds the result of whether a NodeUnit is supported or not,
// to prevent nodes in a NodeUnit to be checked for multiple times
std::unordered_map<const NodeUnit*, bool> node_unit_supported_result;
node_unit_supported_result.reserve(node_unit_holder.size());
std::unordered_set<std::string> node_outputs_in_current_group{};
const auto is_node_supported = [&](const Node& node) -> bool {
const NodeUnit* node_unit = node_unit_map.at(&node);
bool supported = false;
// If we have visited one of the nodes in the node_unit, use the result directly
const auto it = node_unit_supported_result.find(node_unit);
if (it != node_unit_supported_result.cend()) {
supported = it->second;
} else {
// We only check the target node of the node unit
supported = vsi::npu::GraphEP::IsNodeSupportedInGroup(*node_unit, graph_viewer);
node_unit_supported_result[node_unit] = supported;
}
LOGS_DEFAULT(VERBOSE) << "Node supported: [" << supported
<< "] Operator type: [" << node.OpType()
<< "] index: [" << node.Index()
<< "] name: [" << node.Name()
<< "] as part of the NodeUnit type: [" << node_unit->OpType()
<< "] index: [" << node_unit->Index()
<< "] name: [" << node_unit->Name()
<< "]";
if (supported) {
// We want to save all the output names of nodes in the current group for easy query
for (const auto* output : node.OutputDefs()) {
node_outputs_in_current_group.insert(output->Name());
}
}
return supported;
};
const auto on_group_closed = [&](const std::vector<const Node*>& group) -> bool {
// reset per-partition node group tracking
node_outputs_in_current_group.clear();
return true;
};
const auto gen_metadef_name = [&]() {
static size_t group_counter = 0;
return "VSINPU_" + std::to_string(++group_counter);
};
result = utils::CreateSupportedPartitions(graph_viewer, is_node_supported, on_group_closed,
gen_metadef_name, "VSINPU", kVSINPUExecutionProvider, &node_unit_map);
std::for_each(result.begin(), result.end(), [&graph_viewer](auto& capability) {
if (capability && capability->sub_graph && capability->sub_graph->GetMetaDef()) {
const auto* meta_def = capability->sub_graph->GetMetaDef();
bool has_any_non_constant_inputs = std::any_of(meta_def->inputs.begin(), meta_def->inputs.end(), [&graph_viewer](const auto& input) {
return !graph_viewer.IsConstantInitializer(input, true);
});
// ALL inputs are constant
if (!has_any_non_constant_inputs) {
capability.reset();
}
}
});
const auto num_of_partitions = result.size();
const auto num_of_supported_nodes = std::accumulate(
result.begin(), result.end(), size_t{0},
[](const auto& acc, const auto& partition) -> size_t {
return acc + (partition && partition->sub_graph ? partition->sub_graph->nodes.size() : 0);
});
const auto summary_msg = MakeString(
"VSINPUExecutionProvider::GetCapability,",
" number of partitions supported by VSINPU: ", num_of_partitions,
"; number of nodes in the graph: ", graph_viewer.NumberOfNodes(),
"; number of nodes supported by VSINPU: ", num_of_supported_nodes);
// If the graph is partitioned in multiple subgraphs, and this may impact performance,
// we want to give users a summary message at warning level.
if (num_of_partitions > 1) {
LOGS_DEFAULT(WARNING) << summary_msg;
} else {
LOGS_DEFAULT(INFO) << summary_msg;
}
return result;
}
Status ComputeStateFunc(vsi::npu::GraphEP* graph_ep,
OrtKernelContext* context) {
Ort::KernelContext ctx(context);
size_t num_in = ctx.GetInputCount();
const size_t num_inputs = graph_ep->GetGraphInputs().size();
for (size_t i = 0, j = 0; i < num_inputs; i++) {
if (!graph_ep->GetGraphInputs()[i]->is_initializer) {
const auto onnx_input_tensor = ctx.GetInput(i);
const auto tensor_info = onnx_input_tensor.GetTensorTypeAndShapeInfo();
auto origin_tensor = graph_ep->GetGraphInputs()[i]->tensor;
origin_tensor->CopyDataToTensor(onnx_input_tensor.GetTensorRawData(), vsi::npu::util::GetTensorBytes(tensor_info));
j++;
}
}
if (!graph_ep->GetGraph()->Run()) {
LOGS_DEFAULT(ERROR) << "Failed to run graph.";
}
for (size_t i = 0; i < ctx.GetOutputCount(); i++) {
auto timvx_tensor = graph_ep->GetGraphOutputs()[i]->tensor;
auto out_shape = graph_ep->GetGraphOutputs()[i]->shape.GetDims();
auto onnx_output_tensor =
ctx.GetOutput(i, out_shape.data(), out_shape.size());
timvx_tensor->CopyDataFromTensor(const_cast<void*>(onnx_output_tensor.GetTensorRawData()));
}
return Status::OK();
}
Status VSINPUExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>& fused_nodes_and_graphs,
std::vector<NodeComputeInfo>& node_compute_funcs) {
for (const auto& fused_node_graph : fused_nodes_and_graphs) {
const GraphViewer& graph_viewer = fused_node_graph.filtered_graph;
std::shared_ptr<vsi::npu::GraphEP> graph_ep = std::make_shared<vsi::npu::GraphEP>(graph_viewer);
for (auto tensor : graph_viewer.GetInputsIncludingInitializers()) {
LOGS_DEFAULT(VERBOSE) << "subgraph input init:" << vsi::npu::util::PrintNode(*tensor) << "#"
<< graph_viewer.IsInitializedTensor(tensor->Name());
auto input = std::make_shared<vsi::npu::GraphIOInfo>();
input->name = tensor->Name();
input->is_initializer = graph_viewer.IsConstantInitializer(tensor->Name(), true);
graph_ep->GetGraphInputs().push_back(input);
}
for (auto tensor : graph_viewer.GetOutputs()) {
LOGS_DEFAULT(VERBOSE) << "subgraph output:" << vsi::npu::util::PrintNode(*tensor);
auto output = std::make_shared<vsi::npu::GraphIOInfo>();
output->name = tensor->Name();
output->is_initializer = false;
graph_ep->GetGraphOutputs().push_back(output);
}
auto node_indices = graph_viewer.GetNodesInTopologicalOrder();
for (const auto& node_index : node_indices) {
const auto node = graph_viewer.GetNode(node_index);
const NodeUnit& node_unit = graph_ep->GetNodeUnit(node);
// Only add op when we hit the target node
if (node != &node_unit.GetNode()) {
continue;
}
LOGS_DEFAULT(VERBOSE) << "Adding node: [" << node->OpType() << "]";
vsi::npu::SupportedBuiltinOps().at(node->OpType())->BuildOp(graph_ep.get(), graph_viewer, node_unit);
}
LOGS_DEFAULT(INFO) << "Verifying graph";
graph_ep->GetCompiled() = graph_ep->GetGraph()->Compile();
if (!graph_ep->GetCompiled()) {
LOGS_DEFAULT(ERROR) << "Failed to verify graph.";
} else {
LOGS_DEFAULT(INFO) << "Graph has been verified successfully.";
}
NodeComputeInfo compute_info;
compute_info.create_state_func = [graph_ep](ComputeContext* /*context*/,
FunctionState* state) {
*state = graph_ep.get();
return 0;
};
compute_info.compute_func =
[graph_ep, this](FunctionState /*state*/, const OrtApi* /* api */,
OrtKernelContext* context) {
std::lock_guard<OrtMutex> lock(this->GetMutex());
Status res = ComputeStateFunc(graph_ep.get(), context);
return res;
};
compute_info.release_state_func = [](FunctionState /*state*/) {};
node_compute_funcs.push_back(compute_info);
}
return Status::OK();
}
std::shared_ptr<KernelRegistry> VSINPUExecutionProvider::GetKernelRegistry() const {
static std::shared_ptr<KernelRegistry> kernel_registry = std::make_shared<KernelRegistry>();
return kernel_registry;
}
} // namespace onnxruntime

View file

@ -0,0 +1,53 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#pragma once
#include <memory>
#include <vector>
#include "core/framework/execution_provider.h"
#include "core/session/abi_session_options_impl.h"
namespace onnxruntime {
struct VSINPUExecutionProviderInfo {
int device_id{0};
};
class VSINPUExecutionProvider : public IExecutionProvider {
public:
explicit VSINPUExecutionProvider(const VSINPUExecutionProviderInfo& info);
virtual ~VSINPUExecutionProvider();
std::vector<std::unique_ptr<ComputeCapability>> GetCapability(
const onnxruntime::GraphViewer& graph_viewer,
const IKernelLookup& kernel_lookup) const override;
std::shared_ptr<KernelRegistry> GetKernelRegistry() const override;
Status Compile(const std::vector<FusedNodeAndGraph>& fused_nodes_and_graphs,
std::vector<NodeComputeInfo>& node_compute_funcs) override;
OrtMutex& GetMutex() { return mutex_; }
private:
int device_id_;
OrtMutex mutex_;
};
} // namespace onnxruntime

View file

@ -0,0 +1,59 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include "core/framework/compute_capability.h"
#include "core/providers/vsinpu/vsinpu_provider_factory.h"
#include "core/providers/vsinpu/vsinpu_provider_factory_creator.h"
#include "core/providers/vsinpu/vsinpu_execution_provider.h"
namespace onnxruntime {
struct VSINPUProviderFactory : IExecutionProviderFactory {
VSINPUProviderFactory() {}
~VSINPUProviderFactory() override {}
std::unique_ptr<IExecutionProvider> CreateProvider() override;
};
std::unique_ptr<IExecutionProvider> VSINPUProviderFactory::CreateProvider() {
onnxruntime::VSINPUExecutionProviderInfo info;
return std::make_unique<onnxruntime::VSINPUExecutionProvider>(info);
}
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_VSINPU() {
return std::make_shared<onnxruntime::VSINPUProviderFactory>();
}
std::shared_ptr<IExecutionProviderFactory>
VSINPUProviderFactoryCreator::Create() {
return std::make_shared<onnxruntime::VSINPUProviderFactory>();
}
} // namespace onnxruntime
ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_VSINPU,
_In_ OrtSessionOptions* options) {
options->provider_factories.push_back(
onnxruntime::VSINPUProviderFactoryCreator::Create());
return nullptr;
}

View file

@ -0,0 +1,34 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#pragma once
#include <memory>
#include "core/providers/providers.h"
namespace onnxruntime {
struct VSINPUProviderFactoryCreator {
static std::shared_ptr<IExecutionProviderFactory> Create();
};
} // namespace onnxruntime

View file

@ -0,0 +1,502 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <map>
#include <algorithm>
#include <utility>
#include <unordered_set>
#include "core/providers/vsinpu/vsinpu_util.h"
#include "core/optimizer/initializer.h"
#include "core/providers/shared/utils/utils.h"
namespace onnxruntime {
template <typename T>
struct shared_array_deletor {
void operator()(T const* ptr) { delete[] ptr; }
};
namespace vsi {
namespace npu {
namespace util {
tim::vx::DataType OnnxDtypeToTIMVXDtype(const int32_t dtype) {
switch (dtype) {
case onnx::TensorProto_DataType_FLOAT:
return tim::vx::DataType::FLOAT32;
case onnx::TensorProto_DataType_FLOAT16:
return tim::vx::DataType::FLOAT16;
case onnx::TensorProto_DataType_INT8:
return tim::vx::DataType::INT8;
case onnx::TensorProto_DataType_UINT8:
return tim::vx::DataType::UINT8;
case onnx::TensorProto_DataType_INT32:
return tim::vx::DataType::INT32;
case onnx::TensorProto_DataType_INT16:
return tim::vx::DataType::INT16;
case onnx::TensorProto_DataType_UINT16:
return tim::vx::DataType::UINT16;
case onnx::TensorProto_DataType_BOOL:
return tim::vx::DataType::BOOL8;
default:
LOGS_DEFAULT(WARNING) << "Unsupported data type: " << dtype;
break;
}
return tim::vx::DataType::FLOAT32;
}
tim::vx::DataType OnnxDtypeToTIMVXDtype(const ONNX_NAMESPACE::DataType type) {
static const std::map<std::string, tim::vx::DataType> type_table = {
{"tensor(float)", tim::vx::DataType::FLOAT32},
{"tensor(float16)", tim::vx::DataType::FLOAT16},
{"tensor(int8)", tim::vx::DataType::INT8},
{"tensor(uint8)", tim::vx::DataType::UINT8},
{"tensor(int32)", tim::vx::DataType::INT32},
{"tensor(int16)", tim::vx::DataType::INT16},
{"tensor(uint16)", tim::vx::DataType::UINT16},
{"tensor(int64)", tim::vx::DataType::INT64},
{"tensor(bool)", tim::vx::DataType::BOOL8},
};
auto search = type_table.find(*type);
if (search != type_table.end()) {
return search->second;
}
LOGS_DEFAULT(WARNING) << "Unsupported data type: " << *type;
return tim::vx::DataType::FLOAT32;
}
tim::vx::ShapeType OnnxShapeToTIMVXShape(const onnxruntime::TensorShape& ts) {
tim::vx::ShapeType timvx_shape(ts.NumDimensions());
if (ts.NumDimensions() == 0) {
timvx_shape.push_back(1);
} else {
for (size_t i = 0; i < ts.NumDimensions(); i++) {
timvx_shape[i] = ts.GetDims()[i];
}
}
return timvx_shape;
}
std::string PrintNode(const onnxruntime::NodeArg& node_arg) {
auto shape = node_arg.Shape();
if (shape == nullptr) {
return "<null>";
}
std::string s = node_arg.Name() + ":<";
if (shape->dim_size() == 0) {
s += "1>, is a scalar";
return s;
}
for (int i = 0; i < shape->dim_size(); i++) {
auto dim = shape->dim(i);
std::string s1;
std::stringstream ss;
ss << dim.dim_value();
ss >> s1;
s += s1;
if (i < shape->dim_size() - 1) {
s += ",";
} else {
s += ">";
}
}
return s;
}
std::string PrintNode(const std::vector<int64_t> shape) {
if (shape.size() == 0) {
return "<null>";
}
std::string s = "<";
for (std::size_t i = 0; i < shape.size(); i++) {
auto dim = shape[i];
std::string s1;
std::stringstream ss;
ss << dim;
ss >> s1;
s += s1;
if (i < shape.size() - 1) {
s += ",";
} else {
s += ">";
}
}
return s;
}
size_t GetTensorElementSize(const ONNXTensorElementDataType type) {
switch (type) {
case onnx::TensorProto_DataType_INT64:
return 8;
case onnx::TensorProto_DataType_FLOAT:
case onnx::TensorProto_DataType_INT32:
return 4;
case onnx::TensorProto_DataType_FLOAT16:
case onnx::TensorProto_DataType_INT16:
case onnx::TensorProto_DataType_UINT16:
return 2;
case onnx::TensorProto_DataType_INT8:
case onnx::TensorProto_DataType_UINT8:
case onnx::TensorProto_DataType_BOOL:
return 1;
default:
break;
}
return 0;
}
size_t GetTensorBytes(const Ort::TensorTypeAndShapeInfo& info) {
return info.GetElementCount() * GetTensorElementSize(info.GetElementType());
}
TensorShape GetTensorShape(const onnxruntime::NodeArg& node_arg) {
auto shape_proto = node_arg.Shape();
std::vector<int64_t> dims;
if (shape_proto != nullptr) {
for (int i = 0; i < shape_proto->dim_size(); i++) {
auto dim = shape_proto->dim(i);
dims.push_back(dim.dim_value());
}
}
if (dims.size() == 0) {
dims.push_back(1);
}
TensorShape ts(dims);
return ts;
}
std::shared_ptr<uint8_t> UnpackTensor(
const NodeArg* node_arg, const ONNX_NAMESPACE::TensorProto& initializer) {
std::shared_ptr<uint8_t> unpackedTensor;
auto shape = GetTensorShape(*node_arg);
size_t elementCount = shape.Size();
#define CASE_PROTO(X, Y) \
case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##X: { \
size_t tensorByteSize = elementCount * sizeof(Y); \
unpackedTensor.reset(new uint8_t[tensorByteSize], \
shared_array_deletor<uint8_t>()); \
auto status = onnxruntime::utils::UnpackTensor( \
initializer, \
initializer.has_raw_data() ? initializer.raw_data().data() : nullptr, \
initializer.has_raw_data() ? initializer.raw_data().size() : 0, \
reinterpret_cast<Y*>(unpackedTensor.get()), elementCount); \
if (!status.IsOK()) { \
LOGS_DEFAULT(ERROR) << "Unpack tensor data failed."; \
} \
break; \
}
switch (initializer.data_type()) {
CASE_PROTO(FLOAT, float);
CASE_PROTO(DOUBLE, double);
CASE_PROTO(BOOL, bool);
CASE_PROTO(INT8, int8_t);
CASE_PROTO(INT16, int16_t);
CASE_PROTO(INT32, int32_t);
CASE_PROTO(INT64, int64_t);
CASE_PROTO(UINT8, uint8_t);
CASE_PROTO(UINT16, uint16_t);
CASE_PROTO(UINT32, uint32_t);
CASE_PROTO(FLOAT16, onnxruntime::MLFloat16);
default:
return nullptr;
}
return unpackedTensor;
}
tim::vx::PadType GetPadType(const std::string type) {
static const std::map<std::string, tim::vx::PadType> type_table = {
{"NOTSET", tim::vx::PadType::AUTO},
{"SAME_UPPER", tim::vx::PadType::SAME},
{"SAME_LOWER", tim::vx::PadType::SAME},
{"VALID", tim::vx::PadType::VALID},
};
auto search = type_table.find(type);
if (search != type_table.end()) {
return search->second;
}
return tim::vx::PadType::NONE;
}
int32_t ReverseAxis(int32_t origin_axis, int32_t length) {
int32_t axis = 0;
if (origin_axis < 0) {
origin_axis += length;
}
axis = length - origin_axis - 1;
return axis;
}
std::vector<int32_t> ReverseAxis(std::vector<int32_t> origin_axes, int32_t length) {
std::vector<int32_t> axes;
for (int32_t& axis : origin_axes) {
if (axis < 0) {
axis += length;
}
axes.push_back(length - axis - 1);
}
std::sort(axes.begin(), axes.end());
return axes;
}
bool IsTypeSupported(const NodeArg* node_arg) {
const auto* type_proto = node_arg->TypeAsProto();
if (!type_proto) {
return false;
}
switch (type_proto->tensor_type().elem_type()) {
case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL:
case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT:
case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16:
case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8:
case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8:
case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32:
case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64:
return true;
default:
return false;
}
}
QuantizedOpType GetQuantizedOpType(const NodeUnit& node_unit) {
const auto& op_type = node_unit.OpType();
if (node_unit.UnitType() == NodeUnit::Type::SingleNode) {
if (op_type == "DequantizeLinear")
return QuantizedOpType::DequantizeLinear;
else if (op_type == "QuantizeLinear")
return QuantizedOpType::QuantizeLinear;
else if (op_type == "QLinearConv")
return QuantizedOpType::QLinearConv;
else if (op_type == "QLinearMatMul")
return QuantizedOpType::QLinearMatMul;
else if (op_type == "QLinearAdd")
return QuantizedOpType::QLinearAdd;
else if (op_type == "QLinearMul")
return QuantizedOpType::QLinearMul;
else if (op_type == "QLinearSigmoid")
return QuantizedOpType::QLinearSigmoid;
else if (op_type == "QLinearAveragePool")
return QuantizedOpType::QLinearAveragePool;
} else if (node_unit.UnitType() == NodeUnit::Type::QDQGroup) {
if (op_type == "Conv")
return QuantizedOpType::QDQConv;
else if (op_type == "Resize")
return QuantizedOpType::QDQResize;
else if (op_type == "AveragePool")
return QuantizedOpType::QDQAveragePool;
else if (op_type == "Add")
return QuantizedOpType::QDQAdd;
else if (op_type == "Mul")
return QuantizedOpType::QDQMul;
else if (op_type == "Transpose")
return QuantizedOpType::QDQTranspose;
else if (op_type == "Reshape")
return QuantizedOpType::QDQReshape;
else if (op_type == "Softmax")
return QuantizedOpType::QDQSoftmax;
else if (op_type == "Concat")
return QuantizedOpType::QDQConcat;
else if (op_type == "Gemm")
return QuantizedOpType::QDQGemm;
else if (op_type == "MatMul")
return QuantizedOpType::QDQMatMul;
}
return QuantizedOpType::Unknown;
}
ConvType GetConvType(const NodeUnit& node_unit, const InitializedTensorSet& initializers) {
NodeAttrHelper helper(node_unit);
const auto group = helper.Get("group", 1);
const auto& weight = node_unit.Inputs()[1].node_arg.Name();
const auto& weight_tensor = *initializers.at(weight);
// For ONNX we only have 1 conv ops
// For VSINPU we have 3
// Input is (W, H, C, N)
// group == 1, --> regular conv
// group != 1 && weight is (kW, kH, group, M), --> depthwise conv
// group != 1 && weight is (kW, kH, C/group, M), --> grouped conv
if (group == 1)
return ConvType::Regular;
else if ((weight_tensor.dims()[1] == group))
return ConvType::Depthwise;
else
return ConvType::Grouped;
}
bool IsQuantizedConv(QuantizedOpType quant_op_type) {
return (quant_op_type == QuantizedOpType::QLinearConv) ||
(quant_op_type == QuantizedOpType::QDQConv);
}
bool IsQuantizedPool(QuantizedOpType quant_op_type) {
return (quant_op_type == QuantizedOpType::QLinearAveragePool) ||
(quant_op_type == QuantizedOpType::QDQAveragePool);
}
bool IsQuantizedGemm(QuantizedOpType quant_op_type) {
return (quant_op_type == QuantizedOpType::QLinearMatMul) ||
(quant_op_type == QuantizedOpType::QDQGemm) ||
(quant_op_type == QuantizedOpType::QDQMatMul);
}
bool IsQuantizedBinaryOp(QuantizedOpType quant_op_type) {
return quant_op_type == QuantizedOpType::QLinearMatMul ||
quant_op_type == QuantizedOpType::QLinearAdd ||
quant_op_type == QuantizedOpType::QLinearMul ||
quant_op_type == QuantizedOpType::QDQAdd ||
quant_op_type == QuantizedOpType::QDQMul ||
quant_op_type == QuantizedOpType::QDQGemm ||
quant_op_type == QuantizedOpType::QDQMatMul ||
IsQuantizedConv(quant_op_type);
}
bool HasValidBinaryOpQuantizedInputTypes(const NodeUnit& node_unit) {
auto quant_op_type = GetQuantizedOpType(node_unit);
int32_t a_input_type, b_input_type;
if (!IsQuantizedBinaryOp(quant_op_type)) {
LOGS_DEFAULT(VERBOSE) << "[" << node_unit.OpType() << "] is not a binary qlinear op";
return false;
}
const auto& inputs = node_unit.Inputs();
if (!GetType(inputs[0].node_arg, a_input_type))
return false;
if (!GetType(inputs[1].node_arg, b_input_type))
return false;
// QlinearConv/MatMul/QDQGemm/QDQMatMul supports u8u8 or u8s8
// QLinearAdd/QLinearMul only support u8u8
bool is_quant_conv_or_gemm = IsQuantizedConv(quant_op_type) || IsQuantizedGemm(quant_op_type);
bool has_valid_qlinear_conv_weight =
(b_input_type == ONNX_NAMESPACE::TensorProto_DataType_UINT8 ||
b_input_type == ONNX_NAMESPACE::TensorProto_DataType_INT8);
bool has_valid_qlinear_conv_input =
(a_input_type == ONNX_NAMESPACE::TensorProto_DataType_UINT8 ||
a_input_type == ONNX_NAMESPACE::TensorProto_DataType_INT8);
if ((is_quant_conv_or_gemm && !has_valid_qlinear_conv_weight) ||
(!is_quant_conv_or_gemm && a_input_type != b_input_type)) {
LOGS_DEFAULT(VERBOSE) << "[" << node_unit.OpType()
<< "] A Input type: [" << a_input_type
<< "] B Input type: [" << b_input_type
<< "] is not supported for now";
return false;
}
return true;
}
void GetQuantizationScaleAndZeroPoint(
const InitializedTensorSet& initializers, const NodeUnitIODef& io_def, const Path& model_path,
float& scale, int32_t& zero_point, std::optional<std::vector<float>>& pcq_scales,
std::optional<std::vector<int32_t>>& pcq_zps) {
scale = 0.0f;
zero_point = 0;
const auto& quant_param = *io_def.quant_param;
{ // get the scale
const auto& name = quant_param.scale.Name();
Initializer unpacked_tensor(*initializers.at(name), model_path);
scale = unpacked_tensor.DataAsSpan<float>()[0];
// per channel quantized handling
if (!unpacked_tensor.dims().empty() && unpacked_tensor.dims()[0] != 0 && unpacked_tensor.dims()[0] != 1) {
auto scales = unpacked_tensor.DataAsSpan<float>();
std::vector<float> scales_vec(scales.begin(), scales.end());
pcq_scales = onnxruntime::make_optional(std::move(scales_vec));
}
}
if (quant_param.zero_point) { // get the zero point if it exists
const auto& name = quant_param.zero_point->Name();
Initializer unpacked_tensor(*initializers.at(name), model_path);
bool is_i8_zp = unpacked_tensor.data_type() == onnx::TensorProto_DataType_INT8;
// some qdq conv bias is int32 quantized
bool is_int32_zp = unpacked_tensor.data_type() == onnx::TensorProto_DataType_INT32;
zero_point = is_i8_zp ? static_cast<int32_t>(unpacked_tensor.DataAsSpan<int8_t>()[0]) : is_int32_zp ? static_cast<int32_t>(unpacked_tensor.DataAsSpan<int32_t>()[0])
: static_cast<int32_t>(unpacked_tensor.DataAsByteSpan()[0]);
// per channel quantized handling
if (!unpacked_tensor.dims().empty() && unpacked_tensor.dims()[0] != 0 && unpacked_tensor.dims()[0] != 1) {
auto type = unpacked_tensor.data_type();
if (is_i8_zp) {
auto zps = unpacked_tensor.DataAsSpan<int8_t>();
std::vector<int32_t> zps_vec(zps.begin(), zps.end());
pcq_zps = onnxruntime::make_optional(std::move(zps_vec));
} else if (is_int32_zp) {
auto zps = unpacked_tensor.DataAsByteSpan();
std::vector<int32_t> zps_vec(zps.begin(), zps.end());
pcq_zps = onnxruntime::make_optional(std::move(zps_vec));
} else {
auto zps = unpacked_tensor.DataAsSpan<int32_t>();
std::vector<int32_t> zps_vec(zps.begin(), zps.end());
pcq_zps = onnxruntime::make_optional(std::move(zps_vec));
}
}
}
}
static bool IsInternalQuantizedNodeUnit(const NodeUnit& node_unit) {
// First, ignore QDQ NodeUnit which is not internal quantized node
if (node_unit.UnitType() == NodeUnit::Type::QDQGroup)
return false;
// These operators can use uint8 input without specific QLinear version of it
// However, the mode has to be internal to the graph/partition (they cannot consume graph inputs)
static const std::unordered_set<std::string> internal_quantized_op_types = {
"Transpose",
"Resize",
"Concat",
"MaxPool",
};
const auto& node = node_unit.GetNode();
if (!Contains(internal_quantized_op_types, node.OpType()))
return false;
int32_t input_type;
ORT_ENFORCE(GetType(*node.InputDefs()[0], input_type));
return input_type == ONNX_NAMESPACE::TensorProto_DataType_UINT8 || input_type == ONNX_NAMESPACE::TensorProto_DataType_INT8;
}
bool GetType(const NodeArg& node_arg, int32_t& type) {
type = ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED;
const auto* type_proto = node_arg.TypeAsProto();
if (!type_proto || !type_proto->has_tensor_type() || !type_proto->tensor_type().has_elem_type()) {
LOGS_DEFAULT(WARNING) << "NodeArg [" << node_arg.Name() << "] has no input type";
return false;
}
type = type_proto->tensor_type().elem_type();
return true;
}
} // namespace util
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -0,0 +1,131 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "core/framework/op_kernel.h"
#include "core/framework/tensor_type_and_shape.h"
#include "core/framework/tensorprotoutils.h"
#include "core/session/onnxruntime_cxx_api.h"
#include "core/framework/node_unit.h"
#include "tim/vx/tensor.h"
#include "tim/vx/types.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
namespace util {
tim::vx::DataType OnnxDtypeToTIMVXDtype(const int32_t dtype);
tim::vx::DataType OnnxDtypeToTIMVXDtype(const ONNX_NAMESPACE::DataType type);
tim::vx::ShapeType OnnxShapeToTIMVXShape(const onnxruntime::TensorShape& ts);
std::string PrintNode(const onnxruntime::NodeArg& node_arg);
std::string PrintNode(const std::vector<int64_t> shape);
size_t GetTensorElementSize(const ONNXTensorElementDataType type);
size_t GetTensorBytes(const Ort::TensorTypeAndShapeInfo& info);
TensorShape GetTensorShape(const onnxruntime::NodeArg& node_arg);
std::shared_ptr<uint8_t> UnpackTensor(
const NodeArg* node, const ONNX_NAMESPACE::TensorProto& initializer);
tim::vx::PadType GetPadType(const std::string type);
int32_t ReverseAxis(int32_t origin_axis, int32_t length);
std::vector<int32_t> ReverseAxis(std::vector<int32_t> origin_axes, int32_t length);
bool IsTypeSupported(const NodeArg* node_arg);
enum class QuantizedOpType : uint8_t {
Unknown, // Unknown or not a quantized NodeUnit
DequantizeLinear,
QuantizeLinear,
QLinearConv,
QLinearMatMul,
QLinearAdd,
QLinearSigmoid,
QLinearAveragePool,
QLinearMul,
// Not yet supported
// QLinearReduceMean,
QDQConv,
QDQResize,
QDQAveragePool,
QDQAdd,
QDQMul,
QDQTranspose,
QDQReshape,
QDQSoftmax,
QDQConcat,
QDQGemm,
QDQMatMul,
// TODO(cfy) :Add other QDQ NodeUnit types
};
enum class ConvType : uint8_t {
Regular,
Depthwise,
Grouped,
};
QuantizedOpType GetQuantizedOpType(const NodeUnit& node_unit);
ConvType GetConvType(const NodeUnit& node_unit, const InitializedTensorSet& initializers);
// If this is a quantized Conv (QLinearConv or QDQConv)
bool IsQuantizedConv(QuantizedOpType quant_op_type);
// If this is a quantized Pool (QLinearAveragePool or QDQAveragePool)
bool IsQuantizedPool(QuantizedOpType quant_op_type);
// If this is a quantized Gemm (QLinearMatMul or QDQMatMul/QDQGemm)
bool IsQuantizedGemm(QuantizedOpType quant_op_type);
// This quantized op is an operator or qdq node unit takes 2 inputs and produces 1 output
// Such as QLinearConv, QLinearMatMul, QLinearAdd, QDQConv,...
bool IsQuantizedBinaryOp(QuantizedOpType quant_op_type);
// Check if a qlinear binary op has valid inputs, Qlinear[Conv/MatMul/Add]
bool HasValidBinaryOpQuantizedInputTypes(const NodeUnit& node_unit);
void GetQuantizationScaleAndZeroPoint(
const InitializedTensorSet& initializers, const NodeUnitIODef& io_def, const Path& model_path,
float& scale, int32_t& zero_point,
std::optional<std::vector<float>>& pcq_scales,
std::optional<std::vector<int32_t>>& pcq_zps);
bool GetType(const NodeArg& node_arg, int32_t& type);
} // namespace util
} // namespace npu
} // namespace vsi
} // namespace onnxruntime

View file

@ -796,7 +796,7 @@ void LoadTests(const std::vector<std::basic_string<PATH_CHAR_TYPE>>& input_paths
auto test_case_dir = model_info->GetDir();
auto test_case_name_in_log = test_case_name + ORT_TSTR(" in ") + test_case_dir.native();
#if !defined(ORT_MINIMAL_BUILD) && !defined(USE_QNN)
#if !defined(ORT_MINIMAL_BUILD) && !defined(USE_QNN) && !defined(USE_VSINPU)
// to skip some models like *-int8 or *-qdq
if ((reinterpret_cast<OnnxModelInfo*>(model_info.get()))->HasDomain(ONNX_NAMESPACE::AI_ONNX_TRAINING_DOMAIN) ||
(reinterpret_cast<OnnxModelInfo*>(model_info.get()))->HasDomain(ONNX_NAMESPACE::AI_ONNX_PREVIEW_TRAINING_DOMAIN)) {

View file

@ -44,7 +44,7 @@ void usage() {
"\t-r [repeat]: Specifies the number of times to repeat\n"
"\t-v: verbose\n"
"\t-n [test_case_name]: Specifies a single test case to run.\n"
"\t-e [EXECUTION_PROVIDER]: EXECUTION_PROVIDER could be 'cpu', 'cuda', 'dnnl', 'tensorrt', "
"\t-e [EXECUTION_PROVIDER]: EXECUTION_PROVIDER could be 'cpu', 'cuda', 'dnnl', 'tensorrt', 'vsinpu'"
"'openvino', 'rocm', 'migraphx', 'acl', 'armnn', 'xnnpack', 'nnapi', 'qnn', 'snpe' or 'coreml'. "
"Default: 'cpu'.\n"
"\t-p: Pause after launch, can attach debugger and continue\n"
@ -169,6 +169,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
bool enable_mem_pattern = true;
bool enable_qnn = false;
bool enable_nnapi = false;
bool enable_vsinpu = false;
bool enable_coreml = false;
bool enable_snpe = false;
bool enable_dml = false;
@ -248,6 +249,8 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
enable_qnn = true;
} else if (!CompareCString(optarg, ORT_TSTR("nnapi"))) {
enable_nnapi = true;
} else if (!CompareCString(optarg, ORT_TSTR("vsinpu"))) {
enable_vsinpu = true;
} else if (!CompareCString(optarg, ORT_TSTR("coreml"))) {
enable_coreml = true;
} else if (!CompareCString(optarg, ORT_TSTR("snpe"))) {
@ -561,6 +564,14 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
#else
fprintf(stderr, "NNAPI is not supported in this build");
return -1;
#endif
}
if (enable_vsinpu) {
#ifdef USE_VSINPU
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_VSINPU(sf));
#else
fprintf(stderr, "VSINPU is not supported in this build");
return -1;
#endif
}
if (enable_coreml) {

View file

@ -261,6 +261,8 @@ static bool ParseSessionConfigs(const std::string& configs_string,
test_config.machine_config.provider_type_name = onnxruntime::kSnpeExecutionProvider;
} else if (!CompareCString(optarg, ORT_TSTR("nnapi"))) {
test_config.machine_config.provider_type_name = onnxruntime::kNnapiExecutionProvider;
} else if (!CompareCString(optarg, ORT_TSTR("vsinpu"))) {
test_config.machine_config.provider_type_name = onnxruntime::kVSINPUExecutionProvider;
} else if (!CompareCString(optarg, ORT_TSTR("coreml"))) {
test_config.machine_config.provider_type_name = onnxruntime::kCoreMLExecutionProvider;
} else if (!CompareCString(optarg, ORT_TSTR("dml"))) {

View file

@ -397,6 +397,12 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Nnapi(session_options, nnapi_flags));
#else
ORT_THROW("NNAPI is not supported in this build\n");
#endif
} else if (provider_name_ == onnxruntime::kVSINPUExecutionProvider) {
#ifdef USE_VSINPU
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_VSINPU(session_options));
#else
ORT_THROW("VSINPU is not supported in this build\n");
#endif
} else if (provider_name_ == onnxruntime::kCoreMLExecutionProvider) {
#ifdef __APPLE__

View file

@ -428,6 +428,7 @@ bool SetEpsForAllNodes(Graph& graph,
if (provider_type == onnxruntime::kOpenVINOExecutionProvider ||
provider_type == onnxruntime::kTensorrtExecutionProvider ||
provider_type == onnxruntime::kNnapiExecutionProvider ||
provider_type == onnxruntime::kVSINPUExecutionProvider ||
provider_type == onnxruntime::kCoreMLExecutionProvider ||
provider_type == onnxruntime::kDnnlExecutionProvider ||
provider_type == onnxruntime::kQnnExecutionProvider ||
@ -649,6 +650,7 @@ void BaseTester::RunWithConfig(size_t* number_of_pre_packed_weights_counter,
kAclExecutionProvider,
kArmNNExecutionProvider,
kNnapiExecutionProvider,
kVSINPUExecutionProvider,
kRocmExecutionProvider,
kCoreMLExecutionProvider,
kCoreMLExecutionProviderMLProgram,
@ -688,6 +690,8 @@ void BaseTester::RunWithConfig(size_t* number_of_pre_packed_weights_counter,
execution_provider = DefaultTensorrtExecutionProvider();
else if (provider_type == onnxruntime::kNnapiExecutionProvider)
execution_provider = DefaultNnapiExecutionProvider();
else if (provider_type == onnxruntime::kVSINPUExecutionProvider)
execution_provider = DefaultVSINPUExecutionProvider();
else if (provider_type == onnxruntime::kRknpuExecutionProvider)
execution_provider = DefaultRknpuExecutionProvider();
else if (provider_type == onnxruntime::kAclExecutionProvider)

View file

@ -30,6 +30,10 @@
#include "core/providers/nnapi/nnapi_provider_factory.h"
#endif
#ifdef USE_VSINPU
#include "core/providers/vsinpu/vsinpu_provider_factory.h"
#endif
#ifdef USE_RKNPU
#include "core/providers/rknpu/rknpu_provider_factory.h"
#endif
@ -238,6 +242,11 @@ TEST_P(ModelTest, Run) {
ASSERT_ORT_STATUS_OK(OrtSessionOptionsAppendExecutionProvider_Nnapi(ortso, 0));
}
#endif
#ifdef USE_VSINPU
else if (provider_name == "vsinpu") {
ASSERT_ORT_STATUS_OK(OrtSessionOptionsAppendExecutionProvider_VSINPU(ortso));
}
#endif
#ifdef USE_RKNPU
else if (provider_name == "rknpu") {
ASSERT_ORT_STATUS_OK(OrtSessionOptionsAppendExecutionProvider_Rknpu(ortso));
@ -406,6 +415,9 @@ static constexpr ORT_STRING_VIEW provider_name_dnnl = ORT_TSTR("dnnl");
#if defined(USE_NNAPI) && defined(__ANDROID__)
static constexpr ORT_STRING_VIEW provider_name_nnapi = ORT_TSTR("nnapi");
#endif
#ifdef USE_VSINPU
static ORT_STRING_VIEW provider_name_vsinpu = ORT_TSTR("vsinpu");
#endif
#ifdef USE_RKNPU
static constexpr ORT_STRING_VIEW provider_name_rknpu = ORT_TSTR("rknpu");
#endif
@ -447,6 +459,9 @@ static constexpr ORT_STRING_VIEW provider_name_dml = ORT_TSTR("dml");
#if defined(USE_NNAPI) && defined(__ANDROID__)
provider_names[provider_name_nnapi] = {opset7, opset8, opset9, opset10, opset11, opset12, opset13, opset14, opset15, opset16, opset17, opset18};
#endif
#ifdef USE_VSINPU
provider_names[provider_name_vsinpu] = {};
#endif
#ifdef USE_RKNPU
provider_names[provider_name_rknpu] = {};
#endif

View file

@ -190,6 +190,14 @@ std::unique_ptr<IExecutionProvider> DefaultNnapiExecutionProvider() {
#endif
}
std::unique_ptr<IExecutionProvider> DefaultVSINPUExecutionProvider() {
#if defined(USE_VSINPU)
return VSINPUProviderFactoryCreator::Create()->CreateProvider();
#else
return nullptr;
#endif
}
std::unique_ptr<IExecutionProvider> DefaultRknpuExecutionProvider() {
#ifdef USE_RKNPU
return RknpuProviderFactoryCreator::Create()->CreateProvider();

View file

@ -20,6 +20,7 @@ std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGrap
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Nnapi(
uint32_t flags, const optional<std::string>& partitioning_stop_ops_list);
// std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tvm(const char*);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_VSINPU();
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Rknpu();
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Rocm(const OrtROCMProviderOptions* provider_options);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* params);
@ -50,6 +51,7 @@ std::unique_ptr<IExecutionProvider> MIGraphXExecutionProviderWithOptions(const O
std::unique_ptr<IExecutionProvider> OpenVINOExecutionProviderWithOptions(const OrtOpenVINOProviderOptions* params);
std::unique_ptr<IExecutionProvider> DefaultOpenVINOExecutionProvider();
std::unique_ptr<IExecutionProvider> DefaultNnapiExecutionProvider();
std::unique_ptr<IExecutionProvider> DefaultVSINPUExecutionProvider();
std::unique_ptr<IExecutionProvider> DefaultRknpuExecutionProvider();
std::unique_ptr<IExecutionProvider> DefaultAclExecutionProvider(bool enable_arena = true);
std::unique_ptr<IExecutionProvider> DefaultArmNNExecutionProvider(bool enable_arena = true);

View file

@ -16,6 +16,9 @@
#ifdef USE_NNAPI
#include "core/providers/nnapi/nnapi_provider_factory.h"
#endif
#ifdef USE_VSINPU
#include "core/providers/vsinpu/vsinpu_provider_factory.h"
#endif
#ifdef USE_COREML
#include "core/providers/coreml/coreml_provider_factory.h"
#endif

View file

@ -561,6 +561,7 @@ def parse_arguments():
parser.add_argument("--use_snpe", action="store_true", help="Build with SNPE support.")
parser.add_argument("--snpe_root", help="Path to SNPE SDK root.")
parser.add_argument("--use_nnapi", action="store_true", help="Build with NNAPI support.")
parser.add_argument("--use_vsinpu", action="store_true", help="Build with VSINPU support.")
parser.add_argument(
"--nnapi_min_api", type=int, help="Minimum Android API level to enable NNAPI, should be no less than 27"
)
@ -1020,6 +1021,7 @@ def generate_build_tree(
"-Donnxruntime_BUILD_APPLE_FRAMEWORK=" + ("ON" if args.build_apple_framework else "OFF"),
"-Donnxruntime_USE_DNNL=" + ("ON" if args.use_dnnl else "OFF"),
"-Donnxruntime_USE_NNAPI_BUILTIN=" + ("ON" if args.use_nnapi else "OFF"),
"-Donnxruntime_USE_VSINPU=" + ("ON" if args.use_vsinpu else "OFF"),
"-Donnxruntime_USE_RKNPU=" + ("ON" if args.use_rknpu else "OFF"),
"-Donnxruntime_USE_LLVM=" + ("ON" if args.use_tvm else "OFF"),
"-Donnxruntime_ENABLE_MICROSOFT_INTERNAL=" + ("ON" if args.enable_msinternal else "OFF"),