mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-16 01:33:39 +00:00
[VSINPU]Code improvement && Slice/Dropout OP support (#21217)
### Description - Refactor codes to meet line length limit and guard missing warning - Add slice/dropout op support - Move vsinpu ep's cmake settings from onnxruntime_providers.cmake to a separate file - Modify apis with param onnxruntime::Path because this kind is replaced by std:filesystem::path by #20920
This commit is contained in:
parent
cc0de0d526
commit
fffd430091
39 changed files with 365 additions and 59 deletions
|
|
@ -192,32 +192,7 @@ if (onnxruntime_USE_TVM)
|
|||
endif()
|
||||
|
||||
if (onnxruntime_USE_VSINPU)
|
||||
add_definitions(-DUSE_VSINPU=1)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
|
||||
file(GLOB_RECURSE onnxruntime_providers_vsinpu_srcs
|
||||
"${ONNXRUNTIME_ROOT}/core/providers/vsinpu/builders/*.h"
|
||||
"${ONNXRUNTIME_ROOT}/core/providers/vsinpu/builders/*.cc"
|
||||
"${ONNXRUNTIME_ROOT}/core/providers/vsinpu/*.h"
|
||||
"${ONNXRUNTIME_ROOT}/core/providers/vsinpu/*.cc"
|
||||
"${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.h"
|
||||
"${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.cc"
|
||||
)
|
||||
source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_vsinpu_srcs})
|
||||
add_library(onnxruntime_providers_vsinpu ${onnxruntime_providers_vsinpu_srcs})
|
||||
onnxruntime_add_include_to_target(onnxruntime_providers_vsinpu
|
||||
onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf-lite flatbuffers Boost::mp11
|
||||
safeint_interface nsync::nsync_cpp)
|
||||
add_dependencies(onnxruntime_providers_vsinpu ${onnxruntime_EXTERNAL_DEPENDENCIES})
|
||||
set_target_properties(onnxruntime_providers_vsinpu PROPERTIES FOLDER "ONNXRuntime" LINKER_LANGUAGE CXX)
|
||||
target_include_directories(onnxruntime_providers_vsinpu PRIVATE ${ONNXRUNTIME_ROOT} $ENV{TIM_VX_INSTALL}/include)
|
||||
|
||||
find_library(TIMVX_LIBRARY NAMES tim-vx PATHS $ENV{TIM_VX_INSTALL}/lib NO_DEFAULT_PATH)
|
||||
if(TIMVX_LIBRARY)
|
||||
target_link_libraries(onnxruntime_providers_vsinpu PRIVATE ${TIMVX_LIBRARY})
|
||||
else()
|
||||
message(FATAL_ERROR "Cannot find TIM-VX library!")
|
||||
endif()
|
||||
|
||||
include(onnxruntime_providers_vsinpu.cmake)
|
||||
endif()
|
||||
|
||||
if (onnxruntime_USE_XNNPACK)
|
||||
|
|
|
|||
37
cmake/onnxruntime_providers_vsinpu.cmake
Normal file
37
cmake/onnxruntime_providers_vsinpu.cmake
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
add_definitions(-DUSE_VSINPU=1)
|
||||
file(GLOB_RECURSE onnxruntime_providers_vsinpu_srcs
|
||||
"${ONNXRUNTIME_ROOT}/core/providers/vsinpu/builders/*.h"
|
||||
"${ONNXRUNTIME_ROOT}/core/providers/vsinpu/builders/*.cc"
|
||||
"${ONNXRUNTIME_ROOT}/core/providers/vsinpu/*.h"
|
||||
"${ONNXRUNTIME_ROOT}/core/providers/vsinpu/*.cc"
|
||||
"${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.h"
|
||||
"${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.cc"
|
||||
)
|
||||
source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_vsinpu_srcs})
|
||||
add_library(onnxruntime_providers_vsinpu ${onnxruntime_providers_vsinpu_srcs})
|
||||
onnxruntime_add_include_to_target(onnxruntime_providers_vsinpu
|
||||
onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf-lite flatbuffers Boost::mp11
|
||||
safeint_interface nsync::nsync_cpp)
|
||||
add_dependencies(onnxruntime_providers_vsinpu ${onnxruntime_EXTERNAL_DEPENDENCIES})
|
||||
set_target_properties(onnxruntime_providers_vsinpu PROPERTIES FOLDER "ONNXRuntime" LINKER_LANGUAGE CXX)
|
||||
target_include_directories(onnxruntime_providers_vsinpu PRIVATE ${ONNXRUNTIME_ROOT} $ENV{TIM_VX_INSTALL}/include)
|
||||
|
||||
find_library(TIMVX_LIBRARY NAMES tim-vx PATHS $ENV{TIM_VX_INSTALL}/lib NO_DEFAULT_PATH)
|
||||
if(NOT TIMVX_LIBRARY)
|
||||
message(FATAL_ERROR "TIM-VX library is not found!")
|
||||
endif()
|
||||
|
||||
if(CMAKE_CROSSCOMPILING)
|
||||
message(STATUS "VSINPU ep will be cross compiled.")
|
||||
if(EXISTS "$ENV{VIVANTE_SDK_DIR}/drivers")
|
||||
set(DRIVER_DIR "$ENV{VIVANTE_SDK_DIR}/drivers")
|
||||
elseif(EXISTS "$ENV{VIVANTE_SDK_DIR}/lib")
|
||||
set(DRIVER_DIR "$ENV{VIVANTE_SDK_DIR}/lib")
|
||||
else()
|
||||
message(FATAL_ERROR "Neither drivers nor lib directory exists in this VIVANTE_SDK_DIR.")
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter -Wl,-rpath-link ${DRIVER_DIR} ${TIMVX_LIBRARY}")
|
||||
else()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
|
||||
target_link_libraries(onnxruntime_providers_vsinpu PRIVATE ${TIMVX_LIBRARY})
|
||||
endif()
|
||||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -100,6 +100,16 @@ bool BaseOpBuilder::HasSupportedInputOutputs(const InitializedTensorSet& initial
|
|||
}
|
||||
}
|
||||
for (const auto& output : node_unit.Outputs()) {
|
||||
for (const auto& dim : output.node_arg.Shape()->dim()) {
|
||||
if (!dim.has_dim_value()) {
|
||||
LOGS_DEFAULT(WARNING) << "Dynamic shape is not supported for now, for output:" << output.node_arg.Name();
|
||||
return false;
|
||||
}
|
||||
if (dim.dim_value() == 0 && output.node_arg.Shape()->dim_size() > 1) {
|
||||
LOGS_DEFAULT(WARNING) << "Zero in shape is not supported for now, for output:" << output.node_arg.Name();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (output.quant_param.has_value()) {
|
||||
if (!has_supported_shape(output.quant_param->scale, node_unit.Name(), node_unit.OpType()))
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ class BaseOpBuilder : public IOpBuilder {
|
|||
bool IsSupported(const onnxruntime::GraphViewer& graph_viewer,
|
||||
const NodeUnit& node_unit) const override;
|
||||
bool BuildOp(vsi::npu::GraphEP* graph_ep,
|
||||
const onnxruntime::GraphViewer& graph_viewer, const NodeUnit& node_unit);
|
||||
const onnxruntime::GraphViewer& graph_viewer, const NodeUnit& node_unit) override;
|
||||
virtual bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
|
||||
const Node* node) const {
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,81 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2024 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
|
||||
#include "core/providers/shared/utils/utils.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace vsi {
|
||||
namespace npu {
|
||||
class DropoutOpBuilder : public BaseOpBuilder {
|
||||
bool HasSupportedInputOutputsImpl(const InitializedTensorSet& initializers,
|
||||
const NodeUnit& node_unit) const override {
|
||||
if (node_unit.Inputs().size() > 2) {
|
||||
const ONNX_NAMESPACE::TensorProto* tensor_proto =
|
||||
initializers.at(node_unit.Inputs()[2].node_arg.Name());
|
||||
std::vector<uint8_t> training_mode(1);
|
||||
auto status = onnxruntime::utils::UnpackTensor(
|
||||
*tensor_proto,
|
||||
tensor_proto->has_raw_data() ? tensor_proto->raw_data().data() : nullptr,
|
||||
tensor_proto->has_raw_data() ? tensor_proto->raw_data().size() : 0,
|
||||
training_mode.data(), training_mode.size());
|
||||
if (!status.IsOK()) {
|
||||
LOGS_DEFAULT(ERROR) << "Failed to get data training mode tensor.";
|
||||
return false;
|
||||
}
|
||||
if (training_mode[0] == true) {
|
||||
LOGS_DEFAULT(WARNING) << "Only support inference typed dropout now.";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (node_unit.Inputs().size() > 1) return false;
|
||||
return true;
|
||||
}
|
||||
bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
|
||||
const Node* node) const override {
|
||||
NodeAttrHelper helper(*node);
|
||||
if (helper.HasAttr("seed")) {
|
||||
LOGS_DEFAULT(WARNING) << "Not support seed in Dropout op.";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
|
||||
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
|
||||
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
|
||||
const NodeUnit& node_unit) override {
|
||||
LOGS_DEFAULT(VERBOSE) << "Creating DropOut Op.";
|
||||
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Dropout>(1.0);
|
||||
(*op).BindInput(inputs[0]).BindOutputs(outputs);
|
||||
graph_ep->GetOps().push_back(std::move(op));
|
||||
return true;
|
||||
}
|
||||
};
|
||||
} // namespace npu
|
||||
|
||||
} // namespace vsi
|
||||
} // namespace onnxruntime
|
||||
|
|
@ -22,6 +22,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
@ -136,8 +137,10 @@ class ResizeOpBuilder : public BaseOpBuilder {
|
|||
for (int i = 0; i < input_shape.size(); i++) {
|
||||
out_shape[i] = input_shape[i] * scales[input_shape.size() - 1 - i];
|
||||
}
|
||||
target_h = static_cast<int>(out_shape[1]);
|
||||
target_w = static_cast<int>(out_shape[0]);
|
||||
op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Resize>(resize_type, 0, align_corners,
|
||||
half_pixel_center, out_shape[1], out_shape[0]);
|
||||
half_pixel_center, target_h, target_w);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,148 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2024 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
|
||||
#include "core/providers/common.h"
|
||||
#include "core/providers/shared/utils/utils.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace vsi {
|
||||
namespace npu {
|
||||
enum SliceInputs {
|
||||
data = 0,
|
||||
starts = 1,
|
||||
ends = 2,
|
||||
axes = 3,
|
||||
steps = 4
|
||||
};
|
||||
|
||||
class SliceOpBuilder : public BaseOpBuilder {
|
||||
public:
|
||||
int GetMinSupportedOpSet(const NodeUnit& /* node_unit */) const override { return 10; }
|
||||
|
||||
bool HasSupportedInputOutputsImpl(const InitializedTensorSet& initializers,
|
||||
const NodeUnit& node_unit) const override {
|
||||
for (size_t i = 0; i < node_unit.Inputs().size(); ++i) {
|
||||
const auto& iodef = node_unit.Inputs()[i];
|
||||
if (!util::IsTypeSupported(&iodef.node_arg) ||
|
||||
(i == 0 && *iodef.node_arg.Type() == "tensor(int64)") ||
|
||||
(i != 0 && !Contains(initializers, iodef.node_arg.Name()))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void CopyTensorDataToVector(const std::shared_ptr<tim::vx::Tensor>& tensor, std::vector<int32_t>& vec) {
|
||||
std::vector<T> data(tensor->GetSpec().GetElementNum());
|
||||
tensor->CopyDataFromTensor(data.data());
|
||||
std::transform(data.begin(), data.end(), vec.begin(), [](T val) {
|
||||
return static_cast<int32_t>(std::clamp(val, static_cast<T>(std::numeric_limits<int32_t>::min()),
|
||||
static_cast<T>(std::numeric_limits<int32_t>::max())));
|
||||
});
|
||||
}
|
||||
|
||||
void ProcessAxes(const std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
|
||||
int dims, bool full_axes,
|
||||
std::vector<int32_t>& timvx_starts,
|
||||
std::vector<int32_t>& timvx_ends,
|
||||
std::vector<int32_t>& timvx_strides) {
|
||||
auto num_elements = full_axes ? dims : inputs[SliceInputs::axes]->GetSpec().GetElementNum();
|
||||
std::vector<int32_t> onnx_starts(num_elements), onnx_ends(num_elements),
|
||||
onnx_axes(num_elements), onnx_strides(num_elements, 1);
|
||||
|
||||
auto data_type = inputs[SliceInputs::starts]->GetSpec().GetDataType();
|
||||
std::iota(onnx_axes.begin(), onnx_axes.end(), 0);
|
||||
if (data_type == tim::vx::DataType::INT64) {
|
||||
CopyTensorDataToVector<int64_t>(inputs[SliceInputs::starts], onnx_starts);
|
||||
CopyTensorDataToVector<int64_t>(inputs[SliceInputs::ends], onnx_ends);
|
||||
if (inputs.size() > 3) {
|
||||
CopyTensorDataToVector<int64_t>(inputs[SliceInputs::axes], onnx_axes);
|
||||
if (inputs.size() == 5) {
|
||||
CopyTensorDataToVector<int64_t>(inputs[SliceInputs::steps], onnx_strides);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
CopyTensorDataToVector<int32_t>(inputs[SliceInputs::starts], onnx_starts);
|
||||
CopyTensorDataToVector<int32_t>(inputs[SliceInputs::ends], onnx_ends);
|
||||
if (inputs.size() > 3) {
|
||||
CopyTensorDataToVector<int32_t>(inputs[SliceInputs::axes], onnx_axes);
|
||||
if (inputs.size() == 5) {
|
||||
CopyTensorDataToVector<int32_t>(inputs[SliceInputs::steps], onnx_strides);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!full_axes) {
|
||||
for (auto& axis : onnx_axes) {
|
||||
axis = HandleNegativeAxis(axis, inputs[0]->GetShape().size());
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < dims; ++i) {
|
||||
if (full_axes || std::find(onnx_axes.begin(), onnx_axes.end(), i) != onnx_axes.end()) {
|
||||
int axes_index = std::distance(onnx_axes.begin(), std::find(onnx_axes.begin(), onnx_axes.end(), i));
|
||||
timvx_starts[i] = onnx_starts[axes_index];
|
||||
timvx_ends[i] = onnx_ends[axes_index];
|
||||
if (inputs.size() == 5) {
|
||||
timvx_strides[i] = onnx_strides[axes_index];
|
||||
}
|
||||
} else if (!full_axes) {
|
||||
timvx_starts[i] = 0;
|
||||
timvx_ends[i] = inputs[SliceInputs::data]->GetShape()[dims - i - 1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
|
||||
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
|
||||
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
|
||||
const NodeUnit& node_unit) override {
|
||||
LOGS_DEFAULT(VERBOSE) << "Creating Slice Op.";
|
||||
auto total_dims = inputs[SliceInputs::data]->GetShape().size();
|
||||
bool full_axes = inputs.size() <= 3 || (inputs[SliceInputs::axes]->GetSpec().GetElementNum() == total_dims);
|
||||
std::vector<int32_t> timvx_starts(total_dims), timvx_ends(total_dims), timvx_strides(total_dims, 1);
|
||||
|
||||
ProcessAxes(inputs, total_dims, full_axes, timvx_starts, timvx_ends, timvx_strides);
|
||||
|
||||
std::reverse(timvx_starts.begin(), timvx_starts.end());
|
||||
std::reverse(timvx_ends.begin(), timvx_ends.end());
|
||||
std::reverse(timvx_strides.begin(), timvx_strides.end());
|
||||
|
||||
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::StridedSlice>(
|
||||
timvx_starts, timvx_ends, timvx_strides, 0, 0, 0);
|
||||
op->BindInput(inputs[SliceInputs::data]).BindOutputs(outputs);
|
||||
graph_ep->GetOps().push_back(std::move(op));
|
||||
return true;
|
||||
}
|
||||
};
|
||||
} // namespace npu
|
||||
} // namespace vsi
|
||||
} // namespace onnxruntime
|
||||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
@ -67,7 +68,8 @@ class SoftmaxOpBuilder : public BaseOpBuilder {
|
|||
auto reshaped_spec = inputs[0]->GetSpec().AsTransientSpec().SetShape(
|
||||
std::vector<uint32_t>{first_dim, last_dim});
|
||||
auto reshaped_input = graph_ep->GetGraph()->CreateTensor(reshaped_spec);
|
||||
auto reshaped_output = graph_ep->GetGraph()->CreateTensor(inputs[0]->GetSpec().AsTransientSpec());
|
||||
auto reshaped_output = graph_ep->GetGraph()->CreateTensor(
|
||||
inputs[0]->GetSpec().AsTransientSpec());
|
||||
|
||||
auto reshape_input_op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Reshape>(
|
||||
std::vector<uint32_t>{first_dim, last_dim});
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
|
|||
|
|
@ -51,6 +51,8 @@
|
|||
#include "impl/unsqueeze_op_builder.h"
|
||||
#include "impl/resize_op_builder.h"
|
||||
#include "impl/cast_op_builder.h"
|
||||
#include "impl/dropout_op_builder.h"
|
||||
#include "impl/slice_op_builder.h"
|
||||
namespace onnxruntime {
|
||||
namespace vsi {
|
||||
namespace npu {
|
||||
|
|
@ -108,7 +110,8 @@ static const std::map<std::string, createIOpBuildItemFunc> reg = {
|
|||
REGISTER_OP_BUILDER("Unsqueeze", UnsqueezeOpBuilder),
|
||||
REGISTER_OP_BUILDER("Resize", ResizeOpBuilder),
|
||||
REGISTER_OP_BUILDER("Cast", CastOpBuilder),
|
||||
|
||||
REGISTER_OP_BUILDER("Dropout", DropoutOpBuilder),
|
||||
REGISTER_OP_BUILDER("Slice", SliceOpBuilder)
|
||||
#undef REGISTER_OP_BUILDER
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1,34 +1,35 @@
|
|||
diff --git a/cmake/onnxruntime_mlas.cmake b/cmake/onnxruntime_mlas.cmake
|
||||
index e0ccc504d7..6c5aa6ea53 100644
|
||||
index 304aa77f54..5c22b7097b 100644
|
||||
--- a/cmake/onnxruntime_mlas.cmake
|
||||
+++ b/cmake/onnxruntime_mlas.cmake
|
||||
@@ -335,7 +335,7 @@ else()
|
||||
${MLAS_SRC_DIR}/qgemm_kernel_udot.cpp
|
||||
${MLAS_SRC_DIR}/qgemm_kernel_sdot.cpp
|
||||
@@ -354,7 +354,7 @@ else()
|
||||
)
|
||||
set_source_files_properties(${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon.cpp
|
||||
PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+dotprod")
|
||||
- if (NOT APPLE)
|
||||
+ if (NOT APPLE AND NOT onnxruntime_USE_VSINPU)
|
||||
set(mlas_platform_srcs
|
||||
${mlas_platform_srcs}
|
||||
${MLAS_SRC_DIR}/aarch64/HalfGemmKernelNeon.S
|
||||
diff --git a/onnxruntime/core/mlas/inc/mlas.h b/onnxruntime/core/mlas/inc/mlas.h
|
||||
index fd6b3df934..f81f1c42b6 100644
|
||||
index cdfd283899..678a055b24 100644
|
||||
--- a/onnxruntime/core/mlas/inc/mlas.h
|
||||
+++ b/onnxruntime/core/mlas/inc/mlas.h
|
||||
@@ -79,6 +79,7 @@ Abstract:
|
||||
@@ -82,6 +82,9 @@ Abstract:
|
||||
|
||||
#if (!defined(_MSC_VER)) || (_MSC_VER >= 1930)
|
||||
#if defined(MLAS_TARGET_ARM64) || defined(MLAS_TARGET_ARM64EC)
|
||||
+#if !defined(USE_VSINPU)
|
||||
+// Had to tempory disable fp16 under VeriSilicon ARM64 to avoid
|
||||
+// conflict of compilation flag.
|
||||
#if !defined(__APPLE__)
|
||||
// Had to temporary disable fp16 under APPLE ARM64, as compiling
|
||||
// the source files require a hardware specific compilation flag.
|
||||
@@ -87,7 +88,8 @@ Abstract:
|
||||
@@ -90,6 +93,7 @@ Abstract:
|
||||
|
||||
#define MLAS_F16VEC_INTRINSICS_SUPPORTED
|
||||
|
||||
-#endif //
|
||||
+#endif
|
||||
+#endif //
|
||||
#endif //
|
||||
#endif // ARM64
|
||||
#endif // Visual Studio 16 or earlier does not support fp16 intrinsic
|
||||
|
|
|
|||
|
|
@ -113,7 +113,9 @@ void GraphEP::UpdateTensorMap(const std::string& name, const std::shared_ptr<tim
|
|||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<NodeIOInfo> GraphEP::ConstructNodeIO(const std::shared_ptr<tim::vx::Operation>& op, std::vector<NodeArg*> input_arg, std::vector<NodeArg*> output_arg) {
|
||||
std::shared_ptr<NodeIOInfo> GraphEP::ConstructNodeIO(const std::shared_ptr<tim::vx::Operation>& op,
|
||||
std::vector<NodeArg*> input_arg,
|
||||
std::vector<NodeArg*> output_arg) {
|
||||
auto info = std::make_shared<vsi::npu::NodeIOInfo>();
|
||||
info->op_ = op;
|
||||
std::vector<std::string> input_names, output_names;
|
||||
|
|
@ -173,7 +175,6 @@ std::shared_ptr<tim::vx::Tensor> GraphEP::MapTIMVXTensor(
|
|||
const auto& arg = nudef.node_arg;
|
||||
|
||||
if (tensors_.end() != tensors_.find(nudef.node_arg.Name())) {
|
||||
// if (!quant_param.has_value() || quant_param.has_value() && tensors_[arg.Name()]->GetSpec().GetQuantization().Type() != tim::vx::QuantType::NONE)
|
||||
return tensors_.find(arg.Name())->second;
|
||||
}
|
||||
auto shape = vsi::npu::util::OnnxShapeToTIMVXShape(vsi::npu::util::GetTensorShape(arg));
|
||||
|
|
@ -190,16 +191,18 @@ std::shared_ptr<tim::vx::Tensor> GraphEP::MapTIMVXTensor(
|
|||
std::optional<std::vector<float>> scales;
|
||||
std::optional<std::vector<int32_t>> zps;
|
||||
if (nudef.quant_param.has_value()) {
|
||||
util::GetQuantizationScaleAndZeroPoint(graph_viewer_.GetAllInitializedTensors(),
|
||||
util::GetQuantizationScaleAndZeroPoint(graph_viewer_,
|
||||
nudef, node_unit.ModelPath(),
|
||||
scale, zp, scales, zps);
|
||||
} else {
|
||||
auto target_nodeunit = all_quantized_op_inputs_[arg.Name()][0];
|
||||
auto qinput = all_quantized_op_inputs_[arg.Name()][0]->Inputs();
|
||||
auto it = std::find_if(qinput.begin(), qinput.end(), [&arg](const NodeUnitIODef& nud) { return nud.node_arg.Name() == arg.Name(); });
|
||||
auto it = std::find_if(qinput.begin(), qinput.end(), [&arg](const NodeUnitIODef& nud) {
|
||||
return nud.node_arg.Name() == arg.Name();
|
||||
});
|
||||
bool is_conv_bias = std::distance(qinput.begin(), it) == 2;
|
||||
if (!is_conv_bias || it->quant_param.has_value()) {
|
||||
util::GetQuantizationScaleAndZeroPoint(graph_viewer_.GetAllInitializedTensors(),
|
||||
util::GetQuantizationScaleAndZeroPoint(graph_viewer_,
|
||||
*it, target_nodeunit->ModelPath(),
|
||||
scale, zp, scales, zps);
|
||||
} else if (!it->quant_param.has_value()) {
|
||||
|
|
@ -209,11 +212,12 @@ std::shared_ptr<tim::vx::Tensor> GraphEP::MapTIMVXTensor(
|
|||
std::optional<std::vector<int32_t>> in_zps, w_zps;
|
||||
|
||||
// onnx defines conv bias with non quantization, but it must be quantized in VSINPU support
|
||||
// The bias scale is set as input_scale * weight_scale if per layer quantized, input_scale* weight_scale[i] if per channel quantized
|
||||
util::GetQuantizationScaleAndZeroPoint(graph_viewer_.GetAllInitializedTensors(),
|
||||
// The bias scale is set as input_scale * weight_scale if per layer quantized,
|
||||
// otherwise input_scale* weight_scale[i] if per channel quantized
|
||||
util::GetQuantizationScaleAndZeroPoint(graph_viewer_,
|
||||
qinput[0], target_nodeunit->ModelPath(),
|
||||
in_scale, in_zp, in_scales, in_zps);
|
||||
util::GetQuantizationScaleAndZeroPoint(graph_viewer_.GetAllInitializedTensors(),
|
||||
util::GetQuantizationScaleAndZeroPoint(graph_viewer_,
|
||||
qinput[1], target_nodeunit->ModelPath(),
|
||||
w_scale, w_zp, w_scales, w_zps);
|
||||
scale = in_scale * w_scale;
|
||||
|
|
|
|||
|
|
@ -82,7 +82,8 @@ class GraphEP {
|
|||
|
||||
void UpdateTensorMap(const std::string& name, const std::shared_ptr<tim::vx::Tensor>& dst_tensor);
|
||||
|
||||
std::shared_ptr<NodeIOInfo> ConstructNodeIO(const std::shared_ptr<tim::vx::Operation>& op, std::vector<NodeArg*> input_arg, std::vector<NodeArg*> output_arg);
|
||||
std::shared_ptr<NodeIOInfo> ConstructNodeIO(const std::shared_ptr<tim::vx::Operation>& op,
|
||||
std::vector<NodeArg*> input_arg, std::vector<NodeArg*> output_arg);
|
||||
|
||||
bool BindTensors(const std::shared_ptr<NodeIOInfo>& nodeio_info);
|
||||
|
||||
|
|
|
|||
|
|
@ -137,9 +137,10 @@ VSINPUExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_vie
|
|||
std::for_each(result.begin(), result.end(), [&graph_viewer](auto& capability) {
|
||||
if (capability && capability->sub_graph && capability->sub_graph->GetMetaDef()) {
|
||||
const auto* meta_def = capability->sub_graph->GetMetaDef();
|
||||
bool has_any_non_constant_inputs = std::any_of(meta_def->inputs.begin(), meta_def->inputs.end(), [&graph_viewer](const auto& input) {
|
||||
return !graph_viewer.IsConstantInitializer(input, true);
|
||||
});
|
||||
bool has_any_non_constant_inputs = std::any_of(meta_def->inputs.begin(),
|
||||
meta_def->inputs.end(), [&graph_viewer](const auto& input) {
|
||||
return !graph_viewer.IsConstantInitializer(input, true);
|
||||
});
|
||||
|
||||
// ALL inputs are constant
|
||||
if (!has_any_non_constant_inputs) {
|
||||
|
|
@ -184,7 +185,8 @@ Status ComputeStateFunc(vsi::npu::GraphEP* graph_ep,
|
|||
const auto tensor_info = onnx_input_tensor.GetTensorTypeAndShapeInfo();
|
||||
|
||||
auto origin_tensor = graph_ep->GetGraphInputs()[i]->tensor;
|
||||
origin_tensor->CopyDataToTensor(onnx_input_tensor.GetTensorRawData(), vsi::npu::util::GetTensorBytes(tensor_info));
|
||||
origin_tensor->CopyDataToTensor(onnx_input_tensor.GetTensorRawData(),
|
||||
vsi::npu::util::GetTensorBytes(tensor_info));
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -412,7 +412,7 @@ bool HasValidBinaryOpQuantizedInputTypes(const NodeUnit& node_unit) {
|
|||
}
|
||||
|
||||
void GetQuantizationScaleAndZeroPoint(
|
||||
const InitializedTensorSet& initializers, const NodeUnitIODef& io_def, const Path& model_path,
|
||||
const GraphViewer& graph_viewer, const NodeUnitIODef& io_def, const std::filesystem::path& model_path,
|
||||
float& scale, int32_t& zero_point, std::optional<std::vector<float>>& pcq_scales,
|
||||
std::optional<std::vector<int32_t>>& pcq_zps) {
|
||||
scale = 0.0f;
|
||||
|
|
@ -421,7 +421,11 @@ void GetQuantizationScaleAndZeroPoint(
|
|||
const auto& quant_param = *io_def.quant_param;
|
||||
{ // get the scale
|
||||
const auto& name = quant_param.scale.Name();
|
||||
Initializer unpacked_tensor(*initializers.at(name), model_path);
|
||||
const auto* s = graph_viewer.GetConstantInitializer(name);
|
||||
if (!s) {
|
||||
LOGS_DEFAULT(ERROR) << name + " is not a constant initializer";
|
||||
};
|
||||
Initializer unpacked_tensor(*s, model_path);
|
||||
scale = unpacked_tensor.DataAsSpan<float>()[0];
|
||||
|
||||
// per channel quantized handling
|
||||
|
|
@ -434,12 +438,18 @@ void GetQuantizationScaleAndZeroPoint(
|
|||
|
||||
if (quant_param.zero_point) { // get the zero point if it exists
|
||||
const auto& name = quant_param.zero_point->Name();
|
||||
Initializer unpacked_tensor(*initializers.at(name), model_path);
|
||||
const auto* s = graph_viewer.GetConstantInitializer(name);
|
||||
if (!s) {
|
||||
LOGS_DEFAULT(ERROR) << name + " is not a constant initializer";
|
||||
};
|
||||
Initializer unpacked_tensor(*s, model_path);
|
||||
bool is_i8_zp = unpacked_tensor.data_type() == onnx::TensorProto_DataType_INT8;
|
||||
// some qdq conv bias is int32 quantized
|
||||
bool is_int32_zp = unpacked_tensor.data_type() == onnx::TensorProto_DataType_INT32;
|
||||
zero_point = is_i8_zp ? static_cast<int32_t>(unpacked_tensor.DataAsSpan<int8_t>()[0]) : is_int32_zp ? static_cast<int32_t>(unpacked_tensor.DataAsSpan<int32_t>()[0])
|
||||
: static_cast<int32_t>(unpacked_tensor.DataAsByteSpan()[0]);
|
||||
zero_point = is_i8_zp
|
||||
? static_cast<int32_t>(unpacked_tensor.DataAsSpan<int8_t>()[0])
|
||||
: is_int32_zp ? static_cast<int32_t>(unpacked_tensor.DataAsSpan<int32_t>()[0])
|
||||
: static_cast<int32_t>(unpacked_tensor.DataAsByteSpan()[0]);
|
||||
|
||||
// per channel quantized handling
|
||||
if (!unpacked_tensor.dims().empty() && unpacked_tensor.dims()[0] != 0 && unpacked_tensor.dims()[0] != 1) {
|
||||
|
|
@ -482,7 +492,8 @@ static bool IsInternalQuantizedNodeUnit(const NodeUnit& node_unit) {
|
|||
int32_t input_type;
|
||||
ORT_ENFORCE(GetType(*node.InputDefs()[0], input_type));
|
||||
|
||||
return input_type == ONNX_NAMESPACE::TensorProto_DataType_UINT8 || input_type == ONNX_NAMESPACE::TensorProto_DataType_INT8;
|
||||
return input_type == ONNX_NAMESPACE::TensorProto_DataType_UINT8 ||
|
||||
input_type == ONNX_NAMESPACE::TensorProto_DataType_INT8;
|
||||
}
|
||||
|
||||
bool GetType(const NodeArg& node_arg, int32_t& type) {
|
||||
|
|
|
|||
|
|
@ -118,7 +118,7 @@ bool IsQuantizedBinaryOp(QuantizedOpType quant_op_type);
|
|||
bool HasValidBinaryOpQuantizedInputTypes(const NodeUnit& node_unit);
|
||||
|
||||
void GetQuantizationScaleAndZeroPoint(
|
||||
const InitializedTensorSet& initializers, const NodeUnitIODef& io_def, const Path& model_path,
|
||||
const GraphViewer& graph_viewer, const NodeUnitIODef& io_def, const std::filesystem::path& model_path,
|
||||
float& scale, int32_t& zero_point,
|
||||
std::optional<std::vector<float>>& pcq_scales,
|
||||
std::optional<std::vector<int32_t>>& pcq_zps);
|
||||
|
|
|
|||
|
|
@ -35,8 +35,10 @@ void RunSliceTest(const std::vector<int64_t>& input_dims,
|
|||
excluded_providers.insert(excluded_providers_input.cbegin(), excluded_providers_input.cend());
|
||||
|
||||
// NNAPI EP does not support empty output
|
||||
// VSINPU EP does not support empty output
|
||||
if (std::any_of(output_dims.cbegin(), output_dims.cend(), [](int64_t i) { return i == 0; })) {
|
||||
excluded_providers.insert(kNnapiExecutionProvider);
|
||||
excluded_providers.insert(kVSINPUExecutionProvider);
|
||||
}
|
||||
|
||||
// TODO: ORT behavior when step < 0 and end = INT_MAX is wrong. Fix it and
|
||||
|
|
@ -515,6 +517,9 @@ TEST(SliceTest, Slice1D_ReverseAllAxes_1) {
|
|||
if (DefaultDmlExecutionProvider().get() != nullptr) {
|
||||
GTEST_SKIP() << "Skipping because of the following error: Expected output shape [{2,2}] did not match run output shape [{0,0}] for output";
|
||||
}
|
||||
if (DefaultVSINPUExecutionProvider().get() != nullptr) {
|
||||
GTEST_SKIP() << "Skipping because of the following error: Expected output shape [{4}] did not match run output shape [{0}] for output";
|
||||
}
|
||||
|
||||
RunSliceTest<float>({4},
|
||||
{1.0f, 2.0f, 3.0f, 4.0f},
|
||||
|
|
|
|||
Loading…
Reference in a new issue