mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-24 22:17:32 +00:00
Various armv7 related fixes (#5394)
* - Link with libatomic if needed - Install pip differently so it doesn't clash with the system pip which may involve a wrapper script - Remove ability to specify offset when Tensor allocates the data. The data prior to offset isn't accessible by anything. - Fix use of offset in TensorOpTest to work on armv7 where it must be aligned to the type it points to. - Fix ActivationOpNoInfTest.Softsign to allow for armv7 behavior - Fix ReductionOpTest.ReduceMean_*keepdims to allow for armv7 floating point inaccuracy * Address PR comments
This commit is contained in:
parent
b99eaa99cd
commit
a92ccbe1bc
9 changed files with 163 additions and 101 deletions
|
|
@ -305,6 +305,26 @@ if(onnxruntime_DISABLE_EXCEPTIONS)
|
|||
endif()
|
||||
endif()
|
||||
|
||||
# Decide whether libatomic has to be linked explicitly.
#
# We need to link with libatomic on systems that do not have built-in atomics, or
# don't have built-in support for 8 byte atomics.
# Derived from https://github.com/protocolbuffers/protobuf/blob/master/cmake/CMakeLists.txt
#
# Result: onnxruntime_LINK_LIBATOMIC is set to true when the probe program below
# cannot be built with the compiler's built-in atomics; it stays false otherwise
# (and always stays false for MSVC, where the probe is not run).
set(onnxruntime_LINK_LIBATOMIC false)
if (NOT MSVC)
  include(CheckCXXSourceCompiles)
  # CMAKE_REQUIRED_FLAGS must be one space-delimited string; a ;-separated list is
  # not handled correctly by the check modules. Quote every expansion so that any
  # pre-existing flags are extended (and later restored) as a single string.
  set(OLD_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++11")
  # <cstdint> is included explicitly so the probe does not depend on <atomic>
  # happening to declare the fixed-width integer types.
  check_cxx_source_compiles("
    #include <atomic>
    #include <cstdint>
    int main() {
      return std::atomic<std::int64_t>{};
    }
  " onnxruntime_HAVE_BUILTIN_ATOMICS)
  if (NOT onnxruntime_HAVE_BUILTIN_ATOMICS)
    set(onnxruntime_LINK_LIBATOMIC true)
  endif ()
  # Put the caller's flags back so later checks are unaffected by -std=c++11.
  set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}")
endif ()
|
||||
|
||||
set(REPO_ROOT ${PROJECT_SOURCE_DIR}/..)
|
||||
set(ONNXRUNTIME_ROOT ${PROJECT_SOURCE_DIR}/../onnxruntime)
|
||||
set(ORTTRAINING_ROOT ${PROJECT_SOURCE_DIR}/../orttraining)
|
||||
|
|
|
|||
|
|
@ -138,4 +138,10 @@ endif()
|
|||
|
||||
if (onnxruntime_WINML_NAMESPACE_OVERRIDE STREQUAL "Windows")
|
||||
target_compile_definitions(onnxruntime_common PRIVATE "BUILD_INBOX=1")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Link against libatomic when onnxruntime_LINK_LIBATOMIC is set, due to std::atomic usage by the threadpool code
|
||||
# e.g. Raspberry Pi requires this
|
||||
if (onnxruntime_LINK_LIBATOMIC)
|
||||
list(APPEND onnxruntime_EXTERNAL_LIBRARIES atomic)
|
||||
endif()
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
# Import info for 32-bit Qemu based build
|
||||
# There are also raspberry pi 4 and 64-bit images available so adjust as required
|
||||
FROM balenalib/raspberrypi3-python:latest-stretch-build
|
||||
|
||||
ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime
|
||||
ARG ONNXRUNTIME_SERVER_BRANCH=master
|
||||
|
||||
#Enforces cross-compilation through Quemu
|
||||
# Enforces cross-compilation through Qemu.
|
||||
RUN [ "cross-build-start" ]
|
||||
|
||||
RUN install_packages \
|
||||
|
|
@ -14,44 +16,41 @@ RUN install_packages \
|
|||
libssl-dev \
|
||||
wget \
|
||||
python3 \
|
||||
python3-pip \
|
||||
python3-dev \
|
||||
git \
|
||||
tar \
|
||||
libatlas-base-dev
|
||||
|
||||
RUN pip3 install --upgrade pip
|
||||
# Carefully install the latest version of pip
|
||||
WORKDIR /pip
|
||||
RUN wget https://bootstrap.pypa.io/get-pip.py
|
||||
RUN python3 get-pip.py
|
||||
RUN pip3 install --upgrade setuptools
|
||||
RUN pip3 install --upgrade wheel
|
||||
RUN pip3 install numpy
|
||||
|
||||
# Build the latest cmake
|
||||
WORKDIR /code
|
||||
RUN wget https://github.com/Kitware/CMake/releases/download/v3.14.3/cmake-3.14.3.tar.gz
|
||||
RUN tar zxf cmake-3.14.3.tar.gz
|
||||
RUN wget https://github.com/Kitware/CMake/releases/download/v3.18.3/cmake-3.18.3.tar.gz
|
||||
RUN tar zxf cmake-3.18.3.tar.gz
|
||||
|
||||
WORKDIR /code/cmake-3.14.3
|
||||
WORKDIR /code/cmake-3.18.3
|
||||
RUN ./configure --system-curl
|
||||
RUN make
|
||||
RUN sudo make install
|
||||
|
||||
# Set up build args
|
||||
ARG BUILDTYPE=MinSizeRel
|
||||
# if doing a 64-bit build change '--arm' to '--arm64'
|
||||
ARG BUILDARGS="--config ${BUILDTYPE} --arm"
|
||||
|
||||
# Prepare onnxruntime Repo
|
||||
WORKDIR /code
|
||||
RUN git clone --single-branch --branch ${ONNXRUNTIME_SERVER_BRANCH} --recursive ${ONNXRUNTIME_REPO} onnxruntime
|
||||
|
||||
# Start the basic build
|
||||
# Build ORT including the shared lib and python bindings
|
||||
WORKDIR /code/onnxruntime
|
||||
RUN ./build.sh --use_openmp ${BUILDARGS} --update --build
|
||||
|
||||
# Build Shared Library
|
||||
RUN ./build.sh --use_openmp ${BUILDARGS} --build_shared_lib
|
||||
|
||||
# Build Python Bindings and Wheel
|
||||
RUN ./build.sh --use_openmp ${BUILDARGS} --enable_pybind --build_wheel
|
||||
RUN ./build.sh --use_openmp ${BUILDARGS} --update --build --build_shared_lib --build_wheel
|
||||
|
||||
# Build Output
|
||||
RUN ls -l /code/onnxruntime/build/Linux/${BUILDTYPE}/*.so
|
||||
|
|
|
|||
|
|
@ -61,11 +61,12 @@ class Tensor final {
|
|||
Tensor() = default; // to allow creating vector<Tensor> to support seq(tensor)
|
||||
|
||||
/**
|
||||
* Create tensor with given type, shape, pre-allocate memory and allocator info.
|
||||
* Create tensor with given type, shape, pre-allocated memory and allocator info.
|
||||
* This function won't check if the preallocated buffer(p_data) has enough room for the shape.
|
||||
* \param data A preallocated buffer. Can be NULL if the shape is empty.
|
||||
* Tensor does not own the data and will not delete it
|
||||
* \param alloc Where the buffer('data') was allocated from
|
||||
* \param offset Offset in bytes to start of Tensor within p_data.
|
||||
*/
|
||||
Tensor(MLDataType p_type, const TensorShape& shape, void* p_data, const OrtMemoryInfo& alloc,
|
||||
ptrdiff_t offset = 0);
|
||||
|
|
@ -74,7 +75,7 @@ class Tensor final {
|
|||
* Deprecated. The original design is that this Tensor class won't do any allocation / release.
|
||||
* However, this function will allocate the buffer for the shape, and do placement new if p_type is string tensor.
|
||||
*/
|
||||
Tensor(MLDataType p_type, const TensorShape& shape, std::shared_ptr<IAllocator> allocator, ptrdiff_t offset = 0);
|
||||
Tensor(MLDataType p_type, const TensorShape& shape, std::shared_ptr<IAllocator> allocator);
|
||||
|
||||
~Tensor();
|
||||
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ SparseTensor::SparseTensor(MLDataType elt_type,
|
|||
void* values_data,
|
||||
void* indices_data,
|
||||
const OrtMemoryInfo& memory_info)
|
||||
: values_(elt_type, TensorShape({static_cast<int64_t>(nnz)}), values_data, memory_info, 0),
|
||||
: values_(elt_type, TensorShape({static_cast<int64_t>(nnz)}), values_data, memory_info),
|
||||
indices_(DataTypeImpl::GetType<int64_t>(),
|
||||
TensorShape({static_cast<int64_t>(nnz), static_cast<int64_t>(shape.NumDimensions())}),
|
||||
indices_data, memory_info, 0),
|
||||
|
|
@ -26,10 +26,10 @@ SparseTensor::SparseTensor(MLDataType elt_type,
|
|||
const TensorShape& shape,
|
||||
size_t nnz,
|
||||
std::shared_ptr<IAllocator> allocator)
|
||||
: values_(elt_type, TensorShape({static_cast<int64_t>(nnz)}), allocator, 0),
|
||||
: values_(elt_type, TensorShape({static_cast<int64_t>(nnz)}), allocator),
|
||||
indices_(DataTypeImpl::GetType<int64_t>(),
|
||||
TensorShape({static_cast<int64_t>(nnz), static_cast<int64_t>(shape.NumDimensions())}),
|
||||
allocator, 0),
|
||||
allocator),
|
||||
shape_(shape) {}
|
||||
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ Tensor::Tensor(MLDataType p_type, const TensorShape& shape, void* p_data, const
|
|||
Init(p_type, shape, p_data, nullptr, offset);
|
||||
}
|
||||
|
||||
Tensor::Tensor(MLDataType p_type, const TensorShape& shape, std::shared_ptr<IAllocator> allocator, ptrdiff_t offset)
|
||||
Tensor::Tensor(MLDataType p_type, const TensorShape& shape, std::shared_ptr<IAllocator> allocator)
|
||||
: alloc_info_(allocator->Info()) {
|
||||
ORT_ENFORCE(p_type != nullptr);
|
||||
int64_t shape_size = shape.Size(); // value returned is checked for overflow by TensorShape::Size()
|
||||
|
|
@ -30,13 +30,10 @@ Tensor::Tensor(MLDataType p_type, const TensorShape& shape, std::shared_ptr<IAll
|
|||
if (!allocator->CalcMemSizeForArray(SafeInt<size_t>(shape_size), p_type->Size(), &len))
|
||||
ORT_THROW("tensor failed memory size calculation");
|
||||
|
||||
// TODO: Use case for this isn't clear. We allocate a buffer based on the tensor shape and increase it by offset.
|
||||
// Who is going to use the memory prior to offset, and/or why should it be allocated here?
|
||||
len += offset;
|
||||
p_data = allocator->Alloc(len);
|
||||
}
|
||||
|
||||
Init(p_type, shape, p_data, allocator, offset);
|
||||
Init(p_type, shape, p_data, allocator);
|
||||
}
|
||||
|
||||
size_t Tensor::SizeInBytes() const {
|
||||
|
|
|
|||
|
|
@ -13,42 +13,43 @@
|
|||
namespace onnxruntime {
|
||||
namespace test {
|
||||
template <typename T>
|
||||
void CPUTensorTest(std::vector<int64_t> dims, const int offset = 0) {
|
||||
//not own the buffer
|
||||
TensorShape shape(dims);
|
||||
void CPUTensorTest(std::vector<int64_t> dims, const int offset_elements = 0) {
|
||||
// create Tensor where we provide the buffer
|
||||
TensorShape shape(dims); // this is the shape that will be available starting at the offset in the Tensor
|
||||
auto alloc = TestCPUExecutionProvider()->GetAllocator(0, OrtMemTypeDefault);
|
||||
auto data = alloc->Alloc(sizeof(T) * (shape.Size() + offset));
|
||||
EXPECT_TRUE(data);
|
||||
Tensor t(DataTypeImpl::GetType<T>(), shape, data, alloc->Info(), offset);
|
||||
// alloc extra data if needed, as anything before the offset is not covered by the shape
|
||||
auto num_elements = shape.Size() + offset_elements;
|
||||
auto num_bytes = num_elements * sizeof(T);
|
||||
auto offset_bytes = offset_elements * sizeof(T);
|
||||
void* data = alloc->Alloc(num_bytes);
|
||||
const T* first_element = static_cast<const T*>(data) + offset_elements;
|
||||
|
||||
Tensor t(DataTypeImpl::GetType<T>(), shape, data, alloc->Info(), offset_bytes);
|
||||
auto tensor_shape = t.Shape();
|
||||
//Use reinterpret_cast to bypass a gcc bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51213
|
||||
EXPECT_EQ(*reinterpret_cast<const std::vector<int64_t>*>(&shape), *reinterpret_cast<const std::vector<int64_t>*>(&tensor_shape));
|
||||
EXPECT_EQ(shape.GetDims(), tensor_shape.GetDims());
|
||||
EXPECT_EQ(t.DataType(), DataTypeImpl::GetType<T>());
|
||||
auto& location = t.Location();
|
||||
EXPECT_STREQ(location.name, CPU);
|
||||
EXPECT_EQ(location.id, 0);
|
||||
|
||||
auto t_data = t.template MutableData<T>();
|
||||
EXPECT_TRUE(t_data);
|
||||
memset(t_data, 0, sizeof(T) * shape.Size());
|
||||
EXPECT_EQ(*(T*)((char*)data + offset), (T)0);
|
||||
const T* t_data = t.Data<T>();
|
||||
EXPECT_EQ(first_element, t_data);
|
||||
alloc->Free(data);
|
||||
|
||||
Tensor new_t(DataTypeImpl::GetType<T>(), shape, alloc, offset);
|
||||
// test when the Tensor allocates the buffer.
|
||||
// there's no point using an offset_elements here as you'd be allocating extra data prior to the buffer needed
|
||||
// by the Tensor instance.
|
||||
if (offset_elements == 0) {
|
||||
Tensor new_t(DataTypeImpl::GetType<T>(), shape, alloc);
|
||||
EXPECT_TRUE(new_t.OwnsBuffer());
|
||||
|
||||
tensor_shape = new_t.Shape();
|
||||
//Use reinterpret_cast to bypass a gcc bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51213
|
||||
EXPECT_EQ(*reinterpret_cast<const std::vector<int64_t>*>(&shape), *reinterpret_cast<const std::vector<int64_t>*>(&tensor_shape));
|
||||
EXPECT_EQ(new_t.DataType(), DataTypeImpl::GetType<T>());
|
||||
auto& new_location = new_t.Location();
|
||||
ASSERT_STREQ(new_location.name, CPU);
|
||||
EXPECT_EQ(new_location.id, 0);
|
||||
|
||||
auto new_data = new_t.template MutableData<T>();
|
||||
EXPECT_TRUE(new_data);
|
||||
memset(new_data, 0, sizeof(T) * shape.Size());
|
||||
EXPECT_EQ(*(T*)((char*)new_data + offset), (T)0);
|
||||
//no free op as the tensor own the buffer
|
||||
tensor_shape = new_t.Shape();
|
||||
EXPECT_EQ(shape.GetDims(), tensor_shape.GetDims());
|
||||
EXPECT_EQ(new_t.DataType(), DataTypeImpl::GetType<T>());
|
||||
auto& new_location = new_t.Location();
|
||||
ASSERT_STREQ(new_location.name, CPU);
|
||||
EXPECT_EQ(new_location.id, 0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(TensorTest, CPUFloatTensorTest) {
|
||||
|
|
@ -208,11 +209,6 @@ TEST(TensorTest, SizeOverflow) {
|
|||
|
||||
Tensor t(type, shape1, nullptr, alloc->Info());
|
||||
EXPECT_THROW(t.SizeInBytes(), OnnxRuntimeException);
|
||||
|
||||
// overflow due to offset. max/4 from shape, *4 from float size, + 4 from offset
|
||||
TensorShape shape2({static_cast<int64_t>(std::numeric_limits<size_t>::max() / 4)});
|
||||
ptrdiff_t offset = sizeof(float); // one more element to push past max
|
||||
EXPECT_THROW(Tensor(type, shape2, alloc, offset), OnnxRuntimeException);
|
||||
}
|
||||
} // namespace test
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -9,60 +9,60 @@ namespace test {
|
|||
|
||||
TEST_F(ActivationOpTest, Sigmoid) {
|
||||
TestActivationOp("Sigmoid",
|
||||
input_values,
|
||||
[](float x) {
|
||||
auto y = 1.f / (1.f + std::exp(-std::abs(x))); // safe sigmoid
|
||||
y = x > 0 ? y : 1 - y;
|
||||
return y;
|
||||
});
|
||||
input_values,
|
||||
[](float x) {
|
||||
auto y = 1.f / (1.f + std::exp(-std::abs(x))); // safe sigmoid
|
||||
y = x > 0 ? y : 1 - y;
|
||||
return y;
|
||||
});
|
||||
}
|
||||
|
||||
TEST_F(ActivationOpTest, HardSigmoid) {
|
||||
float alpha = 0.2f;
|
||||
float beta = 0.5f;
|
||||
TestActivationOp("HardSigmoid",
|
||||
input_values,
|
||||
[alpha, beta](float x) {
|
||||
return std::max(std::min((alpha * x + beta), 1.0f), 0.0f);
|
||||
},
|
||||
{{"alpha", alpha}, {"beta", beta}});
|
||||
input_values,
|
||||
[alpha, beta](float x) {
|
||||
return std::max(std::min((alpha * x + beta), 1.0f), 0.0f);
|
||||
},
|
||||
{{"alpha", alpha}, {"beta", beta}});
|
||||
}
|
||||
|
||||
TEST_F(ActivationOpTest, Tanh) {
|
||||
TestActivationOp("Tanh",
|
||||
input_values,
|
||||
[](float x) { return std::tanh(x); });
|
||||
input_values,
|
||||
[](float x) { return std::tanh(x); });
|
||||
}
|
||||
|
||||
TEST_F(ActivationOpTest, Relu) {
|
||||
TestActivationOp("Relu",
|
||||
input_values,
|
||||
[](float x) { return std::max(x, 0.0f); });
|
||||
input_values,
|
||||
[](float x) { return std::max(x, 0.0f); });
|
||||
}
|
||||
|
||||
TEST_F(ActivationOpTest, Elu) {
|
||||
float alpha = 0.1f;
|
||||
TestActivationOp("Elu",
|
||||
input_values,
|
||||
[alpha](float x) { return (x >= 0) ? x : alpha * (exp(x) - 1); },
|
||||
{{"alpha", alpha}});
|
||||
input_values,
|
||||
[alpha](float x) { return (x >= 0) ? x : alpha * (exp(x) - 1); },
|
||||
{{"alpha", alpha}});
|
||||
}
|
||||
|
||||
TEST_F(ActivationOpTest, LeakyRelu) {
|
||||
float alpha = 0.1f;
|
||||
TestActivationOp("LeakyRelu",
|
||||
input_values,
|
||||
[alpha](float x) { return (x >= 0) ? x : alpha * x; },
|
||||
{{"alpha", alpha}});
|
||||
input_values,
|
||||
[alpha](float x) { return (x >= 0) ? x : alpha * x; },
|
||||
{{"alpha", alpha}});
|
||||
}
|
||||
|
||||
TEST_F(ActivationOpTest, ThresholdedRelu) {
|
||||
float alpha = 0.1f;
|
||||
TestActivationOp(
|
||||
"ThresholdedRelu",
|
||||
input_values,
|
||||
[alpha](float x) { return (x >= alpha) ? x : 0; },
|
||||
{{"alpha", alpha}}, true, 10);
|
||||
"ThresholdedRelu",
|
||||
input_values,
|
||||
[alpha](float x) { return (x >= alpha) ? x : 0; },
|
||||
{{"alpha", alpha}}, true, 10);
|
||||
}
|
||||
|
||||
TEST_F(ActivationOpTest, Selu) {
|
||||
|
|
@ -70,9 +70,9 @@ TEST_F(ActivationOpTest, Selu) {
|
|||
static constexpr float gamma = 1.0507f;
|
||||
|
||||
TestActivationOp("Selu",
|
||||
input_values,
|
||||
[](float x) { return x <= 0 ? gamma * (alpha * exp(x) - alpha) : gamma * x; },
|
||||
{{"alpha", alpha}, {"gamma", gamma}});
|
||||
input_values,
|
||||
[](float x) { return x <= 0 ? gamma * (alpha * exp(x) - alpha) : gamma * x; },
|
||||
{{"alpha", alpha}, {"gamma", gamma}});
|
||||
}
|
||||
|
||||
TEST_F(ActivationOpTest, Selu_Attributes) {
|
||||
|
|
@ -80,9 +80,9 @@ TEST_F(ActivationOpTest, Selu_Attributes) {
|
|||
static constexpr float gamma = 0.5f;
|
||||
|
||||
TestActivationOp("Selu",
|
||||
input_values,
|
||||
[](float x) { return x <= 0 ? gamma * (alpha * exp(x) - alpha) : gamma * x; },
|
||||
{{"alpha", alpha}, {"gamma", gamma}});
|
||||
input_values,
|
||||
[](float x) { return x <= 0 ? gamma * (alpha * exp(x) - alpha) : gamma * x; },
|
||||
{{"alpha", alpha}, {"gamma", gamma}});
|
||||
}
|
||||
|
||||
TEST_F(ActivationOpTest, PRelu) {
|
||||
|
|
@ -145,20 +145,46 @@ TEST_F(ActivationOpTest, PRelu_MultiChannel) {
|
|||
|
||||
TEST_F(ActivationOpTest, Softplus) {
|
||||
TestActivationOp("Softplus",
|
||||
input_values,
|
||||
[](float x) {
|
||||
if (x > 0)
|
||||
return x + logf(expf(-x) + 1);
|
||||
else
|
||||
return logf(expf(x) + 1);
|
||||
});
|
||||
input_values,
|
||||
[](float x) {
|
||||
if (x > 0)
|
||||
return x + logf(expf(-x) + 1);
|
||||
else
|
||||
return logf(expf(x) + 1);
|
||||
});
|
||||
}
|
||||
|
||||
TEST_F(ActivationOpNoInfTest, Softsign) {
|
||||
TestActivationOp(
|
||||
"Softsign",
|
||||
input_values,
|
||||
[](float x) { return x / (1 + std::abs(x)); }, {}, false); // Disable TensorRT because result mismatches
|
||||
"Softsign",
|
||||
input_values,
|
||||
[](float x) {
|
||||
auto result = x / (1 + std::abs(x));
|
||||
|
||||
#if defined(__arm__)
|
||||
// Softsign uses Eigen inverse(), which on ARM32 results in a different value when x is FLT_MAX or -FLT_MAX
|
||||
// 3.40282347e+38 -> 0 with ARM32 inverse() vs something like 2.939e-39#DEN with other platforms.
|
||||
//
|
||||
// Possibly explained by https://en.wikipedia.org/wiki/ARM_architecture#Advanced_SIMD_(Neon)
|
||||
// 'A quirk of Neon in Armv7 devices is that it flushes all subnormal numbers to zero'
|
||||
//
|
||||
// c.f.
|
||||
// cmake\external\eigen\Eigen\src\Core\arch\SSE\PacketMath.h uses _mm_div_ps for 'pdiv<Packet4f>'
|
||||
// cmake\external\eigen\Eigen\src\Core\arch\NEON\PacketMath.h uses a custom implementation for 'pdiv<Packet4f>'
|
||||
//
|
||||
// Special case the expected values to allow for that. If handling FLT_MAX more consistently is required
|
||||
// we'd need to not use Eigen for Softsign on ARM32.
|
||||
//
|
||||
if (x == FLT_MAX) {
|
||||
result = 0.;
|
||||
} else if (x == -FLT_MAX) {
|
||||
result = -0.;
|
||||
}
|
||||
#endif
|
||||
|
||||
return result;
|
||||
},
|
||||
{}, false); // Disable TensorRT because result mismatches
|
||||
}
|
||||
|
||||
} // namespace test
|
||||
|
|
|
|||
|
|
@ -598,7 +598,7 @@ TEST(ReductionOpTest, ReduceMax_int32) {
|
|||
#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_MYRIAD)
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO: Disabled temporarily
|
||||
#else
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: axis must be 0
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: axis must be 0
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
@ -619,7 +619,7 @@ TEST(ReductionOpTest, ReduceMax_int64) {
|
|||
#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_MYRIAD)
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO: Disabled temporarily
|
||||
#else
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: axis must be 0
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: axis must be 0
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
@ -640,7 +640,7 @@ TEST(ReductionOpTest, ReduceMax_int8) {
|
|||
#if defined(OPENVINO_CONFIG_MYRIAD)
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO: Disabled temporarily
|
||||
#else
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: axis must be 0
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: axis must be 0
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
@ -661,7 +661,7 @@ TEST(ReductionOpTest, ReduceMax_uint8) {
|
|||
#if defined(OPENVINO_CONFIG_MYRIAD)
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO: Disabled temporarily
|
||||
#else
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: axis must be 0
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: axis must be 0
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
@ -720,6 +720,14 @@ TEST(ReductionOpTest, ReduceMean_do_not_keepdims) {
|
|||
55.0f, 1.0f,
|
||||
60.0f, 2.0f});
|
||||
test.AddOutput<float>("reduced", {3, 2}, {12.5f, 1.5f, 35.0f, 1.5f, 57.5f, 1.5f});
|
||||
|
||||
#if defined(__arm__)
|
||||
// armv7 isn't as accurate so need to add a little tolerance for the diffs
|
||||
// expected[i] evaluates to 35,
|
||||
// output[i] evaluates to 34.999866485595703
|
||||
test.SetOutputRelErr("reduced", 1e-5f);
|
||||
#endif
|
||||
|
||||
test.Run();
|
||||
}
|
||||
|
||||
|
|
@ -747,6 +755,14 @@ TEST(ReductionOpTest, ReduceMean_keepdims) {
|
|||
55.0f, 1.0f,
|
||||
60.0f, 2.0f});
|
||||
test.AddOutput<float>("reduced", {3, 1, 2}, {12.5f, 1.5f, 35.0f, 1.5f, 57.5f, 1.5f});
|
||||
|
||||
#if defined(__arm__)
|
||||
// armv7 isn't as accurate so need to add a little tolerance for the diffs
|
||||
// expected[i] evaluates to 35,
|
||||
// output[i] evaluates to 34.999866485595703
|
||||
test.SetOutputRelErr("reduced", 1e-5f);
|
||||
#endif
|
||||
|
||||
test.Run();
|
||||
}
|
||||
|
||||
|
|
@ -764,6 +780,7 @@ TEST(ReductionOpTest, ReduceMean) {
|
|||
9.0f, 10.0f,
|
||||
11.0f, 12.0f});
|
||||
test.AddOutput<float>("reduced", {1, 2, 1}, {5.5f, 7.5f});
|
||||
|
||||
test.Run();
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue