mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-03 03:58:54 +00:00
Cast Op performance fix. (#6509)
Update CPU Cast implementation to fix performance regressions. Update Cast unit tests for more coverage.
This commit is contained in:
parent
2ef792ae6e
commit
318b82ca7e
9 changed files with 348 additions and 396 deletions
|
|
@ -59,50 +59,10 @@ struct MLFloat16 {
|
|||
explicit MLFloat16(uint16_t x) : val(x) {}
|
||||
explicit MLFloat16(float f);
|
||||
|
||||
// Taken from https://stackoverflow.com/a/60047308/12627730
|
||||
float AsFloat(uint32_t x) const {
|
||||
float out = 0.0f;
|
||||
std::memcpy(&out, &x, sizeof(x));
|
||||
return out;
|
||||
}
|
||||
|
||||
// Taken from https://stackoverflow.com/a/60047308/12627730
|
||||
uint32_t AsUint(float x) const {
|
||||
uint32_t out = 0;
|
||||
std::memcpy(&out, &x, sizeof(x));
|
||||
return out;
|
||||
}
|
||||
|
||||
float HalfToFloat(const uint16_t x) const {
|
||||
uint16_t half = x;
|
||||
if (endian::native == endian::big) {
|
||||
// Taken from https://stackoverflow.com/a/2182184/12627730
|
||||
half = (x >> 8) | (x << 8);
|
||||
}
|
||||
|
||||
// Taken from https://stackoverflow.com/a/60047308/12627730
|
||||
// IEEE-754 16-bit floating-point format (without infinity): 1-5-10, exp-15, +-131008.0, +-6.1035156E-5,
|
||||
// +-5.9604645E-8, 3.311 digits
|
||||
const uint32_t e = (half & 0x7C00) >> 10; // exponent
|
||||
const uint32_t m = (half & 0x03FF) << 13; // mantissa
|
||||
// evil log2 bit hack to count leading zeros in denormalized format
|
||||
const uint32_t v = AsUint(static_cast<float>(m)) >> 23;
|
||||
uint32_t full = (half & 0x8000) << 16 | (e != 0) * ((e + 112) << 23 | m) |
|
||||
((e == 0) & (m != 0)) * ((v - 37) << 23 | ((m << (150 - v)) & 0x007FE000)); // sign : normalized : denormalized
|
||||
|
||||
if (endian::native == endian::big) {
|
||||
// Taken from https://stackoverflow.com/a/2182184/12627730
|
||||
full = ((full >> 24) & 0xff) | // move byte 3 to byte 0
|
||||
((full << 8) & 0xff0000) | // move byte 1 to byte 2
|
||||
((full >> 8) & 0xff00) | // move byte 2 to byte 1
|
||||
((full << 24) & 0xff000000); // byte 0 to byte 3
|
||||
}
|
||||
|
||||
return AsFloat(full);
|
||||
}
|
||||
float ToFloat() const;
|
||||
|
||||
operator float() const {
|
||||
return HalfToFloat(val);
|
||||
return ToFloat();
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -281,7 +281,7 @@ class ThreadPool {
|
|||
/**
|
||||
* Tries to call the given function in parallel, with calls split into (num_batches) batches.
|
||||
*\param num_batches If it is zero, it will be replaced to the value of DegreeOfParallelism().
|
||||
*\param fn A std::function or STL style functor with signature of "void f(int32_t);"
|
||||
*\param fn A std::function or STL style functor with signature of "void f(std::ptrdiff_t);"
|
||||
* Pitfall: Caller should cap `num_batches` to a reasonable value based on the cost of `fn` and the value of `total`.
|
||||
*For example, if fn is as simple as: int sum=0; fn = [&](int i){sum +=i;} and `total` is 100, then num_batches should
|
||||
*be just 1.
|
||||
|
|
|
|||
|
|
@ -25,6 +25,10 @@ namespace onnxruntime {
|
|||
|
||||
MLFloat16::MLFloat16(float f) : val{math::floatToHalf(f)} {}
|
||||
|
||||
float MLFloat16::ToFloat() const {
|
||||
return math::halfToFloat(val);
|
||||
}
|
||||
|
||||
// Return the MLDataType used for a generic Tensor
|
||||
template <>
|
||||
MLDataType DataTypeImpl::GetType<Tensor>() {
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ static bool GetClipConstantMinMax(const Graph& graph, const Node& node, float& m
|
|||
// value = static_cast<float>(*i.data<double>());
|
||||
// break;
|
||||
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
|
||||
value = math::halfToFloat(i.data<BFloat16>()->val);
|
||||
value = math::halfToFloat(i.data<MLFloat16>()->val);
|
||||
break;
|
||||
default:
|
||||
ORT_THROW("Unexpected data type for Clip input of ", initializer->data_type());
|
||||
|
|
|
|||
|
|
@ -2,8 +2,8 @@
|
|||
// Licensed under the MIT License.
|
||||
|
||||
#include <cstddef>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
|
||||
#include "boost/mp11.hpp"
|
||||
|
||||
|
|
@ -18,15 +18,13 @@
|
|||
#include "core/providers/op_kernel_type_control.h"
|
||||
#include "core/util/math_cpuonly.h"
|
||||
|
||||
#include "Eigen/src/Core/arch/Default/BFloat16.h"
|
||||
#include "Eigen/src/Core/arch/Default/Half.h"
|
||||
|
||||
#if defined(_M_AMD64)
|
||||
#include "core/mlas/inc/mlas.h"
|
||||
#endif
|
||||
|
||||
using namespace ONNX_NAMESPACE;
|
||||
using namespace boost::mp11;
|
||||
|
||||
namespace onnxruntime {
|
||||
|
||||
namespace op_kernel_type_control {
|
||||
|
|
@ -56,20 +54,15 @@ using EnabledSrcTypes = ORT_OP_KERNEL_ARG_ENABLED_TYPE_LIST_ALL_OPSETS(kCpuExecu
|
|||
using EnabledDstTypes = ORT_OP_KERNEL_ARG_ENABLED_TYPE_LIST_ALL_OPSETS(kCpuExecutionProvider, kOnnxDomain,
|
||||
Cast, Output, 0);
|
||||
|
||||
using IndirectCastTypes = TypeList<MLFloat16, BFloat16>;
|
||||
|
||||
template <typename Type>
|
||||
using IsDirectCastType = mp_not<mp_contains<IndirectCastTypes, Type>>;
|
||||
|
||||
template <typename... Types>
|
||||
using AreAllDirectCastTypes = mp_all<IsDirectCastType<Types>...>;
|
||||
|
||||
// string cast helpers
|
||||
// Note: when C++17 is available, use <charconv> functions
|
||||
|
||||
// handle floating point input separately
|
||||
// handle floating point output separately
|
||||
template <typename SrcType>
|
||||
typename std::enable_if<std::is_floating_point<SrcType>::value, void>::type
|
||||
CastToString(const SrcType& input, std::string& output) {
|
||||
static_assert(sizeof(SrcType) <= sizeof(double),
|
||||
"largest supported floating point type is double");
|
||||
if (std::isnan(input)) {
|
||||
output = "NaN";
|
||||
} else if (std::isinf(input)) {
|
||||
|
|
@ -79,19 +72,49 @@ CastToString(const SrcType& input, std::string& output) {
|
|||
output = "INF";
|
||||
}
|
||||
} else {
|
||||
// setprecision to 8 to match numpy default behavior
|
||||
std::ostringstream convert;
|
||||
convert << std::setprecision(8) << input;
|
||||
output = convert.str();
|
||||
// set precision to 8 to match numpy default behavior
|
||||
constexpr const char* format = "%.8g";
|
||||
const double value = static_cast<double>(input);
|
||||
|
||||
char static_buffer[256];
|
||||
std::unique_ptr<char[]> dynamic_buffer{};
|
||||
|
||||
gsl::span<char> buffer_span = gsl::make_span(static_buffer);
|
||||
|
||||
auto snprintf_result = std::snprintf(buffer_span.data(), buffer_span.size(), format, value);
|
||||
ORT_ENFORCE(snprintf_result > 0, "snprintf() failed with return value: ", snprintf_result);
|
||||
|
||||
// include trailing '\0'
|
||||
const size_t required_buffer_size = gsl::narrow_cast<size_t>(snprintf_result) + 1;
|
||||
|
||||
if (required_buffer_size > buffer_span.size()) {
|
||||
// didn't get it all, allocate a bigger buffer and retry
|
||||
dynamic_buffer = onnxruntime::make_unique<char[]>(required_buffer_size);
|
||||
buffer_span = gsl::make_span(dynamic_buffer.get(), required_buffer_size);
|
||||
snprintf_result = std::snprintf(buffer_span.data(), buffer_span.size(), format, value);
|
||||
ORT_ENFORCE(
|
||||
snprintf_result > 0 &&
|
||||
gsl::narrow_cast<size_t>(snprintf_result) == buffer_span.size() - 1,
|
||||
"Failed to write value with snprintf().");
|
||||
}
|
||||
|
||||
output.assign(buffer_span.data(), required_buffer_size - 1);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename SrcType>
|
||||
typename std::enable_if<!std::is_floating_point<SrcType>::value, void>::type
|
||||
CastToString(const SrcType& input, std::string& output) {
|
||||
std::ostringstream convert;
|
||||
convert << input;
|
||||
output = convert.str();
|
||||
output = std::to_string(input);
|
||||
}
|
||||
|
||||
// overloads for MLFloat16 and BFloat16
|
||||
void CastToString(const MLFloat16& input, std::string& output) {
|
||||
CastToString(static_cast<float>(input), output);
|
||||
}
|
||||
|
||||
void CastToString(const BFloat16& input, std::string& output) {
|
||||
CastToString(static_cast<float>(input), output);
|
||||
}
|
||||
|
||||
template <typename DstType>
|
||||
|
|
@ -118,115 +141,121 @@ CastFromString(const std::string& input, DstType& output) {
|
|||
output = gsl::narrow_cast<DstType>(std::stoll(input));
|
||||
}
|
||||
|
||||
// generic scalar X -> Y
|
||||
template <typename SrcType, typename DstType>
|
||||
struct ScalarDirectCaster {
|
||||
void Cast(const SrcType& in, DstType& out) const {
|
||||
out = static_cast<DstType>(in);
|
||||
}
|
||||
// overloads for MLFloat16 and BFloat16
|
||||
void CastFromString(const std::string& input, MLFloat16& output) {
|
||||
float intermediate;
|
||||
CastFromString(input, intermediate);
|
||||
output = static_cast<MLFloat16>(intermediate);
|
||||
}
|
||||
|
||||
void CastFromString(const std::string& input, BFloat16& output) {
|
||||
float intermediate;
|
||||
CastFromString(input, intermediate);
|
||||
output = static_cast<BFloat16>(intermediate);
|
||||
}
|
||||
|
||||
// type that is usable with Eigen cast
|
||||
template <typename T>
|
||||
struct EigenCastType {
|
||||
using type = T;
|
||||
};
|
||||
|
||||
// scalar X -> string
|
||||
template <typename SrcType>
|
||||
struct ScalarDirectCaster<SrcType, std::string> {
|
||||
void Cast(const SrcType& in, std::string& out) const {
|
||||
CastToString<SrcType>(in, out);
|
||||
}
|
||||
// ORT float16 types don't support casting, so map them to Eigen ones
|
||||
|
||||
template <>
|
||||
struct EigenCastType<MLFloat16> {
|
||||
using type = Eigen::half;
|
||||
};
|
||||
|
||||
// scalar string -> X
|
||||
template <typename DstType>
|
||||
struct ScalarDirectCaster<std::string, DstType> {
|
||||
void Cast(const std::string& in, DstType& out) const {
|
||||
CastFromString<DstType>(in, out);
|
||||
}
|
||||
};
|
||||
|
||||
// helper for indirect cast types
|
||||
template <typename SrcType, typename DstType, typename IntermediateType>
|
||||
struct ScalarIndirectCaster {
|
||||
void Cast(const SrcType& in, DstType& out) const {
|
||||
IntermediateType intermediate;
|
||||
ScalarDirectCaster<SrcType, IntermediateType>{}.Cast(in, intermediate);
|
||||
ScalarDirectCaster<IntermediateType, DstType>{}.Cast(intermediate, out);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename SrcType, typename DstType, class Enable = void>
|
||||
struct ScalarCaster;
|
||||
|
||||
template <typename SrcType, typename DstType>
|
||||
struct ScalarCaster<
|
||||
SrcType, DstType,
|
||||
typename std::enable_if<AreAllDirectCastTypes<SrcType, DstType>::value>::type> {
|
||||
void Cast(const SrcType& in, DstType& out) const {
|
||||
ScalarDirectCaster<SrcType, DstType>{}.Cast(in, out);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename SrcType, typename DstType>
|
||||
struct ScalarCaster<
|
||||
SrcType, DstType,
|
||||
typename std::enable_if<!AreAllDirectCastTypes<SrcType, DstType>::value>::type> {
|
||||
void Cast(const SrcType& in, DstType& out) const {
|
||||
ScalarIndirectCaster<SrcType, DstType, float>{}.Cast(in, out);
|
||||
}
|
||||
template <>
|
||||
struct EigenCastType<BFloat16> {
|
||||
using type = Eigen::bfloat16;
|
||||
};
|
||||
|
||||
// generic tensor X -> Y
|
||||
template <typename SrcType, typename DstType>
|
||||
template <typename SrcType, typename DstType, typename Enable = void>
|
||||
struct TensorCaster {
|
||||
void Cast(const Tensor& in, Tensor& out, const TensorShape& shape) const {
|
||||
void Cast(const OpKernelContext&, const Tensor& in, Tensor& out, const TensorShape& shape) const {
|
||||
using SrcEigenCastType = typename EigenCastType<SrcType>::type;
|
||||
using DstEigenCastType = typename EigenCastType<DstType>::type;
|
||||
|
||||
const std::ptrdiff_t shape_size = gsl::narrow<std::ptrdiff_t>(shape.Size());
|
||||
const auto in_vector = ConstEigenVectorMap<SrcType>(in.Data<SrcType>(), shape_size);
|
||||
auto out_vector = EigenVectorMap<DstType>(out.MutableData<DstType>(), shape_size);
|
||||
out_vector = in_vector.unaryExpr([](const SrcType& in_scalar) {
|
||||
DstType out_scalar;
|
||||
ScalarCaster<SrcType, DstType>{}.Cast(in_scalar, out_scalar);
|
||||
return out_scalar;
|
||||
});
|
||||
const auto in_vector =
|
||||
ConstEigenVectorMap<SrcEigenCastType>(reinterpret_cast<const SrcEigenCastType*>(in.Data<SrcType>()), shape_size);
|
||||
auto out_vector =
|
||||
EigenVectorMap<DstEigenCastType>(reinterpret_cast<DstEigenCastType*>(out.MutableData<DstType>()), shape_size);
|
||||
out_vector = in_vector.template cast<DstEigenCastType>();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename SrcType, typename DstType>
|
||||
void CastStringTensor(const Tensor& in, Tensor& out, const TensorShape& shape) {
|
||||
static_assert(std::is_same<SrcType, std::string>::value || std::is_same<DstType, std::string>::value,
|
||||
"Either SrcType or DstType must be std::string.");
|
||||
const std::ptrdiff_t shape_size = gsl::narrow<std::ptrdiff_t>(shape.Size());
|
||||
const auto in_data = in.DataAsSpan<SrcType>();
|
||||
const auto out_data = out.MutableDataAsSpan<DstType>();
|
||||
for (std::ptrdiff_t i = 0; i < shape_size; ++i) {
|
||||
ScalarCaster<SrcType, DstType>{}.Cast(in_data[i], out_data[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// tensor X -> string
|
||||
template <typename SrcType>
|
||||
struct TensorCaster<SrcType, std::string> {
|
||||
void Cast(const Tensor& in, Tensor& out, const TensorShape& shape) const {
|
||||
CastStringTensor<SrcType, std::string>(in, out, shape);
|
||||
void Cast(const OpKernelContext&, const Tensor& in, Tensor& out, const TensorShape& shape) const {
|
||||
const std::ptrdiff_t shape_size = gsl::narrow<std::ptrdiff_t>(shape.Size());
|
||||
const auto* in_data = in.Data<SrcType>();
|
||||
auto* out_data = out.MutableData<std::string>();
|
||||
for (std::ptrdiff_t i = 0; i < shape_size; ++i) {
|
||||
CastToString(in_data[i], out_data[i]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// tensor string -> X
|
||||
template <typename DstType>
|
||||
struct TensorCaster<std::string, DstType> {
|
||||
void Cast(const Tensor& in, Tensor& out, const TensorShape& shape) const {
|
||||
CastStringTensor<std::string, DstType>(in, out, shape);
|
||||
void Cast(const OpKernelContext&, const Tensor& in, Tensor& out, const TensorShape& shape) const {
|
||||
const std::ptrdiff_t shape_size = gsl::narrow<std::ptrdiff_t>(shape.Size());
|
||||
const auto* in_data = in.Data<std::string>();
|
||||
auto* out_data = out.MutableData<DstType>();
|
||||
for (std::ptrdiff_t i = 0; i < shape_size; ++i) {
|
||||
CastFromString(in_data[i], out_data[i]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(_M_AMD64)
|
||||
// specializations to use optimized and Windows x64-specific
|
||||
// MlasConvertHalfToFloatBuffer() routine for MLFloat16 -> float conversion
|
||||
|
||||
template <typename DstType>
|
||||
void CastMLFloat16ThroughFloat(
|
||||
const OpKernelContext& context, const Tensor& in, Tensor& out, const TensorShape& shape) {
|
||||
// use optimized MLFloat16 -> float, then float -> DstType
|
||||
AllocatorPtr allocator;
|
||||
ORT_THROW_IF_ERROR(context.GetTempSpaceAllocator(&allocator));
|
||||
auto intermediate_buffer = IAllocator::MakeUniquePtr<float>(allocator, gsl::narrow<size_t>(shape.Size()));
|
||||
Tensor intermediate_tensor{DataTypeImpl::GetType<float>(), shape, intermediate_buffer.get(), allocator->Info()};
|
||||
TensorCaster<MLFloat16, float>{}.Cast(context, in, intermediate_tensor, shape);
|
||||
TensorCaster<float, DstType>{}.Cast(context, intermediate_tensor, out, shape);
|
||||
}
|
||||
|
||||
// tensor MLFloat16 -> X
|
||||
template <typename DstType>
|
||||
struct TensorCaster<MLFloat16, DstType> {
|
||||
void Cast(const OpKernelContext& context, const Tensor& in, Tensor& out, const TensorShape& shape) const {
|
||||
CastMLFloat16ThroughFloat<DstType>(context, in, out, shape);
|
||||
}
|
||||
};
|
||||
|
||||
// tensor MLFloat16 -> float
|
||||
template <>
|
||||
struct TensorCaster<MLFloat16, float> {
|
||||
void Cast(const Tensor& in, Tensor& out, const TensorShape& shape) const {
|
||||
void Cast(const OpKernelContext&, const Tensor& in, Tensor& out, const TensorShape& shape) const {
|
||||
auto out_data = out.MutableData<float>();
|
||||
auto in_data = in.Data<MLFloat16>();
|
||||
const size_t shape_size = gsl::narrow<size_t>(shape.Size());
|
||||
MlasConvertHalfToFloatBuffer(&in_data[0].val, out_data, shape_size);
|
||||
}
|
||||
};
|
||||
|
||||
// tensor MLFloat16 -> string
|
||||
template <>
|
||||
struct TensorCaster<MLFloat16, std::string> {
|
||||
void Cast(const OpKernelContext& context, const Tensor& in, Tensor& out, const TensorShape& shape) const {
|
||||
CastMLFloat16ThroughFloat<std::string>(context, in, out, shape);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
class Cast final : public OpKernel {
|
||||
|
|
@ -246,17 +275,18 @@ class Cast final : public OpKernel {
|
|||
|
||||
template <typename TSrc, typename TDst>
|
||||
struct Dispatcher {
|
||||
void operator()(const Tensor& src, Tensor& dst, const TensorShape& shape) {
|
||||
TensorCaster<TSrc, TDst>{}.Cast(src, dst, shape);
|
||||
void operator()(const OpKernelContext& context, const Tensor& src, Tensor& dst, const TensorShape& shape) {
|
||||
TensorCaster<TSrc, TDst>{}.Cast(context, src, dst, shape);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename TSrc>
|
||||
struct SrcDispatcher {
|
||||
void operator()(int32_t to, const Tensor& src, Tensor& dst, const TensorShape& shape) {
|
||||
using DstTypes = mp_remove_if_q<EnabledDstTypes, mp_bind_front<std::is_same, TSrc>>;
|
||||
void operator()(
|
||||
int32_t to, const OpKernelContext& context, const Tensor& src, Tensor& dst, const TensorShape& shape) {
|
||||
using DstTypes = boost::mp11::mp_remove_if_q<EnabledDstTypes, boost::mp11::mp_bind_front<std::is_same, TSrc>>;
|
||||
utils::MLTypeCallDispatcherFromTypeList<DstTypes> dispatcher{to};
|
||||
dispatcher.template InvokeWithLeadingTemplateArgs<Dispatcher, TypeList<TSrc>>(src, dst, shape);
|
||||
dispatcher.template InvokeWithLeadingTemplateArgs<Dispatcher, TypeList<TSrc>>(context, src, dst, shape);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -278,7 +308,7 @@ Status Cast::Compute(OpKernelContext* context) const {
|
|||
}
|
||||
|
||||
utils::MLTypeCallDispatcherFromTypeList<EnabledSrcTypes> dispatcher{from};
|
||||
dispatcher.Invoke<SrcDispatcher>(to_, *X, *Y, shape);
|
||||
dispatcher.Invoke<SrcDispatcher>(to_, *context, *X, *Y, shape);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -273,20 +273,25 @@ struct EnabledTypes {
|
|||
*
|
||||
* In MyProvider provider's implementation of MyOp kernel:
|
||||
*
|
||||
* namespace onnxruntime {
|
||||
* namespace op_kernel_type_control {
|
||||
* // specify supported types, i.e., the full set of types that can be enabled
|
||||
* ORT_SPECIFY_OP_KERNEL_ARG_SUPPORTED_TYPES(
|
||||
* MyProvider, DomainContainingMyOp, MyOp, Input, 0,
|
||||
* int, float, double);
|
||||
* } // namespace op_kernel_type_control
|
||||
* } // namespace onnxruntime
|
||||
*
|
||||
* // ...
|
||||
*
|
||||
* // get enabled types
|
||||
* using MyOpFirstInputEnabledTypes =
|
||||
* ORT_OP_KERNEL_ARG_ENABLED_TYPE_LIST(MyProvider, DomainContainingMyOp, MyOp, Input, 0)
|
||||
* ORT_OP_KERNEL_ARG_ENABLED_TYPE_LIST(MyProvider, DomainContainingMyOp, MyOp, Input, 0);
|
||||
*
|
||||
* ...
|
||||
* // ...
|
||||
*
|
||||
* // in the implementation, we can dispatch to the enabled types
|
||||
* utils::MLTypeCallDispatcherFromTypeList<MyOpFirstInputEnabledTypes> dispatcher{firstInputRuntimeType};
|
||||
* ...
|
||||
* // use MLTypeCallDispatcher to dispatch to implementations for enabled types
|
||||
* using Dispatcher = onnxruntime::utils::MLTypeCallDispatcherFromTypeList<MyOpFirstInputEnabledTypes>;
|
||||
*/
|
||||
|
||||
// all allowed type specifications should be contained in the following file
|
||||
|
|
|
|||
188
onnxruntime/test/providers/cpu/tensor/cast_op_test.cc
Normal file
188
onnxruntime/test/providers/cpu/tensor/cast_op_test.cc
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#include "boost/mp11.hpp"
|
||||
|
||||
#include "gsl/gsl"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include "core/framework/data_types_internal.h"
|
||||
|
||||
#include "test/common/cuda_op_test_utils.h"
|
||||
#include "test/providers/provider_test_utils.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace test {
|
||||
|
||||
template <typename T>
|
||||
int GetMinRequiredCudaComputeCapability() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <>
|
||||
int GetMinRequiredCudaComputeCapability<MLFloat16>() {
|
||||
return 530;
|
||||
}
|
||||
|
||||
template <>
|
||||
int GetMinRequiredCudaComputeCapability<BFloat16>() {
|
||||
return 800;
|
||||
}
|
||||
|
||||
template <typename SrcType,
|
||||
typename DstType>
|
||||
void TestCastOp(gsl::span<const SrcType> input,
|
||||
gsl::span<const DstType> output,
|
||||
const std::vector<int64_t>& dimensions,
|
||||
OpTester::ExpectResult expect_result = OpTester::ExpectResult::kExpectSuccess,
|
||||
const std::string& expected_failure_string = "") {
|
||||
OpTester test("Cast", 13);
|
||||
test.AddAttribute<int64_t>("to", utils::ToTensorProtoElementType<DstType>());
|
||||
test.AddInput<SrcType>("input", dimensions, input.data(), input.size());
|
||||
test.AddOutput<DstType>("output", dimensions, output.data(), output.size());
|
||||
|
||||
std::unordered_set<std::string> excluded_provider_types{kTensorrtExecutionProvider};
|
||||
const auto min_required_cuda_compute_capability =
|
||||
std::max(GetMinRequiredCudaComputeCapability<SrcType>(), GetMinRequiredCudaComputeCapability<DstType>());
|
||||
if (!HasCudaEnvironment(min_required_cuda_compute_capability)) {
|
||||
excluded_provider_types.insert(kCudaExecutionProvider);
|
||||
}
|
||||
|
||||
test.Run(expect_result, expected_failure_string, excluded_provider_types);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
using RequiresCastThroughFloat =
|
||||
boost::mp11::mp_any<
|
||||
std::is_same<T, MLFloat16>,
|
||||
std::is_same<T, BFloat16>>;
|
||||
|
||||
template <typename... T>
|
||||
using AnyRequireCastThroughFloat = boost::mp11::mp_any<RequiresCastThroughFloat<T>...>;
|
||||
|
||||
template <typename SrcType, typename DstType>
|
||||
typename std::enable_if<AnyRequireCastThroughFloat<SrcType, DstType>::value>::type
|
||||
CastSpan(gsl::span<const SrcType> src, gsl::span<DstType> dst) {
|
||||
std::transform(
|
||||
src.begin(), src.end(), dst.begin(),
|
||||
[](SrcType s) {
|
||||
return static_cast<DstType>(static_cast<float>(s));
|
||||
});
|
||||
}
|
||||
|
||||
template <typename SrcType, typename DstType>
|
||||
typename std::enable_if<!AnyRequireCastThroughFloat<SrcType, DstType>::value>::type
|
||||
CastSpan(gsl::span<const SrcType> src, gsl::span<DstType> dst) {
|
||||
std::transform(
|
||||
src.begin(), src.end(), dst.begin(),
|
||||
[](SrcType s) {
|
||||
return static_cast<DstType>(s);
|
||||
});
|
||||
}
|
||||
|
||||
template <typename SrcType, typename DstType>
|
||||
std::vector<DstType> CastedValues(gsl::span<const SrcType> src) {
|
||||
std::vector<DstType> result(src.size());
|
||||
CastSpan<SrcType, DstType>(src, gsl::make_span(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
struct CastNonStringTester {
|
||||
template <typename SrcType, typename DstType>
|
||||
void operator()(const std::pair<SrcType, DstType>&) {
|
||||
SCOPED_TRACE(
|
||||
onnxruntime::MakeString(
|
||||
"Cast from type ", utils::ToTensorProtoElementType<SrcType>(),
|
||||
" to type ", utils::ToTensorProtoElementType<DstType>()));
|
||||
|
||||
const std::vector<int> input_int_values{
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
const TensorShape shape{2, 3, 2, 2};
|
||||
const size_t size = gsl::narrow<size_t>(shape.Size());
|
||||
ASSERT_EQ(input_int_values.size(), size);
|
||||
|
||||
auto input_buffer = onnxruntime::make_unique<SrcType[]>(size);
|
||||
auto input_span = gsl::make_span<SrcType>(input_buffer.get(), size);
|
||||
CastSpan<int, SrcType>(gsl::make_span(input_int_values), input_span);
|
||||
|
||||
auto output_buffer = onnxruntime::make_unique<DstType[]>(size);
|
||||
auto output_span = gsl::make_span<DstType>(output_buffer.get(), size);
|
||||
CastSpan<SrcType, DstType>(input_span, output_span);
|
||||
|
||||
TestCastOp<SrcType, DstType>(input_span, output_span, shape.GetDims());
|
||||
}
|
||||
};
|
||||
|
||||
using CastNonStringTypes =
|
||||
boost::mp11::mp_list<
|
||||
bool,
|
||||
float, double,
|
||||
uint8_t, uint16_t, uint32_t, uint64_t,
|
||||
int8_t, int16_t, int32_t, int64_t,
|
||||
MLFloat16, BFloat16>;
|
||||
|
||||
TEST(CastOpTest, NonStringTypes) {
|
||||
boost::mp11::mp_for_each<boost::mp11::mp_product<std::pair, CastNonStringTypes, CastNonStringTypes>>(
|
||||
CastNonStringTester{});
|
||||
}
|
||||
|
||||
TEST(CastOpTest, FromString) {
|
||||
const std::vector<int64_t> shape{2, 2, 2};
|
||||
const std::vector<std::string> string_data = {"-inf", "+INF", "0.9767611", "0.28280696",
|
||||
"-0.12019656", "5.0", "NaN", "nan"};
|
||||
const std::vector<float> float_output = {-(std::numeric_limits<float>::infinity()), std::numeric_limits<float>::infinity(),
|
||||
0.9767611f, 0.28280696f,
|
||||
-0.12019656f, 5.0f, NAN, NAN};
|
||||
TestCastOp(gsl::make_span(string_data), gsl::make_span(float_output), shape);
|
||||
|
||||
const std::vector<std::string> float16_string_data = {"-inf", "+INF", "0.5", "0.25",
|
||||
"0.0", "-1.0", "-1.5", "NaN"};
|
||||
const std::vector<MLFloat16> float16_output =
|
||||
CastedValues<float, MLFloat16>(
|
||||
gsl::make_span(
|
||||
std::vector<float>{
|
||||
-std::numeric_limits<float>::infinity(), std::numeric_limits<float>::infinity(), 0.5f, 0.25f,
|
||||
0.0f, -1.0f, -1.5f, NAN}));
|
||||
TestCastOp(gsl::make_span(float16_string_data), gsl::make_span(float16_output), shape);
|
||||
|
||||
const std::vector<std::string> int_16_string_data = {"0", "1", "2", "3", "4", "5", "-32768", "32767"};
|
||||
const std::vector<int16_t> int_16_output = {0, 1, 2, 3, 4, 5, SHRT_MIN, SHRT_MAX};
|
||||
TestCastOp(gsl::make_span(int_16_string_data), gsl::make_span(int_16_output), shape);
|
||||
|
||||
const std::vector<std::string> int_64_string_data = {"0", "1", "2", "3", "4", "5", "-9223372036854775808", "9223372036854775807"};
|
||||
const std::vector<int64_t> int_64_output = {0, 1, 2, 3, 4, 5, LLONG_MIN, LLONG_MAX};
|
||||
TestCastOp(gsl::make_span(int_64_string_data), gsl::make_span(int_64_output), shape);
|
||||
}
|
||||
|
||||
TEST(CastOpTest, ToString) {
|
||||
const std::vector<int64_t> shape{2, 2, 2};
|
||||
const std::vector<float> float_input = {NAN, -1.f, 0.0391877927f, 0.296140194f, -0.120196559f, 5.0f,
|
||||
-std::numeric_limits<float>::infinity(),
|
||||
std::numeric_limits<float>::infinity()};
|
||||
|
||||
// float output precision is 8, so the expected output differs slightly from the input due to that
|
||||
const std::vector<std::string> string_output = {"NaN", "-1", "0.039187793", "0.29614019",
|
||||
"-0.12019656", "5", "-INF", "INF"};
|
||||
TestCastOp(gsl::make_span(float_input), gsl::make_span(string_output), shape);
|
||||
|
||||
const std::vector<MLFloat16> float16_input =
|
||||
CastedValues<float, MLFloat16>(
|
||||
gsl::make_span(
|
||||
std::vector<float>{
|
||||
-std::numeric_limits<float>::infinity(), std::numeric_limits<float>::infinity(), 0.5f, 0.25f,
|
||||
0.0f, -1.0f, -1.5f, NAN}));
|
||||
const std::vector<std::string> float16_string_output = {"-INF", "INF", "0.5", "0.25",
|
||||
"0", "-1", "-1.5", "NaN"};
|
||||
TestCastOp(gsl::make_span(float16_input), gsl::make_span(float16_string_output), shape);
|
||||
|
||||
const std::vector<std::string> int_string_data = {"0", "1", "2", "3", "4", "5", "6", "7"};
|
||||
const std::vector<int16_t> int_16_input = {0, 1, 2, 3, 4, 5, 6, 7};
|
||||
TestCastOp(gsl::make_span(int_16_input), gsl::make_span(int_string_data), shape);
|
||||
}
|
||||
|
||||
} // namespace test
|
||||
} // namespace onnxruntime
|
||||
|
|
@ -84,245 +84,6 @@ TEST(TensorOpTest, ShapeTest3D) {
|
|||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: volume of dimensions is not consistent with weights size
|
||||
}
|
||||
|
||||
template <typename SrcType,
|
||||
typename DstType>
|
||||
void TestCastOp(const std::initializer_list<SrcType>& input,
|
||||
const std::initializer_list<DstType>& output,
|
||||
const std::vector<int64_t>& dimensions,
|
||||
int64_t toType,
|
||||
ExpectResult expect_result = ExpectResult::kExpectSuccess,
|
||||
const std::string& expected_failure_string = "") {
|
||||
OpTester test("Cast", 9);
|
||||
test.AddAttribute("to", toType);
|
||||
test.AddInput<SrcType>("input", dimensions, input);
|
||||
test.AddOutput<DstType>("output", dimensions, output);
|
||||
test.Run(expect_result, expected_failure_string, {kTensorrtExecutionProvider});
|
||||
}
|
||||
|
||||
template <typename SrcType>
|
||||
void TestCastFromSrc() {
|
||||
std::initializer_list<SrcType> input_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
const std::vector<int64_t> shape{3, 2, 2};
|
||||
|
||||
auto float_output = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f};
|
||||
TestCastOp(input_data, float_output, shape, TensorProto::FLOAT);
|
||||
|
||||
auto double_output = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0};
|
||||
TestCastOp(input_data, double_output, shape, TensorProto::DOUBLE);
|
||||
|
||||
auto bool_output = {false, true, true, true, true, true, true, true, true, true, true, true};
|
||||
TestCastOp(input_data, bool_output, shape, TensorProto::BOOL);
|
||||
|
||||
const std::initializer_list<uint8_t> uint8_t_output{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(input_data, uint8_t_output, shape, TensorProto::UINT8);
|
||||
|
||||
const std::initializer_list<uint16_t> uint16_t_output{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(input_data, uint16_t_output, shape, TensorProto::UINT16);
|
||||
|
||||
const std::initializer_list<uint32_t> uint32_t_output{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(input_data, uint32_t_output, shape, TensorProto::UINT32);
|
||||
|
||||
const std::initializer_list<uint64_t> uint64_t_output{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(input_data, uint64_t_output, shape, TensorProto::UINT64);
|
||||
|
||||
const std::initializer_list<int16_t> int16_t_output{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(input_data, int16_t_output, shape, TensorProto::INT16);
|
||||
|
||||
const std::initializer_list<int32_t> int32_t_output{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(input_data, int32_t_output, shape, TensorProto::INT32);
|
||||
|
||||
const std::initializer_list<int64_t> int64_t_output{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(input_data, int64_t_output, shape, TensorProto::INT64);
|
||||
};
|
||||
|
||||
TEST(TensorOpTest, Cast) {
|
||||
TestCastFromSrc<float>();
|
||||
TestCastFromSrc<double>();
|
||||
TestCastFromSrc<uint8_t>();
|
||||
TestCastFromSrc<uint16_t>();
|
||||
TestCastFromSrc<uint32_t>();
|
||||
TestCastFromSrc<uint64_t>();
|
||||
TestCastFromSrc<int8_t>();
|
||||
TestCastFromSrc<int16_t>();
|
||||
TestCastFromSrc<int32_t>();
|
||||
TestCastFromSrc<int64_t>();
|
||||
}
|
||||
|
||||
TEST(TensorOpTest, CastFromBool) {
|
||||
auto bool_data = {false, true, true, true, true, true, true, true, true, true, false, true};
|
||||
const std::vector<int64_t> shape{3, 2, 2};
|
||||
|
||||
const std::initializer_list<float> float_output = {0.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f};
|
||||
TestCastOp(bool_data, float_output, shape, TensorProto::FLOAT);
|
||||
|
||||
const std::initializer_list<double> double_output = {0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0};
|
||||
TestCastOp(bool_data, double_output, shape, TensorProto::DOUBLE);
|
||||
|
||||
auto bool_output = {false, true, true, true, true, true, true, true, true, true, false, true};
|
||||
TestCastOp(bool_data, bool_output, shape, TensorProto::BOOL);
|
||||
|
||||
const std::initializer_list<uint8_t> uint8_t_output{0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1};
|
||||
TestCastOp(bool_data, uint8_t_output, shape, TensorProto::UINT8);
|
||||
|
||||
const std::initializer_list<uint16_t> uint16_t_output{0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1};
|
||||
TestCastOp(bool_data, uint16_t_output, shape, TensorProto::UINT16);
|
||||
|
||||
const std::initializer_list<uint32_t> uint32_t_output{0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1};
|
||||
TestCastOp(bool_data, uint32_t_output, shape, TensorProto::UINT32);
|
||||
|
||||
const std::initializer_list<uint64_t> uint64_t_output{0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1};
|
||||
TestCastOp(bool_data, uint64_t_output, shape, TensorProto::UINT64);
|
||||
|
||||
const std::initializer_list<int16_t> int16_t_output{0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1};
|
||||
TestCastOp(bool_data, int16_t_output, shape, TensorProto::INT16);
|
||||
|
||||
const std::initializer_list<int32_t> int32_t_output{0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1};
|
||||
TestCastOp(bool_data, int32_t_output, shape, TensorProto::INT32);
|
||||
|
||||
const std::initializer_list<int64_t> int64_t_output{0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1};
|
||||
TestCastOp(bool_data, int64_t_output, shape, TensorProto::INT64);
|
||||
|
||||
const std::initializer_list<MLFloat16> float16_output{
|
||||
MLFloat16(math::floatToHalf(0.0f)),
|
||||
MLFloat16(math::floatToHalf(1.0f)),
|
||||
MLFloat16(math::floatToHalf(1.0f)),
|
||||
MLFloat16(math::floatToHalf(1.0f)),
|
||||
MLFloat16(math::floatToHalf(1.0f)),
|
||||
MLFloat16(math::floatToHalf(1.0f)),
|
||||
MLFloat16(math::floatToHalf(1.0f)),
|
||||
MLFloat16(math::floatToHalf(1.0f)),
|
||||
MLFloat16(math::floatToHalf(1.0f)),
|
||||
MLFloat16(math::floatToHalf(1.0f)),
|
||||
MLFloat16(math::floatToHalf(0.0f)),
|
||||
MLFloat16(math::floatToHalf(1.0f))};
|
||||
TestCastOp(bool_data, float16_output, shape, TensorProto::FLOAT16);
|
||||
}
|
||||
|
||||
TEST(TensorOpTest, CastToFloat16) {
|
||||
const std::vector<int64_t> shape{3, 2, 2};
|
||||
std::initializer_list<float> float_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f};
|
||||
const std::initializer_list<MLFloat16> float16_output{
|
||||
MLFloat16(math::floatToHalf(0.0f)),
|
||||
MLFloat16(math::floatToHalf(1.0f)),
|
||||
MLFloat16(math::floatToHalf(2.0f)),
|
||||
MLFloat16(math::floatToHalf(3.0f)),
|
||||
MLFloat16(math::floatToHalf(4.0f)),
|
||||
MLFloat16(math::floatToHalf(5.0f)),
|
||||
MLFloat16(math::floatToHalf(6.0f)),
|
||||
MLFloat16(math::floatToHalf(7.0f)),
|
||||
MLFloat16(math::floatToHalf(8.0f)),
|
||||
MLFloat16(math::floatToHalf(9.0f)),
|
||||
MLFloat16(math::floatToHalf(10.0f)),
|
||||
MLFloat16(math::floatToHalf(11.0f))};
|
||||
|
||||
TestCastOp(float_data, float16_output, shape, TensorProto::FLOAT16);
|
||||
|
||||
std::initializer_list<uint8_t> uint8_t_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(uint8_t_data, float16_output, shape, TensorProto::FLOAT16);
|
||||
|
||||
std::initializer_list<uint16_t> uint16_t_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(uint16_t_data, float16_output, shape, TensorProto::FLOAT16);
|
||||
|
||||
std::initializer_list<uint32_t> uint32_t_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(uint32_t_data, float16_output, shape, TensorProto::FLOAT16);
|
||||
|
||||
std::initializer_list<uint64_t> uint64_t_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(uint64_t_data, float16_output, shape, TensorProto::FLOAT16);
|
||||
|
||||
std::initializer_list<int8_t> int8_t_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(int8_t_data, float16_output, shape, TensorProto::FLOAT16);
|
||||
|
||||
std::initializer_list<int16_t> int16_t_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(int16_t_data, float16_output, shape, TensorProto::FLOAT16);
|
||||
|
||||
std::initializer_list<int32_t> int32_t_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(int32_t_data, float16_output, shape, TensorProto::FLOAT16);
|
||||
|
||||
std::initializer_list<int64_t> int64_t_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(int64_t_data, float16_output, shape, TensorProto::FLOAT16);
|
||||
}
|
||||
|
||||
TEST(TensorOpTest, CastFromFloat16) {
|
||||
const std::vector<int64_t> shape{3, 2, 2};
|
||||
const std::initializer_list<float> float_output = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f};
|
||||
const std::initializer_list<MLFloat16> input = {
|
||||
MLFloat16(math::floatToHalf(0.0f)),
|
||||
MLFloat16(math::floatToHalf(1.0f)),
|
||||
MLFloat16(math::floatToHalf(2.0f)),
|
||||
MLFloat16(math::floatToHalf(3.0f)),
|
||||
MLFloat16(math::floatToHalf(4.0f)),
|
||||
MLFloat16(math::floatToHalf(5.0f)),
|
||||
MLFloat16(math::floatToHalf(6.0f)),
|
||||
MLFloat16(math::floatToHalf(7.0f)),
|
||||
MLFloat16(math::floatToHalf(8.0f)),
|
||||
MLFloat16(math::floatToHalf(9.0f)),
|
||||
MLFloat16(math::floatToHalf(10.0f)),
|
||||
MLFloat16(math::floatToHalf(11.0f))};
|
||||
|
||||
TestCastOp(input, float_output, shape, TensorProto::FLOAT);
|
||||
|
||||
auto bool_data = {false, true, true, true, true, true, true, true, true, true, true, true};
|
||||
TestCastOp(input, bool_data, shape, TensorProto::BOOL);
|
||||
|
||||
std::initializer_list<uint8_t> uint8_t_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(input, uint8_t_data, shape, TensorProto::UINT8);
|
||||
|
||||
std::initializer_list<uint16_t> uint16_t_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(input, uint16_t_data, shape, TensorProto::UINT16);
|
||||
|
||||
std::initializer_list<uint32_t> uint32_t_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(input, uint32_t_data, shape, TensorProto::UINT32);
|
||||
|
||||
std::initializer_list<uint64_t> uint64_t_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(input, uint64_t_data, shape, TensorProto::UINT64);
|
||||
|
||||
std::initializer_list<int8_t> int8_t_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(input, int8_t_data, shape, TensorProto::INT8);
|
||||
|
||||
std::initializer_list<int16_t> int16_t_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(input, int16_t_data, shape, TensorProto::INT16);
|
||||
|
||||
std::initializer_list<int32_t> int32_t_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(input, int32_t_data, shape, TensorProto::INT32);
|
||||
|
||||
std::initializer_list<int64_t> int64_t_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||
TestCastOp(input, int64_t_data, shape, TensorProto::INT64);
|
||||
}
|
||||
|
||||
TEST(TensorOpTest, CastFromString) {
|
||||
const std::vector<int64_t> shape{2, 2, 2};
|
||||
std::initializer_list<std::string> string_data = {"-inf", "+INF", "0.9767611", "0.28280696",
|
||||
"-0.12019656", "5.0", "NaN", "nan"};
|
||||
const std::initializer_list<float> float_output = {-(std::numeric_limits<float>::infinity()), std::numeric_limits<float>::infinity(),
|
||||
0.9767611f, 0.28280696f,
|
||||
-0.12019656f, 5.0f, NAN, NAN};
|
||||
TestCastOp(string_data, float_output, shape, TensorProto::FLOAT);
|
||||
|
||||
std::initializer_list<std::string> int_16_string_data = {"0", "1", "2", "3", "4", "5", "-32768", "32767"};
|
||||
const std::initializer_list<int16_t> int_16_output = {0, 1, 2, 3, 4, 5, SHRT_MIN, SHRT_MAX};
|
||||
TestCastOp(int_16_string_data, int_16_output, shape, TensorProto::INT16);
|
||||
|
||||
std::initializer_list<std::string> int_64_string_data = {"0", "1", "2", "3", "4", "5", "-9223372036854775808", "9223372036854775807"};
|
||||
const std::initializer_list<int64_t> int_64_output = {0, 1, 2, 3, 4, 5, LLONG_MIN, LLONG_MAX};
|
||||
TestCastOp(int_64_string_data, int_64_output, shape, TensorProto::INT64);
|
||||
}
|
||||
|
||||
TEST(TensorOpTest, CastToString) {
|
||||
const std::vector<int64_t> shape{2, 2, 2};
|
||||
const std::initializer_list<float> float_input = {NAN, -1.f, 0.0391877927f, 0.296140194f, -0.120196559f, 5.0f,
|
||||
-std::numeric_limits<float>::infinity(),
|
||||
std::numeric_limits<float>::infinity()};
|
||||
|
||||
// float output precision is 8, so the expected output differs slightly from the input due to that
|
||||
std::initializer_list<std::string> string_output = {"NaN", "-1", "0.039187793", "0.29614019",
|
||||
"-0.12019656", "5", "-INF", "INF"};
|
||||
TestCastOp(float_input, string_output, shape, TensorProto::STRING);
|
||||
|
||||
std::initializer_list<std::string> int_string_data = {"0", "1", "2", "3", "4", "5", "6", "7"};
|
||||
const std::initializer_list<int16_t> int_16_input = {0, 1, 2, 3, 4, 5, 6, 7};
|
||||
TestCastOp(int_16_input, int_string_data, shape, TensorProto::STRING);
|
||||
}
|
||||
|
||||
void MeanVarianceNormalizationFunctionDefaultPerChannel() {
|
||||
const int64_t N = 2, C = 2, H = 2, W = 3;
|
||||
|
||||
|
|
|
|||
|
|
@ -251,9 +251,11 @@ void Check<MLFloat16>(const OpTester::Data& expected_data,
|
|||
threshold = 0.005f;
|
||||
#endif
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (std::isinf(f_expected[i])) // Test infinity for equality
|
||||
EXPECT_EQ(f_expected[i], f_output[i]) << "i:" << i;
|
||||
else {
|
||||
if (std::isnan(f_expected[i])) {
|
||||
EXPECT_TRUE(std::isnan(f_expected[i])) << "Expected NaN. i:" << i << ", provider_type: " << provider_type;
|
||||
} else if (std::isinf(f_expected[i])) { // Test infinity for equality
|
||||
EXPECT_EQ(f_expected[i], f_output[i]) << "Expected infinity. i:" << i << ", provider_type: " << provider_type;
|
||||
} else {
|
||||
// the default for existing tests
|
||||
EXPECT_NEAR(f_expected[i], f_output[i], threshold)
|
||||
<< "i:" << i << ", provider_type: " << provider_type;
|
||||
|
|
@ -284,9 +286,11 @@ void Check<BFloat16>(const OpTester::Data& expected_data,
|
|||
/// XXX: May need to adjust threshold as BFloat is coarse
|
||||
float threshold = 0.001f;
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (std::isinf(f_expected[i])) // Test infinity for equality
|
||||
EXPECT_EQ(f_expected[i], f_output[i]);
|
||||
else {
|
||||
if (std::isnan(f_expected[i])) {
|
||||
EXPECT_TRUE(std::isnan(f_expected[i])) << "Expected NaN. i:" << i << ", provider_type: " << provider_type;
|
||||
} else if (std::isinf(f_expected[i])) { // Test infinity for equality
|
||||
EXPECT_EQ(f_expected[i], f_output[i]) << "Expected infinity. i:" << i << ", provider_type: " << provider_type;
|
||||
} else {
|
||||
// the default for existing tests
|
||||
const float max_value = fmax(fabs(f_expected[i]), fabs(f_output[i]));
|
||||
if (max_value != 0) { // max_value = 0 means output and expected are 0s.
|
||||
|
|
|
|||
Loading…
Reference in a new issue