onnxruntime/onnxruntime/test/providers/cpu/tensor/quantize_linear_test.cc
Linnea May 24b74aebcb
[DML] Register DML operators for opset 19 (#16939)
### Description
<!-- Describe your changes. -->
Register DML operators for opset 19. 
- Cast19
- Castlike19
- Constant19 
- Equal19
- Identity19
- QuantizeLinear19
- DequantizeLinear19
- Reshape19
- Shape19
- Size


### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->

---------

Co-authored-by: linnealovespie <linneamay@microsoft.com>
2024-01-22 15:37:09 -08:00

575 lines
23 KiB
C++

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "gtest/gtest.h"
#include "test/common/cuda_op_test_utils.h"
#include "test/providers/provider_test_utils.h"
#include "test/util/include/default_providers.h"
namespace onnxruntime {
namespace test {
// scalar zero & scale with uint8
TEST(DequantizeLinearOpTest, Uint8) {
OpTester test("DequantizeLinear", 10);
std::vector<int64_t> dims{4};
test.AddInput<uint8_t>("x", dims, {0, 3, 128, 255});
test.AddInput<float>("x_scale", {}, {2.0f});
test.AddInput<uint8_t>("x_zero_point", {}, {128});
test.AddOutput<float>("y", dims, {-256.0f, -250.0f, 0.0f, 254.0f});
// Disable Tensorrt EP due to error:node1_quantize_scale_node: out of bounds channel axis 1. Number of input dimensions is 1.
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
}
// scalar zero & scale with int8
TEST(DequantizeLinearOpTest, Int8) {
OpTester test("DequantizeLinear", 10);
std::vector<int64_t> dims{4};
test.AddInput<int8_t>("x", dims, {-30, -3, 100, 127});
test.AddInput<float>("x_scale", {}, {2.0f});
test.AddInput<int8_t>("x_zero_point", {}, {-10});
test.AddOutput<float>("y", dims, {-40.0f, 14.0f, 220.0f, 274.0f});
// Disable Tensorrt EP due to error:node1_quantize_scale_node: out of bounds channel axis 1. Number of input dimensions is 1.
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
}
// scalar zero & scale with int8
TEST(DequantizeLinearOpTest, Int32) {
OpTester test("DequantizeLinear", 10);
std::vector<int64_t> dims{4};
test.AddInput<int32_t>("x", dims, {-30, -3, 100, 127});
test.AddInput<float>("x_scale", {}, {2.0f});
test.AddOutput<float>("y", dims, {-60.f, -6.f, 200.f, 254.f});
test.Run();
}
TEST(DequantizeLinearOpTest_BroadcastTensor, Int32) {
OpTester test("DequantizeLinear", 13);
test.AddInput<int32_t>("x", {4}, {-30, -3, 100, 127});
test.AddAttribute<int64_t>("axis", 0);
test.AddInput<float>("x_scale", {1}, {2.0f});
test.AddInput<int32_t>("x_zero_point", {1}, {0});
test.AddOutput<float>("y", {4}, {-60.f, -6.f, 200.f, 254.f});
test.Run();
}
// 2d inputs
TEST(DequantizeLinearOpTest, 2D) {
OpTester test("DequantizeLinear", 10);
std::vector<int64_t> dims{3, 4};
test.AddInput<uint8_t>("X", dims,
{0, 1, 2, 3,
0, 1, 2, 3,
0, 10, 20, 30});
test.AddInput<float>("scale", {}, {1.0f});
test.AddInput<uint8_t>("zero_point", {}, {0});
test.AddOutput<float>("Y", dims,
{0, 1, 2, 3,
0, 1, 2, 3,
0, 10, 20, 30});
test.Run();
}
// dequantize with scalar data
TEST(DequantizeLinearOpTest, Scalar) {
OpTester test("DequantizeLinear", 10);
test.AddInput<int8_t>("x", {}, {100});
test.AddInput<float>("x_scale", {}, {2.0f});
test.AddInput<int8_t>("x_zero_point", {}, {-10});
test.AddOutput<float>("y", {}, {220.0f});
// Disable Tensorrt EP due to error:node1_quantize_scale_node: out of bounds channel axis 1. Number of input dimensions is 0.
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
}
// dequantize with scalar data
TEST(DequantizeLinearOpMLFloat16Test, Scalar) {
OpTester test("DequantizeLinear", 19);
test.AddInput<int8_t>("x", {}, {100});
test.AddInput<MLFloat16>("x_scale", {}, {MLFloat16(2.0f)});
test.AddInput<int8_t>("x_zero_point", {}, {-10});
test.AddOutput<MLFloat16>("y", {}, {MLFloat16(220.0f)});
// Disable Tensorrt EP due to error:node1_quantize_scale_node: out of bounds channel axis 1. Number of input dimensions is 0.
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
}
// dequantize without zero point
TEST(DequantizeLinearOpTest, Without_Zero_Point) {
OpTester test("DequantizeLinear", 10);
test.AddInput<int8_t>("x", {}, {100});
test.AddInput<float>("x_scale", {}, {2.0f});
test.AddOutput<float>("y", {}, {200.0f});
test.Run();
}
// 1d zero & scale with default axis
TEST(DequantizeLinearOpTest, Per_Channel_Axis_Default) {
OpTester test("DequantizeLinear", 13);
std::vector<int64_t> dims{2, 3, 2, 4};
test.AddInput<int8_t>("X", dims,
{7, 9, 10, 10,
5, 8, 9, 1,
8, 6, 7, 9,
10, 0, 7, 10,
8, 2, 6, 0,
5, 9, 8, 1,
2, 7, 5, 3,
2, 4, 1, 3,
8, 7, 4, 8,
10, 1, 5, 5,
7, 7, 0, 2,
4, 4, 0, 5});
test.AddInput<float>("scale", {3}, {1, 10, 7});
test.AddInput<int8_t>("zero_point", {3}, {10, 2, 1});
test.AddOutput<float>("Y", dims,
{-3, -1, 0, 0,
-5, -2, -1, -9,
60, 40, 50, 70,
80, -20, 50, 80,
49, 7, 35, -7,
28, 56, 49, 0,
-8, -3, -5, -7,
-8, -6, -9, -7,
60, 50, 20, 60,
80, -10, 30, 30,
42, 42, -7, 7,
21, 21, -7, 28});
// Disable Tensorrt EP due to the non-zero zero_point.
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
}
// 1d zero & scale with uint8 broadcast axis 0
TEST(DequantizeLinearOpTest, Per_Channel_Axis_0) {
OpTester test("DequantizeLinear", 13);
std::vector<int64_t> dims{3, 4};
test.AddInput<uint8_t>("X", dims,
{0, 1, 2, 3,
0, 1, 2, 3,
0, 10, 20, 30});
test.AddAttribute<int64_t>("axis", 0);
test.AddInput<float>("scale", {3},
{1.0f,
2.0f,
4.0f});
test.AddInput<uint8_t>("zero_point", {3},
{0,
0,
0});
test.AddOutput<float>("Y", dims,
{0, 1, 2, 3,
0, 2, 4, 6,
0, 40, 80, 120});
test.Run();
}
// 1d zero & scale with int8 broadcast axis 1
TEST(DequantizeLinearOpTest, Per_Channel_Axis_1_int8) {
OpTester test("DequantizeLinear", 13);
std::vector<int64_t> dims{3, 4};
test.AddInput<int8_t>("X", dims,
{0, 1, 2, 3,
0, 2, 4, 6,
0, 10, 20, 30});
test.AddAttribute<int64_t>("axis", 1);
test.AddInput<float>("scale", {4}, {1, 2, 4, 8});
test.AddInput<int8_t>("zero_point", {4}, {0, -10, -20, -30});
test.AddOutput<float>("Y", dims,
{0, 22, 88, 264,
0, 24, 96, 288,
0, 40, 160, 480});
// Disable Tensorrt EP due to the non-zero zero_point.
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
}
// 1d zero & scale with int32 broadcast axis 1
TEST(DequantizeLinearOpTest, Per_Channel_Axis_1_int32) {
OpTester test("DequantizeLinear", 13);
std::vector<int64_t> dims{3, 4};
test.AddInput<int32_t>("X", dims,
{0, 1, 2, 3,
0, 2, 4, 6,
0, 10, 20, 30});
test.AddAttribute<int64_t>("axis", 1);
test.AddInput<float>("scale", {4}, {1, 2, 4, 8});
test.AddInput<int32_t>("zero_point", {4}, {0, 0, 0, 0});
test.AddOutput<float>("Y", dims,
{0, 2, 8, 24,
0, 4, 16, 48,
0, 20, 80, 240});
// Disable Tensorrt EP due to error, only activation types allowed as input to this layer.
// Disable CUDA, ROCm EP, there is no implementation for int32_t.
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kCudaExecutionProvider, kRocmExecutionProvider});
}
// 1d zero & scale with uint8 broadcast axis -2 (-2 resolves to axis 0)
TEST(DequantizeLinearOpTest, Per_Channel_Neg_2) {
OpTester test("DequantizeLinear", 13);
std::vector<int64_t> dims{3, 4};
test.AddInput<uint8_t>("X", dims,
{0, 1, 2, 3,
0, 1, 2, 3,
0, 10, 20, 30});
test.AddAttribute<int64_t>("axis", -2);
test.AddInput<float>("scale", {3},
{1.0f,
2.0f,
4.0f});
test.AddInput<uint8_t>("zero_point", {3},
{0,
0,
0});
test.AddOutput<float>("Y", dims,
{0, 1, 2, 3,
0, 2, 4, 6,
0, 40, 80, 120});
test.Run();
}
// quantize with scalar zero point and scale
TEST(QuantizeLinearOpTest, Uint8) {
OpTester test("QuantizeLinear", 10);
std::vector<int64_t> dims{6};
test.AddInput<float>("x", dims, {0, 2, 3, 1000, -254, -1000});
test.AddInput<float>("y_scale", {}, {2.0f});
test.AddInput<uint8_t>("y_zero_point", {}, {128});
test.AddOutput<uint8_t>("y", dims, {128, 129, 130, 255, 1, 0});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support support UINT8 for quantization
}
TEST(QuantizeLinearOpMLFloat16Test, Uint8) {
OpTester test("QuantizeLinear", 19);
std::vector<int64_t> dims{6};
test.AddInput<MLFloat16>("x", dims, {MLFloat16(0.0f), MLFloat16(2.0f), MLFloat16(4.0f), MLFloat16(1000.0f), MLFloat16(-254.0f), MLFloat16(-1000.0f)});
test.AddInput<MLFloat16>("y_scale", {}, {MLFloat16(2.0f)});
test.AddInput<uint8_t>("y_zero_point", {}, {128});
test.AddOutput<uint8_t>("y", dims, {128, 129, 130, 255, 1, 0});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support support UINT8 for quantization
}
// quantize with scalar zero point and scale
TEST(QuantizeLinearOpTest, Int8) {
// TODO: Unskip when fixed #41968513
if (DefaultDmlExecutionProvider().get() != nullptr) {
GTEST_SKIP() << "Skipping because of the following error: Expected equality of these values: -127 and -128";
}
OpTester test("QuantizeLinear", 10);
std::vector<int64_t> dims{6};
test.AddInput<float>("x", dims, {0, 2, 3, 5, -2, -5});
test.AddInput<float>("y_scale", {}, {.039215686f});
test.AddInput<int8_t>("y_zero_point", {}, {0});
test.AddOutput<int8_t>("y", dims, {0, 51, 76, 127, -51, -127});
// Disable Tensorrt EP due to the error, out of bounds channel axis 1. Number of input dimensions is 1.
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
}
// quantize with scalar zero point and scale
TEST(QuantizeLinearOpTest, Int8_NegativeZeroPoint) {
// TODO: Unskip when fixed #41968513
if (DefaultDmlExecutionProvider().get() != nullptr) {
GTEST_SKIP() << "Skipping because of the following error: Expected equality of these values: 104 and 105";
}
OpTester test("QuantizeLinear", 10);
std::vector<int64_t> dims{8};
test.AddInput<float>("x", dims, {0, 2, 3, 5, 6, -2, -5, -6});
test.AddInput<float>("y_scale", {}, {.039215686f});
test.AddInput<int8_t>("y_zero_point", {}, {-23});
test.AddOutput<int8_t>("y", dims, {-23, 28, 53, 104, 127, -74, -128, -128});
// Disable Tensorrt EP due to the error, node1_quantize_scale_node: out of bounds channel axis 1. Number of input dimensions is 1.
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
}
// quantize with scalar zero point and scale
TEST(QuantizeLinearOpTest, Int8_PositiveZeroPoint) {
// TODO: Unskip when fixed #41968513
if (DefaultDmlExecutionProvider().get() != nullptr) {
GTEST_SKIP() << "Skipping because of the following error: Expected equality of these values: -104 and -105";
}
OpTester test("QuantizeLinear", 10);
std::vector<int64_t> dims{8};
test.AddInput<float>("x", dims, {0, 2, 3, 5, 6, -2, -5, -6});
test.AddInput<float>("y_scale", {}, {.039215686f});
test.AddInput<int8_t>("y_zero_point", {}, {23});
test.AddOutput<int8_t>("y", dims, {23, 74, 99, 127, 127, -28, -104, -128});
// Disable Tensorrt EP due to error:node1_quantize_scale_node: out of bounds channel axis 1. Number of input dimensions is 1.
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
}
// quantize with 2D data
TEST(QuantizeLinearOpTest, 2D) {
OpTester test("QuantizeLinear", 10);
std::vector<int64_t> dims{3, 4};
test.AddInput<float>("X", dims,
{0, 2, 3, 1000,
0, 2, 3, 1000,
0, 2, 3, 1000});
test.AddInput<float>("scale", {}, {4});
test.AddInput<uint8_t>("zero_point", {}, {0});
test.AddOutput<uint8_t>("Y", dims,
{0, 0, 1, 250,
0, 0, 1, 250,
0, 0, 1, 250});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support support UINT8 for quantization
}
// quantize with scalar data
TEST(QuantizeLinearOpTest, Scalar) {
OpTester test("QuantizeLinear", 10);
test.AddInput<float>("x", {}, {3});
test.AddInput<float>("y_scale", {}, {2.0f});
test.AddInput<uint8_t>("y_zero_point", {}, {128});
test.AddOutput<uint8_t>("y", {}, {130});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support support UINT8 for quantization
}
// quantize with scalar data
TEST(QuantizeLinearOpTest, QuantizeLinear_Without_Zero_Point_Opset10) {
// TODO: Unskip when fixed #41968513
if (DefaultDmlExecutionProvider().get() != nullptr) {
GTEST_SKIP() << "Skipping because of the following error: AbiCustomRegistry.cpp(507): The parameter is incorrect";
}
OpTester test("QuantizeLinear", 10);
test.AddInput<float>("x", {}, {3});
test.AddInput<float>("y_scale", {}, {2.0f});
test.AddOutput<uint8_t>("y", {}, {2});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support support UINT8 for quantization
}
TEST(QuantizeLinearOpTest, QuantizeLinear_Without_Zero_Point_Opset13) {
// TODO: Unskip when fixed #41968513
if (DefaultDmlExecutionProvider().get() != nullptr) {
GTEST_SKIP() << "Skipping because of the following error: AbiCustomRegistry.cpp(507): The parameter is incorrect";
}
OpTester test("QuantizeLinear", 13);
test.AddInput<float>("x", {}, {3});
test.AddInput<float>("y_scale", {}, {2.0f});
test.AddOutput<uint8_t>("y", {}, {2});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support support UINT8 for quantization
}
TEST(QuantizeLinearOpTest, QuantizeLinear_With_Zero_Point0) {
OpTester test("QuantizeLinear", 10);
test.AddInput<float>("x", {}, {3});
test.AddInput<float>("y_scale", {}, {2.0f});
test.AddInput<uint8_t>("y_zero_point", {}, {0});
test.AddOutput<uint8_t>("y", {}, {2});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support support UINT8 for quantization
}
TEST(QuantizeLinearOpTest, QuantizeLinear_With_Zero_Dim1) {
// TODO: Unskip when fixed #41968513
if (DefaultDmlExecutionProvider().get() != nullptr) {
GTEST_SKIP() << "Skipping because of the following error: AbiCustomRegistry.cpp(507): The parameter is incorrect";
}
OpTester test("QuantizeLinear", 10);
test.AddInput<float>("x", {1}, {3});
test.AddInput<float>("y_scale", {1}, {2.0f});
test.AddOutput<uint8_t>("y", {1}, {2});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support support UINT8 for quantization
}
TEST(QuantizeLinearOpTest, Per_Channel_Axis_Default) {
OpTester test("QuantizeLinear", 13);
std::vector<int64_t> dims{3, 4};
test.AddInput<float>("X", dims,
{0, 2, 1, 1001,
1, 1, 2, 1100,
2, 4.2f, 3, 1200});
test.AddInput<float>("scale", {4}, {1, 2, 3, 20});
test.AddInput<uint8_t>("zero_point", {4}, {64, 100, 127, 127});
test.AddOutput<uint8_t>("Y", dims,
{64, 101, 127, 177,
65, 100, 128, 182,
66, 102, 128, 187});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support support UINT8 for quantization
}
TEST(QuantizeLinearOpTest, Per_Channel_Axis_0) {
OpTester test("QuantizeLinear", 13);
std::vector<int64_t> dims{3, 4};
test.AddInput<float>("X", dims,
{0, 2, 3, 1000,
0, 2, 3, 1000,
0, 2, 3, 1000});
test.AddAttribute<int64_t>("axis", 0);
test.AddInput<float>("scale", {3}, {1, 2, 4});
test.AddInput<uint8_t>("zero_point", {3}, {0, 0, 0});
test.AddOutput<uint8_t>("Y", dims,
{0, 2, 3, 255,
0, 1, 2, 255,
0, 0, 1, 250});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support support UINT8 for quantization
}
// quantize with per-channel and negative axis (-2 resolves to axis 0)
TEST(QuantizeLinearOpTest, Per_Channel_Axis_neg) {
OpTester test("QuantizeLinear", 13);
std::vector<int64_t> dims{3, 4};
test.AddInput<float>("X", dims,
{0, 2, 3, 1000,
0, 2, 3, 1000,
0, 2, 3, 1000});
test.AddAttribute<int64_t>("axis", -2);
test.AddInput<float>("scale", {3}, {1, 2, 4});
test.AddInput<uint8_t>("zero_point", {3}, {0, 0, 0});
test.AddOutput<uint8_t>("Y", dims,
{0, 2, 3, 255,
0, 1, 2, 255,
0, 0, 1, 250});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT doesn't support support UINT8 for quantization
}
#if !defined(DISABLE_FLOAT8_TYPES)
template <typename InT, typename OutT>
void DequantizeLinearOp19Test() {
OpTester test("DequantizeLinear", 19);
std::vector<int64_t> dims{4};
std::vector<InT> x;
x.push_back(InT(0.0f, true));
x.push_back(InT(1.0f, true));
x.push_back(InT(2.0f, true));
x.push_back(InT(3.0f, true));
test.AddInput<InT>("x", dims, x);
test.AddInput<OutT>("x_scale", {}, {static_cast<OutT>(1.0f)});
test.AddInput<InT>("x_zero_point", {}, {InT(0.0f, true)});
std::vector<OutT> y;
for (auto it : x) {
y.push_back(static_cast<OutT>(it.ToFloat()));
}
test.AddOutput<OutT>("y", dims, y);
// Disable Tensorrt EP due to error:node1_quantize_scale_node: out of bounds channel axis 1. Number of input dimensions is 1.
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
}
TEST(DequantizeLinearOpTest, Float8) {
constexpr int min_cuda_architecture = 11080;
bool enable_cuda = (nullptr != DefaultCpuExecutionProvider().get()) && HasCudaEnvironment(min_cuda_architecture);
bool enable_cpu = (nullptr != DefaultCpuExecutionProvider().get());
if (enable_cpu || enable_cuda)
DequantizeLinearOp19Test<Float8E4M3FN, float>();
if (enable_cpu)
DequantizeLinearOp19Test<Float8E4M3FNUZ, float>();
if (enable_cpu || enable_cuda)
DequantizeLinearOp19Test<Float8E5M2, float>();
if (enable_cpu)
DequantizeLinearOp19Test<Float8E5M2FNUZ, float>();
}
TEST(DequantizeLinearOpMLFloat16Test, Float8) {
constexpr int min_cuda_architecture = 11080;
bool enable_cuda = (nullptr != DefaultCpuExecutionProvider().get()) && HasCudaEnvironment(min_cuda_architecture);
bool enable_cpu = (nullptr != DefaultCpuExecutionProvider().get());
if (enable_cpu || enable_cuda)
DequantizeLinearOp19Test<Float8E4M3FN, MLFloat16>();
if (enable_cpu)
DequantizeLinearOp19Test<Float8E4M3FNUZ, MLFloat16>();
if (enable_cpu || enable_cuda)
DequantizeLinearOp19Test<Float8E5M2, MLFloat16>();
if (enable_cpu)
DequantizeLinearOp19Test<Float8E5M2FNUZ, MLFloat16>();
}
template <typename InT, typename OutT>
void QuantizeLinearOp19Test(bool saturate) {
OpTester test("QuantizeLinear", 19);
if (!saturate) {
test.AddAttribute<int64_t>("saturate", 0);
}
std::vector<int64_t> dims{6};
std::vector<InT> x{0, 2, 3, 1000, -254, -1000};
test.AddInput<InT>("x", dims, x);
test.AddInput<InT>("y_scale", {}, {1.0f});
test.AddInput<OutT>("y_zero_point", {}, {OutT(0.0f, true)});
std::vector<OutT> y;
for (auto it : x) {
y.push_back(OutT(it, saturate));
}
test.AddOutput<OutT>("y", dims, y);
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
}
TEST(QuantizeLinearOpTest, Float8) {
constexpr int min_cuda_architecture = 11080;
bool enable_cuda = (nullptr != DefaultCpuExecutionProvider().get()) && HasCudaEnvironment(min_cuda_architecture);
bool enable_cpu = (nullptr != DefaultCpuExecutionProvider().get());
if (enable_cpu || enable_cuda)
QuantizeLinearOp19Test<float, Float8E4M3FN>(true);
if (enable_cpu)
QuantizeLinearOp19Test<float, Float8E4M3FNUZ>(true);
if (enable_cpu || enable_cuda)
QuantizeLinearOp19Test<float, Float8E5M2>(true);
if (enable_cpu)
QuantizeLinearOp19Test<float, Float8E5M2FNUZ>(true);
if (enable_cpu || enable_cuda)
QuantizeLinearOp19Test<float, Float8E4M3FN>(false);
if (enable_cpu)
QuantizeLinearOp19Test<float, Float8E4M3FNUZ>(false);
if (enable_cpu || enable_cuda)
QuantizeLinearOp19Test<float, Float8E5M2>(false);
if (enable_cpu)
QuantizeLinearOp19Test<float, Float8E5M2FNUZ>(false);
}
template <typename InT, typename OutT>
void QuantizeLinearOp19F16Test(bool saturate) {
OpTester test("QuantizeLinear", 19);
if (!saturate) {
test.AddAttribute<int64_t>("saturate", 0);
}
std::vector<int64_t> dims{6};
std::vector<InT> x{InT(0.0f), InT(2.0f), InT(3.0f), InT(1000.0f), InT(-254.0f), InT(-1000.0f)};
test.AddInput<InT>("x", dims, x);
test.AddInput<InT>("y_scale", {}, {InT(1.0f)});
test.AddInput<OutT>("y_zero_point", {}, {OutT(0.0f, true)});
std::vector<OutT> y;
for (auto it : x) {
y.push_back(OutT(it, saturate));
}
test.AddOutput<OutT>("y", dims, y);
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
}
TEST(QuantizeLinearOpMLFloat16Test, Float8) {
constexpr int min_cuda_architecture = 11080;
bool enable_cuda = (nullptr != DefaultCpuExecutionProvider().get()) && HasCudaEnvironment(min_cuda_architecture);
bool enable_cpu = (nullptr != DefaultCpuExecutionProvider().get());
if (enable_cpu || enable_cuda)
QuantizeLinearOp19F16Test<MLFloat16, Float8E4M3FN>(true);
if (enable_cpu)
QuantizeLinearOp19F16Test<MLFloat16, Float8E4M3FNUZ>(true);
if (enable_cpu || enable_cuda)
QuantizeLinearOp19F16Test<MLFloat16, Float8E5M2>(true);
if (enable_cpu)
QuantizeLinearOp19F16Test<MLFloat16, Float8E5M2FNUZ>(true);
if (enable_cpu || enable_cuda)
QuantizeLinearOp19F16Test<MLFloat16, Float8E4M3FN>(false);
if (enable_cpu)
QuantizeLinearOp19F16Test<MLFloat16, Float8E4M3FNUZ>(false);
if (enable_cpu || enable_cuda)
QuantizeLinearOp19F16Test<MLFloat16, Float8E5M2>(false);
if (enable_cpu)
QuantizeLinearOp19F16Test<MLFloat16, Float8E5M2FNUZ>(false);
}
#endif
} // namespace test
} // namespace onnxruntime