mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-03 23:49:44 +00:00
Enable FP16 Clip and Handle Bias in FP16 Depthwise Conv (#21493)
- Improved accuracy for face-detection, image-classification, and object-detection in the GeekBench ML benchmark on ARM64. - Fixed issue https://github.com/microsoft/onnxruntime/issues/18992
This commit is contained in:
parent
82036b0497
commit
530a2d7b41
9 changed files with 531 additions and 20 deletions
|
|
@ -58,8 +58,8 @@ Do not modify directly.*
|
|||
|Ceil|*in* X:**T**<br> *out* Y:**T**|13+|**T** = tensor(double), tensor(float)|
|
||||
|||[6, 12]|**T** = tensor(double), tensor(float)|
|
||||
|Celu|*in* X:**T**<br> *out* Y:**T**|12+|**T** = tensor(float)|
|
||||
|Clip|*in* input:**T**<br> *in* min:**T**<br> *in* max:**T**<br> *out* output:**T**<br><br>or<br><br>*in* input:**T**<br> *out* output:**T**|13+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8), tensor(uint32), tensor(uint64), tensor(uint8)|
|
||||
|||12|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8), tensor(uint32), tensor(uint64), tensor(uint8)|
|
||||
|Clip|*in* input:**T**<br> *in* min:**T**<br> *in* max:**T**<br> *out* output:**T**<br><br>or<br><br>*in* input:**T**<br> *out* output:**T**|13+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(int8), tensor(uint32), tensor(uint64), tensor(uint8)|
|
||||
|||12|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(int8), tensor(uint32), tensor(uint64), tensor(uint8)|
|
||||
|||11|**T** = tensor(float)|
|
||||
|||[6, 10]|**T** = tensor(float)|
|
||||
|Col2Im|*in* input:**T**<br> *in* image_shape:**tensor(int64)**<br> *in* block_shape:**tensor(int64)**<br> *out* output:**T**|18+|**T** = tensor(float)|
|
||||
|
|
|
|||
|
|
@ -1751,6 +1751,7 @@ MlasSBGemmConvertPackB(size_t N, size_t K, const float* B, size_t ldb, void* Pac
|
|||
* @brief Indirect Depthwise convolution for fp16
|
||||
* @param Input Supplies the indirect buffer for NHWC input
|
||||
* @param Filter Supplies the address for filter tensor
|
||||
* @param Bias Supplies the address for 1D bias tensor B, has size of M
|
||||
* @param Output Supplies the address for the result tensor
|
||||
* @param Channels # of input channels
|
||||
* @param OutputCount # of output pixels
|
||||
|
|
@ -1762,6 +1763,7 @@ MLASCALL
|
|||
MlasConvDepthwise(
|
||||
const MLAS_FP16* const* Input,
|
||||
const MLAS_FP16* Filter,
|
||||
const MLAS_FP16* Bias,
|
||||
MLAS_FP16* Output,
|
||||
size_t Channels,
|
||||
size_t OutputCount,
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@ Abstract:
|
|||
|
||||
--*/
|
||||
|
||||
|
||||
#include "fp16_common.h"
|
||||
|
||||
#ifdef MLAS_F16VEC_INTRINSICS_SUPPORTED
|
||||
|
|
@ -24,19 +23,20 @@ void
|
|||
MlasConvDepthwiseKernel(
|
||||
const _mlas_fp16_* const* Input,
|
||||
const _mlas_fp16_* Filter,
|
||||
const _mlas_fp16_* Bias,
|
||||
_mlas_fp16_* Output,
|
||||
size_t Channels,
|
||||
size_t OutputCount,
|
||||
size_t KernelSize,
|
||||
MLAS_HALF_GEMM_POSTPROCESSOR* PostProc
|
||||
)
|
||||
)
|
||||
{
|
||||
while (OutputCount > 0) {
|
||||
size_t ChannelOffset = 0;
|
||||
size_t c = Channels;
|
||||
|
||||
while (c >= 8) {
|
||||
MLAS_FLOAT16X8 Accumulator = MlasZeroFloat16x8();
|
||||
MLAS_FLOAT16X8 Accumulator = Bias == nullptr ? MlasZeroFloat16x8() : MlasLoadFloat16x8(&Bias[ChannelOffset]);
|
||||
size_t ChannelKernelOffset = ChannelOffset;
|
||||
|
||||
for (size_t k = 0; k < KernelSize; k++) {
|
||||
|
|
@ -54,7 +54,7 @@ MlasConvDepthwiseKernel(
|
|||
}
|
||||
|
||||
if (c >= 4) {
|
||||
MLAS_FLOAT16X4 Accumulator = MlasZeroFloat16x4();
|
||||
MLAS_FLOAT16X4 Accumulator = Bias == nullptr ? MlasZeroFloat16x4() : MlasLoadFloat16x4(&Bias[ChannelOffset]);
|
||||
size_t ChannelKernelOffset = ChannelOffset;
|
||||
|
||||
for (size_t k = 0; k < KernelSize; k++) {
|
||||
|
|
@ -72,7 +72,8 @@ MlasConvDepthwiseKernel(
|
|||
}
|
||||
|
||||
if (c > 0) {
|
||||
MLAS_FLOAT16X4 Accumulator = MlasZeroFloat16x4();
|
||||
MLAS_FLOAT16X4 Accumulator =
|
||||
Bias == nullptr ? MlasZeroFloat16x4() : MlasLoadPartialFloat16x4(&Bias[ChannelOffset], c);
|
||||
size_t ChannelKernelOffset = ChannelOffset;
|
||||
|
||||
for (size_t k = 0; k < KernelSize; k++) {
|
||||
|
|
@ -86,8 +87,7 @@ MlasConvDepthwiseKernel(
|
|||
Output += c;
|
||||
}
|
||||
if (PostProc) {
|
||||
PostProc->Process(reinterpret_cast<MLAS_FP16*>(Output - Channels), 0, 0, 1, Channels,
|
||||
Channels);
|
||||
PostProc->Process(reinterpret_cast<MLAS_FP16*>(Output - Channels), 0, 0, 1, Channels, Channels);
|
||||
}
|
||||
Input += KernelSize;
|
||||
OutputCount -= 1;
|
||||
|
|
@ -101,16 +101,17 @@ void
|
|||
MlasConvDepthwiseKernel(
|
||||
const _mlas_fp16_* const* Input,
|
||||
const _mlas_fp16_* Filter,
|
||||
const _mlas_fp16_* Bias,
|
||||
_mlas_fp16_* Output,
|
||||
size_t Channels,
|
||||
size_t OutputCount,
|
||||
size_t KernelSize,
|
||||
MLAS_HALF_GEMM_POSTPROCESSOR* PostProc
|
||||
)
|
||||
)
|
||||
{
|
||||
while (OutputCount > 0) {
|
||||
for (size_t ChannelOffset = 0; ChannelOffset < Channels; ChannelOffset++) {
|
||||
float Accumulator = 0.0f;
|
||||
float Accumulator = Bias == nullptr ? 0.0f : MLAS_Half2Float(Bias[ChannelOffset]);
|
||||
size_t ChannelKernelOffset = ChannelOffset;
|
||||
|
||||
for (size_t k = 0; k < KernelSize; k++) {
|
||||
|
|
@ -120,35 +121,36 @@ MlasConvDepthwiseKernel(
|
|||
*Output++ = MLAS_Float2Half(Accumulator);
|
||||
}
|
||||
if (PostProc) {
|
||||
PostProc->Process(reinterpret_cast<MLAS_FP16*>(Output - Channels), 0, 0, 1, Channels,
|
||||
Channels);
|
||||
PostProc->Process(reinterpret_cast<MLAS_FP16*>(Output - Channels), 0, 0, 1, Channels, Channels);
|
||||
}
|
||||
Input += KernelSize;
|
||||
OutputCount -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
#endif // MLAS_F16VEC_INTRINSICS_SUPPORTED
|
||||
|
||||
#endif // MLAS_F16VEC_INTRINSICS_SUPPORTED
|
||||
|
||||
void
|
||||
MLASCALL
|
||||
MlasConvDepthwise(
|
||||
const MLAS_FP16* const* Input,
|
||||
const MLAS_FP16* Filter,
|
||||
const MLAS_FP16* Bias,
|
||||
MLAS_FP16* Output,
|
||||
size_t Channels,
|
||||
size_t OutputCount,
|
||||
size_t KernelSize,
|
||||
MLAS_HALF_GEMM_POSTPROCESSOR* PostProc
|
||||
)
|
||||
)
|
||||
{
|
||||
MlasConvDepthwiseKernel(
|
||||
reinterpret_cast<const _mlas_fp16_* const*>(Input),
|
||||
reinterpret_cast<const _mlas_fp16_*>(Filter),
|
||||
reinterpret_cast<const _mlas_fp16_*>(Bias),
|
||||
reinterpret_cast<_mlas_fp16_*>(Output),
|
||||
Channels,
|
||||
OutputCount,
|
||||
KernelSize,
|
||||
PostProc);
|
||||
PostProc
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -64,6 +64,23 @@ MLAS_FORCEINLINE
|
|||
MLAS_FLOAT16X4
|
||||
MlasLoadFloat16x4(const _mlas_fp16_* Buffer) { return vreinterpret_f16_u16(vld1_u16(Buffer)); }
|
||||
|
||||
//
// Loads fewer than four fp16 values from Buffer into a 4-lane vector; lanes
// that are not loaded stay zero. Only the two low bits of len are examined.
// NOTE(review): presumably callers pass len in 1..3 - confirm at call sites.
//
MLAS_FORCEINLINE
MLAS_FLOAT16X4
MlasLoadPartialFloat16x4(const _mlas_fp16_* Buffer, size_t len)
{
    const bool HasOddElement = (len & 1) != 0;
    const bool HasElementPair = (len & 2) != 0;

    uint16x4_t Lanes16 = vreinterpret_u16_f16(MlasZeroFloat16x4());

    if (HasOddElement) {
        // Park the last element in 16-bit lane 0 for now.
        Lanes16 = vld1_lane_u16(Buffer + (len - 1), Lanes16, 0);
    }

    MLAS_FLOAT16X4 Result = vreinterpret_f16_u16(Lanes16);

    if (HasElementPair) {
        // Copy 32-bit lane 0 into both 32-bit lanes so the parked element also
        // lands in 16-bit lane 2, then overwrite 32-bit lane 0 with the first
        // two halves read from Buffer as a single 32-bit load.
        float32x2_t Lanes32 = vdup_lane_f32(vreinterpret_f32_f16(Result), 0);
        Lanes32 = vld1_lane_f32(reinterpret_cast<const float*>(Buffer), Lanes32, 0);
        Result = vreinterpret_f16_f32(Lanes32);
    }

    return Result;
}
|
||||
|
||||
MLAS_FORCEINLINE
|
||||
void
|
||||
MlasStoreFloat16x8(_mlas_fp16_* Buffer, MLAS_FLOAT16X8 Vector)
|
||||
|
|
|
|||
|
|
@ -139,8 +139,9 @@ Status FusedConvFp16::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr
|
|||
|
||||
bool share_prepacked_weights = (prepacked_weights != nullptr);
|
||||
|
||||
const bool is_depthwise_conv = (group_input_channels == 1 && group_output_channels == 1);
|
||||
// Don't pack the filter buffer if the MlasConvDepthwise path is used.
|
||||
if (!(group_input_channels == 1 && group_output_channels == 1)) {
|
||||
if (!is_depthwise_conv) {
|
||||
packed_W_size_ = MlasHalfGemmPackBSize(group_output_channels, kernel_dim, false);
|
||||
if (packed_W_size_ != 0) {
|
||||
size_t packed_W_data_size = SafeInt<size_t>(group_count) * packed_W_size_;
|
||||
|
|
@ -472,6 +473,7 @@ Status FusedConvFp16::Compute(OpKernelContext* context) const {
|
|||
MlasConvDepthwise(
|
||||
worker_indirection_buffer,
|
||||
reordered_W,
|
||||
Bdata,
|
||||
worker_output,
|
||||
static_cast<size_t>(M),
|
||||
static_cast<size_t>(output_count),
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ ORT_SPECIFY_OP_KERNEL_ARG_DEFAULT_TYPES(
|
|||
float);
|
||||
// Default type list for input 0 of the CPU Clip kernel registered at opset 12.
// MLFloat16 is part of the list so that fp16 Clip is available.
ORT_SPECIFY_OP_KERNEL_ARG_DEFAULT_TYPES(
    kCpuExecutionProvider, kOnnxDomain, Clip, 12, Input, 0,
    float, MLFloat16, double, int8_t, uint8_t, int32_t, uint32_t, int64_t, uint64_t);
|
||||
} // namespace op_kernel_type_control
|
||||
|
||||
using EnabledClip11Types = ORT_OP_KERNEL_ARG_ENABLED_TYPE_LIST(
|
||||
|
|
|
|||
|
|
@ -119,6 +119,24 @@ TEST(MathOpTest, Clip_Default_uint64) {
|
|||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
|
||||
}
|
||||
|
||||
TEST(MathOpTest, Clip_MLFloat16) {
  // fp16 Clip with scalar min/max inputs: each element of the 3x3 input is
  // clamped into the range [0, 6].
  const std::vector<int64_t> shape{3, 3};
  const std::vector<MLFloat16> input_values{
      MLFloat16(-1.0f), MLFloat16(-2.0f), MLFloat16(-3.0f),
      MLFloat16(-4.0f), MLFloat16(0.0f), MLFloat16(2.0f),
      MLFloat16(4.0f), MLFloat16(6.0f), MLFloat16(8.0f)};
  const std::vector<MLFloat16> clipped_values{
      MLFloat16(0.0f), MLFloat16(0.0f), MLFloat16(0.0f),
      MLFloat16(0.0f), MLFloat16(0.0f), MLFloat16(2.0f),
      MLFloat16(4.0f), MLFloat16(6.0f), MLFloat16(6.0f)};

  OpTester test("Clip", 12);
  test.AddInput<MLFloat16>("X", shape, input_values);
  test.AddInput<MLFloat16>("min", {}, {MLFloat16(0.0f)});
  test.AddInput<MLFloat16>("max", {}, {MLFloat16(6.0f)});
  test.AddOutput<MLFloat16>("Y", shape, clipped_values);

  test.Run();
}
|
||||
|
||||
TEST(MathOpTest, Clip_int32) {
|
||||
OpTester test("Clip", 12);
|
||||
|
||||
|
|
|
|||
|
|
@ -714,6 +714,241 @@ TEST(ConvFp16Test, Conv2D_group) {
|
|||
TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true);
|
||||
}
|
||||
|
||||
TEST(ConvFp16Test, Depthwise2D_Bias_Group1_Issue18992) {
  // Smallest conv that still carries a bias: one channel, 1x1 input, 1x1 kernel, group == 1.
  // Expected output: 1.0 * 0.5 + 0.5 = 1.0.
  ConvOpAndTestAttributes attrs = {
      "",                           // auto_pad
      vector<int64_t>{1, 1},        // dilations
      1,                            // group
      vector<int64_t>{1, 1},        // kernel_shape
      vector<int64_t>{0, 0, 0, 0},  // pads
      vector<int64_t>{1, 1},        // strides
      {}                            // excluded EPs
  };

  const vector<int64_t> single_element_shape = {1, 1, 1, 1};
  const vector<int64_t> bias_shape = {1};

  const vector<MLFloat16> input = {MLFloat16(1.0f)};
  const vector<MLFloat16> weights = {MLFloat16(0.5f)};
  const vector<MLFloat16> bias = {MLFloat16(0.5f)};
  auto expected_vals = {MLFloat16(1.0f)};

  // Run once with the default weight handling and once with the trailing flag set to true.
  TestConvFp16Op(attrs, {input, weights, bias},
                 {single_element_shape, single_element_shape, bias_shape},
                 expected_vals, single_element_shape);
  TestConvFp16Op(attrs, {input, weights, bias},
                 {single_element_shape, single_element_shape, bias_shape},
                 expected_vals, single_element_shape, true);
}
|
||||
|
||||
TEST(ConvFp16Test, Depthwise2D_Bias_Group2) {
  // Two channels, one 1x1 filter per channel (group == 2), each with its own bias.
  ConvOpAndTestAttributes attrs = {
      "",                           // auto_pad
      vector<int64_t>{1, 1},        // dilations
      2,                            // group
      vector<int64_t>{1, 1},        // kernel_shape
      vector<int64_t>{0, 0, 0, 0},  // pads
      vector<int64_t>{1, 1},        // strides
      {}                            // excluded EPs
  };

  // Input is the ramp 0, 1, ..., 17 laid out as two 3x3 channels.
  const vector<int64_t> image_shape = {1, 2, 3, 3};
  vector<MLFloat16> input;
  input.reserve(18);
  for (int value = 0; value < 18; ++value) {
    input.push_back(MLFloat16(static_cast<float>(value)));
  }

  const vector<MLFloat16> weights = {MLFloat16(1.0f), MLFloat16(2.0f)};
  const vector<int64_t> weights_shape = {2, 1, 1, 1};
  const vector<MLFloat16> bias = {MLFloat16(1.0f), MLFloat16(-1.0f)};
  const vector<int64_t> bias_shape = {2};

  auto expected_vals = {
      // channel 0: x * 1 + 1
      MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(3.0f),
      MLFloat16(4.0f), MLFloat16(5.0f), MLFloat16(6.0f),
      MLFloat16(7.0f), MLFloat16(8.0f), MLFloat16(9.0f),

      // channel 1: x * 2 - 1
      MLFloat16(17.0f), MLFloat16(19.0f), MLFloat16(21.0f),
      MLFloat16(23.0f), MLFloat16(25.0f), MLFloat16(27.0f),
      MLFloat16(29.0f), MLFloat16(31.0f), MLFloat16(33.0f)};

  TestConvFp16Op(attrs, {input, weights, bias}, {image_shape, weights_shape, bias_shape},
                 expected_vals, image_shape);
  TestConvFp16Op(attrs, {input, weights, bias}, {image_shape, weights_shape, bias_shape},
                 expected_vals, image_shape, true);
}
|
||||
|
||||
TEST(ConvFp16Test, Depthwise2D_Bias_Group15) {
  // Fifteen channels, one 2x2 filter per channel (group == 15), with a per-channel bias.
  ConvOpAndTestAttributes attrs = {
      "",                           // auto_pad
      vector<int64_t>{1, 1},        // dilations
      15,                           // group
      vector<int64_t>{2, 2},        // kernel_shape
      vector<int64_t>{0, 0, 0, 0},  // pads
      vector<int64_t>{1, 1},        // strides
      {}                            // excluded EPs
  };

  // Builds {first, first + 1, ..., first + count - 1} as fp16 values.
  auto make_ramp = [](int count, float first) {
    vector<MLFloat16> values;
    values.reserve(static_cast<size_t>(count));
    for (int i = 0; i < count; ++i) {
      values.push_back(MLFloat16(first + static_cast<float>(i)));
    }
    return values;
  };

  // Channel c of the input and filter c both hold {4c, 4c + 1, 4c + 2, 4c + 3}.
  const vector<MLFloat16> input = make_ramp(60, 0.0f);
  const vector<int64_t> input_shape = {1, 15, 2, 2};
  const vector<MLFloat16> weights = make_ramp(60, 0.0f);
  const vector<int64_t> weights_shape = {15, 1, 2, 2};

  // Bias for channel c is 101 + c.
  const vector<MLFloat16> bias = make_ramp(15, 101.0f);
  const vector<int64_t> bias_shape = {15};

  const vector<int64_t> output_shape = {1, 15, 1, 1};
  auto expected_vals = {
      MLFloat16(115.0f),  // 0.0*0.0 + 1.0*1.0 + 2.0*2.0 + 3.0*3.0 + 101.0
      MLFloat16(228.0f),
      MLFloat16(469.0f),
      MLFloat16(838.0f),
      MLFloat16(1335.0f),
      MLFloat16(1960.0f),
      MLFloat16(2713.0f),  // 24.0*24.0 + 25.0*25.0 + 26.0*26.0 + 27.0*27.0 + 107.0
      MLFloat16(3594.0f),
      MLFloat16(4603.0f),
      MLFloat16(5740.0f),
      MLFloat16(7005.0f),
      MLFloat16(8398.0f),
      MLFloat16(9919.0f),   // 48.0*48.0 + 49.0*49.0 + 50.0*50.0 + 51.0*51.0 + 113.0
      MLFloat16(11568.0f),  // 52.0*52.0 + 53.0*53.0 + 54.0*54.0 + 55.0*55.0 + 114.0
      MLFloat16(13345.0f)   // 56.0*56.0 + 57.0*57.0 + 58.0*58.0 + 59.0*59.0 + 115.0
  };

  TestConvFp16Op(attrs, {input, weights, bias}, {input_shape, weights_shape, bias_shape},
                 expected_vals, output_shape);
  TestConvFp16Op(attrs, {input, weights, bias}, {input_shape, weights_shape, bias_shape},
                 expected_vals, output_shape, true);
}
|
||||
|
||||
TEST(ConvFp16Test, ConvDimWithZero) {
|
||||
ConvOpAndTestAttributes attrs = {
|
||||
"", // auto_pad
|
||||
|
|
@ -1074,4 +1309,4 @@ TEST(ConvFp16Test, SharedPrepackedWeights) {
|
|||
} // namespace test
|
||||
} // namespace onnxruntime
|
||||
|
||||
#endif // MLAS_F16VEC_INTRINSICS_SUPPORTED
|
||||
#endif // MLAS_F16VEC_INTRINSICS_SUPPORTED
|
||||
|
|
|
|||
|
|
@ -647,6 +647,241 @@ TEST(ConvTest, Conv2D_group) {
|
|||
TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true);
|
||||
}
|
||||
|
||||
TEST(ConvTest, Depthwise2D_Bias_Group1_Issue18992) {
  // Smallest conv that still carries a bias: one channel, 1x1 input, 1x1 kernel, group == 1.
  // Expected output: 1.0 * 0.5 + 0.5 = 1.0.
  ConvOpAndTestAttributes attrs = {
      "",                           // auto_pad
      vector<int64_t>{1, 1},        // dilations
      1,                            // group
      vector<int64_t>{1, 1},        // kernel_shape
      vector<int64_t>{0, 0, 0, 0},  // pads
      vector<int64_t>{1, 1},        // strides
      {}                            // excluded EPs
  };

  const vector<int64_t> single_element_shape = {1, 1, 1, 1};
  const vector<int64_t> bias_shape = {1};

  const vector<float> input = {1.0f};
  const vector<float> weights = {0.5f};
  const vector<float> bias = {0.5f};
  auto expected_vals = {1.0f};

  // Run once with the default weight handling and once with the trailing flag set to true.
  TestConvOp(attrs, {input, weights, bias},
             {single_element_shape, single_element_shape, bias_shape},
             expected_vals, single_element_shape);
  TestConvOp(attrs, {input, weights, bias},
             {single_element_shape, single_element_shape, bias_shape},
             expected_vals, single_element_shape, true);
}
|
||||
|
||||
TEST(ConvTest, Depthwise2D_Bias_Group2) {
  // Two channels, one 1x1 filter per channel (group == 2), each with its own bias.
  ConvOpAndTestAttributes attrs = {
      "",                           // auto_pad
      vector<int64_t>{1, 1},        // dilations
      2,                            // group
      vector<int64_t>{1, 1},        // kernel_shape
      vector<int64_t>{0, 0, 0, 0},  // pads
      vector<int64_t>{1, 1},        // strides
      {}                            // excluded EPs
  };

  // Input is the ramp 0, 1, ..., 17 laid out as two 3x3 channels.
  const vector<int64_t> image_shape = {1, 2, 3, 3};
  vector<float> input;
  input.reserve(18);
  for (int value = 0; value < 18; ++value) {
    input.push_back(static_cast<float>(value));
  }

  const vector<float> weights = {1.0f, 2.0f};
  const vector<int64_t> weights_shape = {2, 1, 1, 1};
  const vector<float> bias = {1.0f, -1.0f};
  const vector<int64_t> bias_shape = {2};

  auto expected_vals = {
      // channel 0: x * 1 + 1
      1.0f, 2.0f, 3.0f,
      4.0f, 5.0f, 6.0f,
      7.0f, 8.0f, 9.0f,

      // channel 1: x * 2 - 1
      17.0f, 19.0f, 21.0f,
      23.0f, 25.0f, 27.0f,
      29.0f, 31.0f, 33.0f};

  TestConvOp(attrs, {input, weights, bias}, {image_shape, weights_shape, bias_shape},
             expected_vals, image_shape);
  TestConvOp(attrs, {input, weights, bias}, {image_shape, weights_shape, bias_shape},
             expected_vals, image_shape, true);
}
|
||||
|
||||
TEST(ConvTest, Depthwise2D_Bias_Group15) {
  // Fifteen channels, one 2x2 filter per channel (group == 15), with a per-channel bias.
  ConvOpAndTestAttributes attrs = {
      "",                           // auto_pad
      vector<int64_t>{1, 1},        // dilations
      15,                           // group
      vector<int64_t>{2, 2},        // kernel_shape
      vector<int64_t>{0, 0, 0, 0},  // pads
      vector<int64_t>{1, 1},        // strides
      {}                            // excluded EPs
  };

  // Builds {first, first + 1, ..., first + count - 1}.
  auto make_ramp = [](int count, float first) {
    vector<float> values;
    values.reserve(static_cast<size_t>(count));
    for (int i = 0; i < count; ++i) {
      values.push_back(first + static_cast<float>(i));
    }
    return values;
  };

  // Channel c of the input and filter c both hold {4c, 4c + 1, 4c + 2, 4c + 3}.
  const vector<float> input = make_ramp(60, 0.0f);
  const vector<int64_t> input_shape = {1, 15, 2, 2};
  const vector<float> weights = make_ramp(60, 0.0f);
  const vector<int64_t> weights_shape = {15, 1, 2, 2};

  // Bias for channel c is 101 + c.
  const vector<float> bias = make_ramp(15, 101.0f);
  const vector<int64_t> bias_shape = {15};

  const vector<int64_t> output_shape = {1, 15, 1, 1};
  auto expected_vals = {
      115.0f,  // 0.0*0.0 + 1.0*1.0 + 2.0*2.0 + 3.0*3.0 + 101.0
      228.0f,
      469.0f,
      838.0f,
      1335.0f,
      1960.0f,
      2713.0f,  // 24.0*24.0 + 25.0*25.0 + 26.0*26.0 + 27.0*27.0 + 107.0
      3594.0f,
      4603.0f,
      5740.0f,
      7005.0f,
      8398.0f,
      9919.0f,   // 48.0*48.0 + 49.0*49.0 + 50.0*50.0 + 51.0*51.0 + 113.0
      11568.0f,  // 52.0*52.0 + 53.0*53.0 + 54.0*54.0 + 55.0*55.0 + 114.0
      13345.0f   // 56.0*56.0 + 57.0*57.0 + 58.0*58.0 + 59.0*59.0 + 115.0
  };

  TestConvOp(attrs, {input, weights, bias}, {input_shape, weights_shape, bias_shape},
             expected_vals, output_shape);
  TestConvOp(attrs, {input, weights, bias}, {input_shape, weights_shape, bias_shape},
             expected_vals, output_shape, true);
}
|
||||
|
||||
TEST(ConvTest, ConvDimWithZero) {
|
||||
ConvOpAndTestAttributes attrs = {
|
||||
"", // auto_pad
|
||||
|
|
|
|||
Loading…
Reference in a new issue