mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-24 22:17:32 +00:00
finished ut
This commit is contained in:
parent
c93c17a6d9
commit
d9919fb35d
7 changed files with 326 additions and 67 deletions
|
|
@ -995,7 +995,7 @@ Return Value:
|
|||
if (LogSoftmax) {
|
||||
dispatch->LogSoftmax_Fp16(Input, Output, D, NegativeMaximum, MLAS_FP16(std::log(accumulation_fp32)));
|
||||
} else {
|
||||
dispatch->Softmax_Fp16(Output, Output, D, MLAS_FP16(1.0f / accumulation_fp32));
|
||||
dispatch->Softmax_Fp16(Output, Output, D, MLAS_FP16(accumulation_fp32));
|
||||
}
|
||||
|
||||
Input += D;
|
||||
|
|
|
|||
|
|
@ -94,17 +94,17 @@ struct MLAS_SOFTMAX_DISPATCH {
|
|||
SumExp_Fp16_Fn* SumExp_Fp16 = nullptr;
|
||||
|
||||
/**
|
||||
* @brief Compute the softmax output for each element of the input array. input * scale.
|
||||
* @brief Compute the softmax output for each element of the input array. input / sum.
|
||||
* @param Input Address of the input array. Values of exp(x)
|
||||
* @param Output Address of the output array. Could be the same as the input array.
|
||||
* @param N Number of elements in the input array
|
||||
* @param scale The scale factor to apply to the output
|
||||
* @param Sum Sum of exp(input)
|
||||
*/
|
||||
typedef void(Softmax_Fp16_Fn)(
|
||||
const MLAS_FP16* Input,
|
||||
MLAS_FP16* Output,
|
||||
size_t N,
|
||||
const MLAS_FP16 scale
|
||||
const MLAS_FP16 Sum
|
||||
);
|
||||
|
||||
Softmax_Fp16_Fn* Softmax_Fp16 = nullptr;
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ MLAS_FP16 ReduceMax_Kernel_Fp16(const MLAS_FP16* Input, size_t N);
|
|||
|
||||
MLAS_FP16 SumExp_Kernel_Fp16(const MLAS_FP16* Input, MLAS_FP16* Output, size_t N, const MLAS_FP16 NegativeMaximum);
|
||||
|
||||
void Softmax_Kernel_Fp16(const MLAS_FP16* Input, MLAS_FP16* Output, size_t N, const MLAS_FP16 scale);
|
||||
void Softmax_Kernel_Fp16(const MLAS_FP16* Input, MLAS_FP16* Output, size_t N, const MLAS_FP16 Sum);
|
||||
|
||||
void LogSoftmax_Kernel_Fp16(const MLAS_FP16* Input, MLAS_FP16* Output, size_t N, const MLAS_FP16 NegativeMaximum, const MLAS_FP16 LogSum);
|
||||
|
||||
|
|
|
|||
|
|
@ -747,11 +747,13 @@ MLAS_FP16 ReduceMax_Kernel_Fp16(const MLAS_FP16* Input, size_t N) {
|
|||
return MLAS_FP16::FromBits(result);
|
||||
}
|
||||
|
||||
void Softmax_Kernel_Fp16(const MLAS_FP16* Input, MLAS_FP16* Output, size_t N, const MLAS_FP16 scale) {
|
||||
void Softmax_Kernel_Fp16(const MLAS_FP16* Input, MLAS_FP16* Output, size_t N, const MLAS_FP16 Sum) {
|
||||
const auto* input = reinterpret_cast<const _mlas_fp16_*>(Input);
|
||||
auto* output = reinterpret_cast<_mlas_fp16_*>(Output);
|
||||
auto scale8 = MlasBroadcastFloat16x8(scale.val);
|
||||
auto scale4 = MlasBroadcastFloat16x4(scale.val);
|
||||
auto sum8 = MlasBroadcastFloat16x8(Sum.val);
|
||||
auto sum4 = MlasBroadcastFloat16x4(Sum.val);
|
||||
auto scale8 = MlasDivide(MlasBroadcastFloat16x8((_mlas_fp16_)0x3c00), sum8);
|
||||
auto scale4 = MlasDivide(MlasBroadcastFloat16x4((_mlas_fp16_)0x3c00), sum4);
|
||||
|
||||
while (N >= 32) {
|
||||
auto v0 = MlasLoadFloat16x8(input);
|
||||
|
|
|
|||
|
|
@ -1,59 +0,0 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "test_util.h"
|
||||
|
||||
class MlasComputeExpTest : public MlasTestBase {
|
||||
private:
|
||||
MatrixGuardBuffer<float> BufferInput;
|
||||
MatrixGuardBuffer<float> BufferOutput;
|
||||
MatrixGuardBuffer<float> BufferOutputReference;
|
||||
|
||||
void Test(size_t N, float MinimumValue, float MaximumValue) {
|
||||
float* Input = BufferInput.GetBuffer(N);
|
||||
float* Output = BufferOutput.GetBuffer(N);
|
||||
float* OutputReference = BufferOutputReference.GetBuffer(N);
|
||||
|
||||
std::default_random_engine generator(static_cast<unsigned>(N));
|
||||
std::uniform_real_distribution<float> distribution(MinimumValue, MaximumValue);
|
||||
|
||||
for (size_t n = 0; n < N; n++) {
|
||||
Input[n] = distribution(generator);
|
||||
}
|
||||
|
||||
for (size_t n = 0; n < N; n++) {
|
||||
OutputReference[n] = std::exp(Input[n]);
|
||||
}
|
||||
|
||||
MlasComputeExp(Input, Output, N);
|
||||
|
||||
constexpr float AbsoluteTolerance = 1e-6f;
|
||||
constexpr float RelativeTolerance = 1e-6f;
|
||||
|
||||
for (size_t n = 0; n < N; n++) {
|
||||
float diff = std::fabs(Output[n] - OutputReference[n]);
|
||||
ASSERT_TRUE(diff <= AbsoluteTolerance || diff <= std::fabs(OutputReference[n]) * RelativeTolerance)
|
||||
<< " @" << n << " of " << N << ", got: " << Output[n] << ", expecting: " << OutputReference[n];
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
static const char* GetTestSuiteName() {
|
||||
static const std::string suite_name("Exp");
|
||||
return suite_name.c_str();
|
||||
}
|
||||
|
||||
void ExecuteShort(void) override {
|
||||
for (size_t n = 1; n < 128; n++) {
|
||||
Test(n, -10.f, 10.f);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static UNUSED_VARIABLE bool added_to_main = AddTestRegister([](bool is_short_execute) {
  // Only a short-execute variant exists; no long execute needed.
  // Accumulate into one size_t so every path returns the same type: a bare `0ul` is
  // unsigned long, which is not the same type as size_t on LLP64 targets and would make
  // the lambda's return type deduction ambiguous there.
  size_t count = 0;
  if (is_short_execute) {
    count += MlasDirectShortExecuteTests<MlasComputeExpTest>::RegisterShortExecute();
  }
  return count;
});
|
||||
113
onnxruntime/test/mlas/unittest/test_softcap.cpp
Normal file
113
onnxruntime/test/mlas/unittest/test_softcap.cpp
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "test_util.h"
|
||||
#include "core/mlas/lib/mlasi.h"
|
||||
#include "core/mlas/lib/softmax.h"
|
||||
|
||||
class MlasComputeTanhTest : public MlasTestBase {
|
||||
private:
|
||||
MatrixGuardBuffer<MLAS_FP16> BufferInputFp16;
|
||||
MatrixGuardBuffer<MLAS_FP16> BufferOutputFp16;
|
||||
|
||||
#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)
|
||||
void TestFp16(size_t N, float MinimumValue, float MaximumValue) {
|
||||
MLAS_FP16* Input = BufferInputFp16.GetBuffer(N);
|
||||
MLAS_FP16* Output = BufferOutputFp16.GetBuffer(N);
|
||||
|
||||
std::default_random_engine generator(static_cast<unsigned>(N));
|
||||
std::uniform_real_distribution<float> distribution(MinimumValue, MaximumValue);
|
||||
|
||||
for (size_t n = 0; n < N; n++) {
|
||||
Input[n] = MLAS_FP16(distribution(generator));
|
||||
}
|
||||
|
||||
MlasComputeTanh(Input, Output, N);
|
||||
|
||||
constexpr float AbsoluteTolerance = 5e-3f;
|
||||
constexpr float RelativeTolerance = 5e-3f;
|
||||
|
||||
for (size_t n = 0; n < N; n++) {
|
||||
float in = Input[n].ToFloat();
|
||||
float ref = std::tanh(in);
|
||||
float out = Output[n].ToFloat();
|
||||
float diff = std::fabs(out - ref);
|
||||
ASSERT_TRUE(diff <= AbsoluteTolerance || diff <= std::fabs(ref) * RelativeTolerance)
|
||||
<< " @ " << in << ", got: " << out << ", expecting: " << ref
|
||||
<< ", diff: " << diff << ", r-diff: " << diff / std::fabs(ref);
|
||||
}
|
||||
}
|
||||
#endif // defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)
|
||||
|
||||
public:
|
||||
static const char* GetTestSuiteName() {
|
||||
static const std::string suite_name("Tanh");
|
||||
return suite_name.c_str();
|
||||
}
|
||||
|
||||
void ExecuteShort(void) override {
|
||||
for (size_t n = 1; n < 128; n++) {
|
||||
#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)
|
||||
TestFp16(n, -3.51562f, 3.51562f);
|
||||
#endif // defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class MlasComputeSoftcapTest : public MlasTestBase {
|
||||
private:
|
||||
MatrixGuardBuffer<MLAS_FP16> BufferInputFp16;
|
||||
MatrixGuardBuffer<MLAS_FP16> BufferOutputFp16;
|
||||
|
||||
#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)
|
||||
void TestFp16(size_t N, float MinimumValue, float MaximumValue, float cap) {
|
||||
MLAS_FP16* Input = BufferInputFp16.GetBuffer(N);
|
||||
MLAS_FP16* Output = BufferOutputFp16.GetBuffer(N);
|
||||
|
||||
std::default_random_engine generator(static_cast<unsigned>(N));
|
||||
std::uniform_real_distribution<float> distribution(MinimumValue, MaximumValue);
|
||||
|
||||
for (size_t n = 0; n < N; n++) {
|
||||
Input[n] = MLAS_FP16(distribution(generator));
|
||||
}
|
||||
|
||||
MlasComputeSoftcap(Input, Output, N, MLAS_FP16(cap));
|
||||
|
||||
constexpr float AbsoluteTolerance = 5e-3f;
|
||||
constexpr float RelativeTolerance = 5e-3f;
|
||||
|
||||
for (size_t n = 0; n < N; n++) {
|
||||
float in = Input[n].ToFloat();
|
||||
float ref = std::tanh(in/cap) * cap;
|
||||
float out = Output[n].ToFloat();
|
||||
float diff = std::fabs(out - ref);
|
||||
ASSERT_TRUE(diff <= AbsoluteTolerance || diff <= std::fabs(ref) * RelativeTolerance)
|
||||
<< " @ " << in << ", got: " << out << ", expecting: " << ref << ", r-diff " << diff / std::fabs(ref);
|
||||
}
|
||||
}
|
||||
#endif // defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)
|
||||
|
||||
public:
|
||||
static const char* GetTestSuiteName() {
|
||||
static const std::string suite_name("Softcap");
|
||||
return suite_name.c_str();
|
||||
}
|
||||
|
||||
void ExecuteShort(void) override {
|
||||
for (size_t n = 1; n < 128; n++) {
|
||||
#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)
|
||||
TestFp16(n, -10.f, 10.f, 3.2f);
|
||||
#endif // defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Registers the Tanh and Softcap suites; only short-execute variants exist.
static UNUSED_VARIABLE bool added_to_main = AddTestRegister([](bool is_short_execute) {
  size_t registered = 0;
  if (!is_short_execute) {
    return registered;
  }
  registered += MlasDirectShortExecuteTests<MlasComputeTanhTest>::RegisterShortExecute();
  registered += MlasDirectShortExecuteTests<MlasComputeSoftcapTest>::RegisterShortExecute();
  return registered;
});
|
||||
|
|
@ -2,6 +2,126 @@
|
|||
// Licensed under the MIT License.
|
||||
|
||||
#include "test_util.h"
|
||||
#include "core/mlas/lib/mlasi.h"
|
||||
#include "core/mlas/lib/softmax.h"
|
||||
|
||||
class MlasComputeExpTest : public MlasTestBase {
|
||||
private:
|
||||
MatrixGuardBuffer<float> BufferInput;
|
||||
MatrixGuardBuffer<float> BufferOutput;
|
||||
MatrixGuardBuffer<float> BufferOutputReference;
|
||||
MatrixGuardBuffer<MLAS_FP16> BufferInputFp16;
|
||||
MatrixGuardBuffer<MLAS_FP16> BufferOutputFp16;
|
||||
|
||||
void Test(size_t N, float MinimumValue, float MaximumValue) {
|
||||
float* Input = BufferInput.GetBuffer(N);
|
||||
float* Output = BufferOutput.GetBuffer(N);
|
||||
float* OutputReference = BufferOutputReference.GetBuffer(N);
|
||||
|
||||
std::default_random_engine generator(static_cast<unsigned>(N));
|
||||
std::uniform_real_distribution<float> distribution(MinimumValue, MaximumValue);
|
||||
|
||||
for (size_t n = 0; n < N; n++) {
|
||||
Input[n] = distribution(generator);
|
||||
}
|
||||
|
||||
for (size_t n = 0; n < N; n++) {
|
||||
OutputReference[n] = std::exp(Input[n]);
|
||||
}
|
||||
|
||||
MlasComputeExp(Input, Output, N);
|
||||
|
||||
constexpr float AbsoluteTolerance = 1e-6f;
|
||||
constexpr float RelativeTolerance = 1e-6f;
|
||||
|
||||
for (size_t n = 0; n < N; n++) {
|
||||
float diff = std::fabs(Output[n] - OutputReference[n]);
|
||||
ASSERT_TRUE(diff <= AbsoluteTolerance || diff <= std::fabs(OutputReference[n]) * RelativeTolerance)
|
||||
<< " @" << n << " of " << N << ", got: " << Output[n] << ", expecting: " << OutputReference[n];
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)
|
||||
|
||||
void TestFp16(size_t N, float MinimumValue, float MaximumValue) {
|
||||
MLAS_FP16* Input = BufferInputFp16.GetBuffer(N);
|
||||
MLAS_FP16* Output = BufferOutputFp16.GetBuffer(N);
|
||||
|
||||
std::default_random_engine generator(N);
|
||||
std::uniform_real_distribution<float> distribution(MinimumValue, MaximumValue);
|
||||
|
||||
for (size_t n = 0; n < N; n++) {
|
||||
Input[n] = MLAS_FP16(distribution(generator));
|
||||
}
|
||||
|
||||
MlasComputeExp(Input, Output, N);
|
||||
|
||||
constexpr float AbsoluteTolerance = 5e-4f;
|
||||
constexpr float RelativeTolerance = 1e-3f;
|
||||
|
||||
for (size_t n = 0; n < N; n++) {
|
||||
float in = Input[n].ToFloat();
|
||||
float ref = std::exp(in);
|
||||
float out = Output[n].ToFloat();
|
||||
float diff = std::fabs(out - ref);
|
||||
ASSERT_TRUE(diff <= AbsoluteTolerance || diff <= std::fabs(ref) * RelativeTolerance)
|
||||
<< " @ " << in << ", got: " << out << ", expecting: " << ref << ", r-diff: " << diff / std::fabs(ref);
|
||||
}
|
||||
}
|
||||
|
||||
void TestSumFp16(size_t N, float MinimumValue, float MaximumValue) {
|
||||
MLAS_FP16* Input = BufferInputFp16.GetBuffer(N);
|
||||
MLAS_FP16* Output = BufferOutputFp16.GetBuffer(N);
|
||||
|
||||
std::default_random_engine generator(N);
|
||||
std::uniform_real_distribution<float> distribution(MinimumValue, MaximumValue);
|
||||
|
||||
float max_val = std::numeric_limits<float>::min();
|
||||
for (size_t n = 0; n < N; n++) {
|
||||
Input[n] = MLAS_FP16(distribution(generator));
|
||||
max_val = std::max(max_val, Input[n].ToFloat());
|
||||
}
|
||||
|
||||
const auto* dispatch = GetMlasPlatform().SoftmaxDispatch;
|
||||
auto sum = dispatch->SumExp_Fp16(Input, Output, N, MLAS_FP16(-max_val));
|
||||
|
||||
constexpr float AbsoluteTolerance = 5e-4f;
|
||||
constexpr float RelativeTolerance = 1e-3f;
|
||||
|
||||
float sum_ref = 0.0f;
|
||||
for (size_t n = 0; n < N; n++) {
|
||||
float in = Input[n].ToFloat();
|
||||
float ref = std::exp(in - max_val);
|
||||
sum_ref += ref;
|
||||
float out = Output[n].ToFloat();
|
||||
float diff = std::fabs(out - ref);
|
||||
ASSERT_TRUE(diff <= AbsoluteTolerance || diff <= std::fabs(ref) * RelativeTolerance)
|
||||
<< " @ " << in << ", got: " << out << ", expecting: " << ref << ", r-diff: " << diff / std::fabs(ref);
|
||||
}
|
||||
|
||||
float diff = std::fabs(sum.ToFloat() - sum_ref);
|
||||
ASSERT_TRUE(diff <= 1e-3f || diff <= std::fabs(sum_ref) * 5e-3f)
|
||||
<< " sum: " << sum.ToFloat() << ", expecting: " << sum_ref << ", r-diff: " << diff / std::fabs(sum_ref);
|
||||
}
|
||||
|
||||
#endif // defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)
|
||||
|
||||
public:
|
||||
static const char* GetTestSuiteName() {
|
||||
static const std::string suite_name("Exp");
|
||||
return suite_name.c_str();
|
||||
}
|
||||
|
||||
void ExecuteShort(void) override {
|
||||
for (size_t n = 1; n < 128; n++) {
|
||||
Test(n, -10.f, 10.f);
|
||||
#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)
|
||||
TestFp16(n, -17.f, 11.f);
|
||||
TestSumFp16(n, -10.f, 10.f);
|
||||
#endif // defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <bool Threaded>
|
||||
class MlasSoftmaxTest : public MlasTestBase {
|
||||
|
|
@ -9,6 +129,8 @@ class MlasSoftmaxTest : public MlasTestBase {
|
|||
MatrixGuardBuffer<float> BufferInput;
|
||||
MatrixGuardBuffer<float> BufferOutput;
|
||||
MatrixGuardBuffer<float> BufferOutputReference;
|
||||
MatrixGuardBuffer<MLAS_FP16> BufferInputFp16;
|
||||
MatrixGuardBuffer<MLAS_FP16> BufferOutputFp16;
|
||||
MLAS_THREADPOOL* threadpool_;
|
||||
|
||||
void Test(size_t N, size_t D, float MinimumValue, float MaximumValue) {
|
||||
|
|
@ -44,6 +166,65 @@ class MlasSoftmaxTest : public MlasTestBase {
|
|||
}
|
||||
}
|
||||
|
||||
#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)
|
||||
void TestReduceMaxFp16(size_t N, float MinimumValue, float MaximumValue) {
|
||||
MLAS_FP16* Input = BufferInputFp16.GetBuffer(N);
|
||||
|
||||
std::default_random_engine generator(static_cast<unsigned>(N));
|
||||
std::uniform_real_distribution<float> distribution(MinimumValue, MaximumValue);
|
||||
|
||||
float ref = std::numeric_limits<float>::lowest();
|
||||
|
||||
for (size_t nd = 0; nd < N; nd++) {
|
||||
Input[nd] = MLAS_FP16(distribution(generator));
|
||||
ref = std::max(ref, Input[nd].ToFloat());
|
||||
}
|
||||
|
||||
const auto* dispatch = GetMlasPlatform().SoftmaxDispatch;
|
||||
auto out = dispatch->ReduceMax_Fp16(Input, N).ToFloat();
|
||||
|
||||
constexpr float AbsoluteTolerance = 1e-3f;
|
||||
constexpr float RelativeTolerance = 1e-3f;
|
||||
|
||||
float diff = std::fabs(out - ref);
|
||||
ASSERT_TRUE(diff <= AbsoluteTolerance || diff <= std::fabs(ref) * RelativeTolerance)
|
||||
<< "ReduceMaxFp16: " << N << ", got: " << out << ", expecting: " << ref
|
||||
<< ", diff: " << diff << ", r-diff: " << diff / std::fabs(ref);
|
||||
}
|
||||
|
||||
void TestFp16(size_t N, size_t D, float MinimumValue, float MaximumValue, bool LogSoftmax, bool SmoothSoftmax) {
|
||||
MLAS_FP16* Input = BufferInputFp16.GetBuffer(N * D);
|
||||
MLAS_FP16* Output = BufferOutputFp16.GetBuffer(N * D);
|
||||
float* InputReference = BufferInput.GetBuffer(N * D);
|
||||
float* OutputReference = BufferOutputReference.GetBuffer(N * D);
|
||||
|
||||
std::default_random_engine generator(static_cast<unsigned>(N * D));
|
||||
std::uniform_real_distribution<float> distribution(MinimumValue, MaximumValue);
|
||||
|
||||
for (size_t nd = 0; nd < N * D; nd++) {
|
||||
Input[nd] = MLAS_FP16(distribution(generator));
|
||||
InputReference[nd] = Input[nd].ToFloat();
|
||||
}
|
||||
|
||||
MlasComputeSoftmax(Input, Output, N, D, LogSoftmax, SmoothSoftmax, threadpool_);
|
||||
ReferenceSoftmax(InputReference, OutputReference, N, D, LogSoftmax, SmoothSoftmax);
|
||||
|
||||
constexpr float AbsoluteTolerance = 5e-3f;
|
||||
constexpr float RelativeTolerance = 5e-3f;
|
||||
|
||||
for (size_t nd = 0; nd < N * D; nd++) {
|
||||
float in = Input[nd].ToFloat();
|
||||
float ref = OutputReference[nd];
|
||||
float out = Output[nd].ToFloat();
|
||||
float diff = std::fabs(out - ref);
|
||||
ASSERT_TRUE(diff <= AbsoluteTolerance || diff <= std::fabs(ref) * RelativeTolerance)
|
||||
<< "LogSoftmax:" << LogSoftmax << ", SmoothSoftmax: " << SmoothSoftmax << ", input " << in
|
||||
<< ", got: " << out << ", expecting: " << ref << ", diff: " << diff << ", r-diff: " << diff / std::fabs(ref);
|
||||
}
|
||||
|
||||
}
|
||||
#endif // defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)
|
||||
|
||||
void ReferenceSoftmax(const float* Input, float* Output, size_t N, size_t D, bool LogSoftmax, bool SmoothSoftmax) {
|
||||
for (size_t n = 0; n < N; n++) {
|
||||
float MaximumValue = std::numeric_limits<float>::lowest();
|
||||
|
|
@ -99,11 +280,32 @@ class MlasSoftmaxTest : public MlasTestBase {
|
|||
// Short run: every row width from 1 to 127 with a single row, followed by a few larger
// shapes. The fp16 checks are only compiled when fp16 vector intrinsics are available on
// ARM64.
void ExecuteShort(void) override {
#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)
  // Runs the fp16 softmax check for each (LogSoftmax, SmoothSoftmax) pairing, in the order
  // (false, true), (true, true), (false, false), (true, false).
  const auto run_fp16_modes = [this](size_t rows, size_t cols, float lo, float hi) {
    constexpr bool kSmoothModes[] = {true, false};
    constexpr bool kLogModes[] = {false, true};
    for (const bool smooth : kSmoothModes) {
      for (const bool log_softmax : kLogModes) {
        TestFp16(rows, cols, lo, hi, log_softmax, smooth);
      }
    }
  };
#endif  // defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)

  for (size_t width = 1; width < 128; ++width) {
    Test(1, width, -10.f, 10.f);
#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)
    TestReduceMaxFp16(width, -10.f, 10.f);
    run_fp16_modes(1, width, -10.f, 10.f);
#endif  // defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)
  }

  Test(3, 128, 20.f, 30.f);
  Test(63, 95, -150.f, 190.f);
  Test(16, 211, 20.f, 30.f);
#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)
  run_fp16_modes(3, 128, 3.f, 7.f);
  run_fp16_modes(63, 95, -15.f, 19.f);
  run_fp16_modes(16, 211, -7.f, -3.f);
#endif  // defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && defined(MLAS_TARGET_ARM64)
}
|
||||
};
|
||||
|
||||
|
|
@ -111,6 +313,7 @@ static UNUSED_VARIABLE bool added_to_main = AddTestRegister([](bool is_short_exe
|
|||
size_t count = 0;
|
||||
if (is_short_execute) {
|
||||
count += MlasDirectShortExecuteTests<MlasSoftmaxTest<false>>::RegisterShortExecute();
|
||||
count += MlasDirectShortExecuteTests<MlasComputeExpTest>::RegisterShortExecute();
|
||||
if (GetMlasThreadPool() != nullptr) {
|
||||
count += MlasDirectShortExecuteTests<MlasSoftmaxTest<true>>::RegisterShortExecute();
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue