mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-17 21:10:43 +00:00
Remove FastGelu from activations.
This commit is contained in:
parent
507d2bb9b9
commit
bb2f427990
2 changed files with 0 additions and 77 deletions
|
|
@ -34,19 +34,5 @@ ONNX_OPERATOR_KERNEL_EX(
|
|||
KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
|
||||
Gelu<float>);
|
||||
|
||||
// Kernel registration for FastGelu: domain kMSDomain, version 1, provider
// kCpuExecutionProvider, with type parameter "T" constrained to float tensors
// and FastGelu<float> as the implementing class.
ONNX_OPERATOR_KERNEL_EX(FastGelu, kMSDomain, 1, kCpuExecutionProvider,
                        KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
                        FastGelu<float>);
|
||||
|
||||
template<typename T>
|
||||
constexpr float FastGelu<T>::kAlpha;
|
||||
|
||||
template<typename T>
|
||||
constexpr float FastGelu<T>::kGamma;
|
||||
|
||||
} // namespace contrib
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -73,68 +73,5 @@ class Gelu : public OpKernel {
|
|||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class FastGelu : public OpKernel {
|
||||
public:
|
||||
FastGelu(const OpKernelInfo& info) : OpKernel(info) {}
|
||||
|
||||
Status Compute(OpKernelContext* context) const override {
|
||||
const auto* X = context->Input<Tensor>(0);
|
||||
Tensor* Y = context->Output(0, X->Shape());
|
||||
concurrency::ThreadPool* tp = context->GetOperatorThreadPool();
|
||||
if (nullptr != tp) {
|
||||
const T* input = X->template Data<T>();
|
||||
T* output = Y->template MutableData<T>();
|
||||
int task_count = tp->NumThreads() + 1;
|
||||
int64_t elem_count = X->Shape().Size();
|
||||
const auto coefficient = kAlpha * kGamma;
|
||||
if (elem_count > task_count) {
|
||||
tp->SimpleParallelFor(static_cast<std::ptrdiff_t>(task_count), [ input,
|
||||
output,
|
||||
elem_count,
|
||||
task_count,
|
||||
kAlpha = this->kAlpha,
|
||||
coefficient ](std::ptrdiff_t i) {
|
||||
int64_t elem_inx_start = i * elem_count / task_count;
|
||||
int64_t elem_inx_end = (i + 1) * elem_count / task_count;
|
||||
for (int64_t elem_inx = elem_inx_start; elem_inx < elem_inx_end; elem_inx++) {
|
||||
const auto x = input[elem_inx];
|
||||
output[elem_inx] = x * (coefficient * x * x + kAlpha);
|
||||
output[elem_inx] = 0.5f * x * (1.0f + tanh(output[elem_inx]));
|
||||
}
|
||||
});
|
||||
return Status::OK();
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Commented out EIGEN implentation due to EIGEN bug.
|
||||
// On Windows build with GPU enabled, kGamma * x_pow_3 + EIGEN_X below would produce incorrect
|
||||
// result, same issue discovered in fast_gelu_grad op.
|
||||
// Given that CPU kernel is mostly for conformance check, where performance is not of high
|
||||
// priority, to workaround this bug, use a for loop and avoid using EIGEN library.
|
||||
//
|
||||
// const auto x_pow_3 = EIGEN_X.cube();
|
||||
// const auto tanh_result = (kAlpha * (kGamma * x_pow_3 + EIGEN_X)).tanh();
|
||||
|
||||
// EIGEN_Y = 0.5f * EIGEN_X * (1.f + tanh_result);
|
||||
|
||||
const T* input = X->template Data<T>();
|
||||
T* output = Y->template MutableData<T>();
|
||||
int64_t elem_count = X->Shape().Size();
|
||||
for (auto i = 0; i < elem_count; ++i) {
|
||||
const auto x_val = input[i];
|
||||
const auto x_cube = x_val * x_val * x_val;
|
||||
T tanh_result = std::tanh(kAlpha * x_val + kAlpha * kGamma * x_cube);
|
||||
output[i] = 0.5f * x_val * (tanh_result + 1.0f);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr float kAlpha = static_cast<float>(M_2_SQRTPI * M_SQRT1_2);
|
||||
static constexpr float kGamma = 0.044715f;
|
||||
};
|
||||
|
||||
} // namespace contrib
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
Loading…
Reference in a new issue