mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-01 23:30:35 +00:00
Use GEMM for SVMRegressor. (#3305)
This commit is contained in:
parent
19edad132c
commit
4db01309cb
4 changed files with 179 additions and 51 deletions
|
|
@ -7,6 +7,7 @@
|
|||
#include "core/framework/op_kernel.h"
|
||||
#include "core/util/math_cpuonly.h"
|
||||
#include "ml_common.h"
|
||||
#include "core/providers/cpu/math/gemm.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace ml {
|
||||
|
|
@ -15,7 +16,8 @@ namespace ml {
|
|||
template <typename T>
|
||||
class SVMCommon {
|
||||
protected:
|
||||
SVMCommon(const OpKernelInfo& info) : kernel_type_(MakeKernel(info.GetAttrOrDefault<std::string>("kernel_type", "LINEAR"))) {
|
||||
SVMCommon(const OpKernelInfo& info)
|
||||
: kernel_type_(MakeKernel(info.GetAttrOrDefault<std::string>("kernel_type", "LINEAR"))) {
|
||||
std::vector<float> kernel_params;
|
||||
ORT_ENFORCE(info.GetAttrs<float>("kernel_params", kernel_params).IsOK());
|
||||
|
||||
|
|
@ -29,34 +31,98 @@ class SVMCommon {
|
|||
void set_kernel_type(KERNEL new_kernel_type) { kernel_type_ = new_kernel_type; }
|
||||
KERNEL get_kernel_type() const { return kernel_type_; }
|
||||
|
||||
void batched_kernel_dot(const gsl::span<const T> a, const gsl::span<const T> b,
|
||||
int64_t m, int64_t n, int64_t k,
|
||||
float scalar_C,
|
||||
const gsl::span<T> out,
|
||||
concurrency::ThreadPool* threadpool) const {
|
||||
assert(a.size() == size_t(m * k) && b.size() == size_t(k * n) && out.size() == size_t(m * n));
|
||||
|
||||
if (kernel_type_ == KERNEL::RBF) {
|
||||
T* cur_out = out.data();
|
||||
const T* cur_batch = a.data();
|
||||
|
||||
// each batch has 'k' features
|
||||
for (int64_t batch = 0; batch < m; ++batch) {
|
||||
const T* cur_support_vector = b.data();
|
||||
|
||||
// broadcast the support vectors against the k features in each batch. output is one value per support vector
|
||||
for (int64_t support_vector = 0; support_vector < n; ++support_vector) {
|
||||
T sum = 0.f;
|
||||
const T* cur_input = cur_batch;
|
||||
|
||||
for (int64_t feature = 0; feature < k; ++feature) {
|
||||
T val = *cur_input++ - *cur_support_vector++;
|
||||
sum += val * val;
|
||||
}
|
||||
|
||||
*cur_out++ = std::exp(-gamma_ * sum);
|
||||
}
|
||||
|
||||
cur_batch += k; // move to start of next batch
|
||||
}
|
||||
} else {
|
||||
float alpha = 1.f;
|
||||
float beta = 1.f;
|
||||
static const TensorShape shape_C({1});
|
||||
float c = scalar_C; // scalar_C is used for LINEAR in the GEMM
|
||||
|
||||
if (kernel_type_ != KERNEL::LINEAR) {
|
||||
// kernel_type_ == POLY or SIGMOID
|
||||
alpha = gamma_;
|
||||
c = coef0_;
|
||||
}
|
||||
|
||||
onnxruntime::Gemm<T>::ComputeGemm(CBLAS_TRANSPOSE::CblasNoTrans, CBLAS_TRANSPOSE::CblasTrans,
|
||||
m, n, k,
|
||||
alpha, a.data(), b.data(), beta,
|
||||
c != 0.f ? &c : nullptr, &shape_C,
|
||||
out.data(),
|
||||
threadpool);
|
||||
|
||||
if (kernel_type_ == KERNEL::POLY) {
|
||||
auto map_out = EigenVectorArrayMap<T>(out.data(), out.size());
|
||||
if (degree_ == 2)
|
||||
map_out = map_out.square();
|
||||
else if (degree_ == 3)
|
||||
map_out = map_out.cube();
|
||||
else
|
||||
map_out = map_out.pow(degree_);
|
||||
|
||||
} else if (kernel_type_ == KERNEL::SIGMOID) {
|
||||
MlasComputeTanh(out.data(), out.data(), out.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
float kernel_dot(const T* A, int64_t a, const std::vector<float>& B, int64_t b, int64_t len, KERNEL k) const {
|
||||
double sum = 0;
|
||||
const T* pA = A + a;
|
||||
const float* pB = B.data() + b;
|
||||
switch(k) {
|
||||
case KERNEL::POLY:
|
||||
for (int64_t i = len; i > 0; --i)
|
||||
sum += *pA++ * *pB++;
|
||||
sum = gamma_ * sum + coef0_;
|
||||
sum = std::pow(sum, degree_);
|
||||
break;
|
||||
case KERNEL::SIGMOID:
|
||||
for (int64_t i = len; i > 0; --i)
|
||||
sum += *pA++ * *pB++;
|
||||
sum = gamma_ * sum + coef0_;
|
||||
sum = std::tanh(sum);
|
||||
break;
|
||||
case KERNEL::RBF:
|
||||
for (int64_t i = len; i > 0; --i) {
|
||||
double val = *pA++ - *pB++;
|
||||
sum += val * val;
|
||||
}
|
||||
sum = std::exp(-gamma_ * sum);
|
||||
break;
|
||||
case KERNEL::LINEAR:
|
||||
for (int64_t i = len; i > 0; --i)
|
||||
sum += *pA++ * *pB++;
|
||||
break;
|
||||
switch (k) {
|
||||
case KERNEL::POLY:
|
||||
for (int64_t i = len; i > 0; --i)
|
||||
sum += *pA++ * *pB++;
|
||||
sum = gamma_ * sum + coef0_;
|
||||
sum = std::pow(sum, degree_);
|
||||
break;
|
||||
case KERNEL::SIGMOID:
|
||||
for (int64_t i = len; i > 0; --i)
|
||||
sum += *pA++ * *pB++;
|
||||
sum = gamma_ * sum + coef0_;
|
||||
sum = std::tanh(sum);
|
||||
break;
|
||||
case KERNEL::RBF:
|
||||
for (int64_t i = len; i > 0; --i) {
|
||||
double val = *pA++ - *pB++;
|
||||
sum += val * val;
|
||||
}
|
||||
sum = std::exp(-gamma_ * sum);
|
||||
break;
|
||||
case KERNEL::LINEAR:
|
||||
for (int64_t i = len; i > 0; --i)
|
||||
sum += *pA++ * *pB++;
|
||||
break;
|
||||
}
|
||||
return (float)sum;
|
||||
}
|
||||
|
|
@ -70,6 +136,7 @@ class SVMCommon {
|
|||
|
||||
template <typename T>
|
||||
class SVMClassifier final : public OpKernel, private SVMCommon<T> {
|
||||
using SVMCommon<T>::batched_kernel_dot;
|
||||
using SVMCommon<T>::kernel_dot;
|
||||
using SVMCommon<T>::set_kernel_type;
|
||||
using SVMCommon<T>::get_kernel_type;
|
||||
|
|
|
|||
|
|
@ -40,33 +40,61 @@ template <typename T>
|
|||
Status SVMRegressor<T>::Compute(OpKernelContext* ctx) const {
|
||||
const auto* X = ctx->Input<Tensor>(0);
|
||||
|
||||
int64_t stride = X->Shape().NumDimensions() == 1 ? X->Shape()[0] : X->Shape()[1];
|
||||
int64_t N = X->Shape().NumDimensions() == 1 ? 1 : X->Shape()[0];
|
||||
int64_t num_features = X->Shape().NumDimensions() == 1 ? X->Shape()[0] : X->Shape()[1];
|
||||
int64_t num_batches = X->Shape().NumDimensions() == 1 ? 1 : X->Shape()[0];
|
||||
ORT_ENFORCE(num_features == feature_count_);
|
||||
|
||||
Tensor* Y = ctx->Output(0, TensorShape({N, 1})); // this op outputs for one target only
|
||||
const auto* x_data = X->template Data<T>();
|
||||
int64_t current_weight_0;
|
||||
float sum;
|
||||
// X: [num_batches, feature_count_] where features could be coefficients or support vectors
|
||||
// coefficients_: [vector_count_]
|
||||
// support_vectors_ : [vector_count_, feature_count_]
|
||||
|
||||
for (int64_t n = 0; n < N; n++) { //for each example
|
||||
current_weight_0 = n * stride;
|
||||
sum = 0.f;
|
||||
if (mode_ == SVM_TYPE::SVM_SVC) {
|
||||
for (int64_t j = 0; j < vector_count_; j++) {
|
||||
sum += kernel_dot(x_data, current_weight_0, support_vectors_, feature_count_ * j,
|
||||
feature_count_, get_kernel_type()) * coefficients_[j];
|
||||
}
|
||||
sum += rho_[0];
|
||||
} else if (mode_ == SVM_TYPE::SVM_LINEAR) { //liblinear
|
||||
sum = kernel_dot(x_data, current_weight_0, coefficients_, 0,
|
||||
feature_count_, get_kernel_type());
|
||||
sum += rho_[0];
|
||||
// Y: [num_batches, 1]
|
||||
Tensor* Y = ctx->Output(0, TensorShape({num_batches, 1})); // this op outputs for one target only
|
||||
const auto x_data = X->template DataAsSpan<T>();
|
||||
auto out = Y->MutableDataAsSpan<T>();
|
||||
|
||||
concurrency::ThreadPool* threadpool = ctx->GetOperatorThreadPool();
|
||||
|
||||
if (mode_ == SVM_TYPE::SVM_SVC) {
|
||||
AllocatorPtr allocator;
|
||||
auto status = ctx->GetTempSpaceAllocator(&allocator);
|
||||
ORT_RETURN_IF_ERROR(status);
|
||||
|
||||
auto tmp_data = IAllocator::MakeUniquePtr<T>(allocator, num_batches * vector_count_);
|
||||
auto tmp_data_span = gsl::make_span<T>(tmp_data.get(), num_batches * vector_count_);
|
||||
|
||||
// combine the input data with the support vectors and apply the kernel type
|
||||
// output is {num_batches, vector_count_}
|
||||
batched_kernel_dot(x_data, support_vectors_, num_batches, vector_count_, feature_count_, 0.f, tmp_data_span,
|
||||
threadpool);
|
||||
|
||||
static const TensorShape rho_shape({1});
|
||||
|
||||
// combine with coefficients and add rho_[0]
|
||||
onnxruntime::Gemm<T>::ComputeGemm(CBLAS_TRANSPOSE::CblasNoTrans, CBLAS_TRANSPOSE::CblasTrans,
|
||||
num_batches, 1, vector_count_,
|
||||
1.f, tmp_data_span.data(), coefficients_.data(), 1.f,
|
||||
rho_.data(), &rho_shape,
|
||||
out.data(),
|
||||
threadpool);
|
||||
} else if (mode_ == SVM_TYPE::SVM_LINEAR) {
|
||||
// combine the coefficients with the input data and apply the kernel type
|
||||
batched_kernel_dot(x_data, coefficients_, num_batches, 1, feature_count_, rho_[0], out, threadpool);
|
||||
} else {
|
||||
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unexpected mode:", static_cast<int>(mode_));
|
||||
}
|
||||
|
||||
if (one_class_) {
|
||||
float* y = out.data();
|
||||
float* y_end = y + out.size();
|
||||
|
||||
while (y < y_end) {
|
||||
*y = (*y > 0.f ? 1.f : -1.f);
|
||||
++y;
|
||||
}
|
||||
Y->template MutableData<float>()[n] = one_class_ ? (sum > 0 ? 1.f : -1.f) : sum;
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
} // namespace ml
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ namespace ml {
|
|||
|
||||
template <typename T>
|
||||
class SVMRegressor final : public OpKernel, private SVMCommon<T> {
|
||||
using SVMCommon<T>::batched_kernel_dot;
|
||||
using SVMCommon<T>::kernel_dot;
|
||||
using SVMCommon<T>::set_kernel_type;
|
||||
using SVMCommon<T>::get_kernel_type;
|
||||
|
|
|
|||
|
|
@ -60,14 +60,46 @@ TEST(MLOpTest, SVMRegressorNuSVC) {
|
|||
TEST(MLOpTest, SVMRegressorNuSVCPolyKernel) {
|
||||
OpTester test("SVMRegressor", 1, onnxruntime::kMLDomain);
|
||||
|
||||
std::vector<float> dual_coefficients = {-2.74322388e+01f, 5.81893108e+01f, -1.00000000e+02f, 6.91693781e+01f, 7.62161261e-02f, -2.66618042e-03f};
|
||||
std::vector<float> support_vectors = {0.f, 0.5f, 32.f, 1.f, 1.5f, 1.f, 2.f, 2.9f, -32.f, 3.f, 13.3f, -11.f, 12.f, 12.9f, -312.f, 43.f, 413.3f, -114.f};
|
||||
std::vector<float> dual_coefficients = {-2.74322388e+01f, 5.81893108e+01f, -1.00000000e+02f,
|
||||
6.91693781e+01f, 7.62161261e-02f, -2.66618042e-03f};
|
||||
std::vector<float> support_vectors = {0.f, 0.5f, 32.f,
|
||||
1.f, 1.5f, 1.f,
|
||||
2.f, 2.9f, -32.f,
|
||||
3.f, 13.3f, -11.f,
|
||||
12.f, 12.9f, -312.f,
|
||||
43.f, 413.3f, -114.f};
|
||||
std::vector<float> rho = {1.5004596f};
|
||||
std::vector<float> kernel_params = {0.001f, 0.f, 3.f}; //gamma, coef0, degree
|
||||
|
||||
//three estimates, for 3 points each, so 9 predictions
|
||||
std::vector<float> X = {1.f, 0.0f, 0.4f, 3.0f, 44.0f, -3.f, 12.0f, 12.9f, -312.f, 23.0f, 11.3f, -222.f, 23.0f, 11.3f, -222.f, 23.0f, 3311.3f, -222.f, 23.0f, 11.3f, -222.f, 43.0f, 413.3f, -114.f};
|
||||
std::vector<float> predictions = {1.50041863e+00f, 3.49624795e-01f, 2.75850969e+00f, -2.28659294e+02f, -2.28659294e+02f, -6.09640826e+05f, -2.28659294e+02f, 3.89055773e+00f};
|
||||
// 8 batches with 3 features in each
|
||||
std::vector<float> X = {1.f, 0.0f, 0.4f,
|
||||
3.0f, 44.0f, -3.f,
|
||||
12.0f, 12.9f, 112.f,
|
||||
23.0f, 11.3f, -222.f,
|
||||
23.0f, 11.3f, -222.f,
|
||||
23.0f, 3311.3f, -222.f,
|
||||
23.0f, 11.3f, -222.f,
|
||||
43.0f, 413.3f, -114.f};
|
||||
|
||||
/*
|
||||
# batched_dot_product calculations
|
||||
first = np.matmul(X, np.transpose(support_vectors))
|
||||
first *= gamma
|
||||
first += coef0
|
||||
POLY_dot_product = np.power(first, degree)
|
||||
|
||||
# second GEMM call in SVMRegressor code
|
||||
predictions = np.matmul(POLY_dot_product, np.transpose(coefficients))
|
||||
predictions += rho
|
||||
*/
|
||||
std::vector<float> predictions = {1.50041862e+00f,
|
||||
3.49624789e-01f,
|
||||
-1.36680453e+02f,
|
||||
-2.28659315e+02f,
|
||||
-2.28659315e+02f,
|
||||
-6.09640827e+05f,
|
||||
-2.28659315e+02f,
|
||||
3.89055458e+00f};
|
||||
|
||||
test.AddAttribute("kernel_type", std::string("POLY"));
|
||||
test.AddAttribute("coefficients", dual_coefficients);
|
||||
|
|
|
|||
Loading…
Reference in a new issue