Use GEMM for SVMRegressor. (#3305)

This commit is contained in:
Scott McKay 2020-03-25 11:49:44 +10:00 committed by GitHub
parent 19edad132c
commit 4db01309cb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 179 additions and 51 deletions

View file

@ -7,6 +7,7 @@
#include "core/framework/op_kernel.h"
#include "core/util/math_cpuonly.h"
#include "ml_common.h"
#include "core/providers/cpu/math/gemm.h"
namespace onnxruntime {
namespace ml {
@ -15,7 +16,8 @@ namespace ml {
template <typename T>
class SVMCommon {
protected:
SVMCommon(const OpKernelInfo& info) : kernel_type_(MakeKernel(info.GetAttrOrDefault<std::string>("kernel_type", "LINEAR"))) {
SVMCommon(const OpKernelInfo& info)
: kernel_type_(MakeKernel(info.GetAttrOrDefault<std::string>("kernel_type", "LINEAR"))) {
std::vector<float> kernel_params;
ORT_ENFORCE(info.GetAttrs<float>("kernel_params", kernel_params).IsOK());
@ -29,34 +31,98 @@ class SVMCommon {
void set_kernel_type(KERNEL new_kernel_type) { kernel_type_ = new_kernel_type; }
KERNEL get_kernel_type() const { return kernel_type_; }
void batched_kernel_dot(const gsl::span<const T> a, const gsl::span<const T> b,
int64_t m, int64_t n, int64_t k,
float scalar_C,
const gsl::span<T> out,
concurrency::ThreadPool* threadpool) const {
assert(a.size() == size_t(m * k) && b.size() == size_t(k * n) && out.size() == size_t(m * n));
if (kernel_type_ == KERNEL::RBF) {
T* cur_out = out.data();
const T* cur_batch = a.data();
// each batch has 'k' features
for (int64_t batch = 0; batch < m; ++batch) {
const T* cur_support_vector = b.data();
// broadcast the support vectors against the k features in each batch. output is one value per support vector
for (int64_t support_vector = 0; support_vector < n; ++support_vector) {
T sum = 0.f;
const T* cur_input = cur_batch;
for (int64_t feature = 0; feature < k; ++feature) {
T val = *cur_input++ - *cur_support_vector++;
sum += val * val;
}
*cur_out++ = std::exp(-gamma_ * sum);
}
cur_batch += k; // move to start of next batch
}
} else {
float alpha = 1.f;
float beta = 1.f;
static const TensorShape shape_C({1});
float c = scalar_C; // scalar_C is used for LINEAR in the GEMM
if (kernel_type_ != KERNEL::LINEAR) {
// kernel_type_ == POLY or SIGMOID
alpha = gamma_;
c = coef0_;
}
onnxruntime::Gemm<T>::ComputeGemm(CBLAS_TRANSPOSE::CblasNoTrans, CBLAS_TRANSPOSE::CblasTrans,
m, n, k,
alpha, a.data(), b.data(), beta,
c != 0.f ? &c : nullptr, &shape_C,
out.data(),
threadpool);
if (kernel_type_ == KERNEL::POLY) {
auto map_out = EigenVectorArrayMap<T>(out.data(), out.size());
if (degree_ == 2)
map_out = map_out.square();
else if (degree_ == 3)
map_out = map_out.cube();
else
map_out = map_out.pow(degree_);
} else if (kernel_type_ == KERNEL::SIGMOID) {
MlasComputeTanh(out.data(), out.data(), out.size());
}
}
}
float kernel_dot(const T* A, int64_t a, const std::vector<float>& B, int64_t b, int64_t len, KERNEL k) const {
double sum = 0;
const T* pA = A + a;
const float* pB = B.data() + b;
switch(k) {
case KERNEL::POLY:
for (int64_t i = len; i > 0; --i)
sum += *pA++ * *pB++;
sum = gamma_ * sum + coef0_;
sum = std::pow(sum, degree_);
break;
case KERNEL::SIGMOID:
for (int64_t i = len; i > 0; --i)
sum += *pA++ * *pB++;
sum = gamma_ * sum + coef0_;
sum = std::tanh(sum);
break;
case KERNEL::RBF:
for (int64_t i = len; i > 0; --i) {
double val = *pA++ - *pB++;
sum += val * val;
}
sum = std::exp(-gamma_ * sum);
break;
case KERNEL::LINEAR:
for (int64_t i = len; i > 0; --i)
sum += *pA++ * *pB++;
break;
switch (k) {
case KERNEL::POLY:
for (int64_t i = len; i > 0; --i)
sum += *pA++ * *pB++;
sum = gamma_ * sum + coef0_;
sum = std::pow(sum, degree_);
break;
case KERNEL::SIGMOID:
for (int64_t i = len; i > 0; --i)
sum += *pA++ * *pB++;
sum = gamma_ * sum + coef0_;
sum = std::tanh(sum);
break;
case KERNEL::RBF:
for (int64_t i = len; i > 0; --i) {
double val = *pA++ - *pB++;
sum += val * val;
}
sum = std::exp(-gamma_ * sum);
break;
case KERNEL::LINEAR:
for (int64_t i = len; i > 0; --i)
sum += *pA++ * *pB++;
break;
}
return (float)sum;
}
@ -70,6 +136,7 @@ class SVMCommon {
template <typename T>
class SVMClassifier final : public OpKernel, private SVMCommon<T> {
using SVMCommon<T>::batched_kernel_dot;
using SVMCommon<T>::kernel_dot;
using SVMCommon<T>::set_kernel_type;
using SVMCommon<T>::get_kernel_type;

View file

@ -40,33 +40,61 @@ template <typename T>
Status SVMRegressor<T>::Compute(OpKernelContext* ctx) const {
const auto* X = ctx->Input<Tensor>(0);
int64_t stride = X->Shape().NumDimensions() == 1 ? X->Shape()[0] : X->Shape()[1];
int64_t N = X->Shape().NumDimensions() == 1 ? 1 : X->Shape()[0];
int64_t num_features = X->Shape().NumDimensions() == 1 ? X->Shape()[0] : X->Shape()[1];
int64_t num_batches = X->Shape().NumDimensions() == 1 ? 1 : X->Shape()[0];
ORT_ENFORCE(num_features == feature_count_);
Tensor* Y = ctx->Output(0, TensorShape({N, 1})); // this op outputs for one target only
const auto* x_data = X->template Data<T>();
int64_t current_weight_0;
float sum;
// X: [num_batches, feature_count_] where features could be coefficients or support vectors
// coefficients_: [vector_count_]
// support_vectors_ : [vector_count_, feature_count_]
for (int64_t n = 0; n < N; n++) { //for each example
current_weight_0 = n * stride;
sum = 0.f;
if (mode_ == SVM_TYPE::SVM_SVC) {
for (int64_t j = 0; j < vector_count_; j++) {
sum += kernel_dot(x_data, current_weight_0, support_vectors_, feature_count_ * j,
feature_count_, get_kernel_type()) * coefficients_[j];
}
sum += rho_[0];
} else if (mode_ == SVM_TYPE::SVM_LINEAR) { //liblinear
sum = kernel_dot(x_data, current_weight_0, coefficients_, 0,
feature_count_, get_kernel_type());
sum += rho_[0];
// Y: [num_batches, 1]
Tensor* Y = ctx->Output(0, TensorShape({num_batches, 1})); // this op outputs for one target only
const auto x_data = X->template DataAsSpan<T>();
auto out = Y->MutableDataAsSpan<T>();
concurrency::ThreadPool* threadpool = ctx->GetOperatorThreadPool();
if (mode_ == SVM_TYPE::SVM_SVC) {
AllocatorPtr allocator;
auto status = ctx->GetTempSpaceAllocator(&allocator);
ORT_RETURN_IF_ERROR(status);
auto tmp_data = IAllocator::MakeUniquePtr<T>(allocator, num_batches * vector_count_);
auto tmp_data_span = gsl::make_span<T>(tmp_data.get(), num_batches * vector_count_);
// combine the input data with the support vectors and apply the kernel type
// output is {num_batches, vector_count_}
batched_kernel_dot(x_data, support_vectors_, num_batches, vector_count_, feature_count_, 0.f, tmp_data_span,
threadpool);
static const TensorShape rho_shape({1});
// combine with coefficients and add rho_[0]
onnxruntime::Gemm<T>::ComputeGemm(CBLAS_TRANSPOSE::CblasNoTrans, CBLAS_TRANSPOSE::CblasTrans,
num_batches, 1, vector_count_,
1.f, tmp_data_span.data(), coefficients_.data(), 1.f,
rho_.data(), &rho_shape,
out.data(),
threadpool);
} else if (mode_ == SVM_TYPE::SVM_LINEAR) {
// combine the coefficients with the input data and apply the kernel type
batched_kernel_dot(x_data, coefficients_, num_batches, 1, feature_count_, rho_[0], out, threadpool);
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unexpected mode:", static_cast<int>(mode_));
}
if (one_class_) {
float* y = out.data();
float* y_end = y + out.size();
while (y < y_end) {
*y = (*y > 0.f ? 1.f : -1.f);
++y;
}
Y->template MutableData<float>()[n] = one_class_ ? (sum > 0 ? 1.f : -1.f) : sum;
}
return Status::OK();
}
} // namespace ml
} // namespace onnxruntime

View file

@ -14,6 +14,7 @@ namespace ml {
template <typename T>
class SVMRegressor final : public OpKernel, private SVMCommon<T> {
using SVMCommon<T>::batched_kernel_dot;
using SVMCommon<T>::kernel_dot;
using SVMCommon<T>::set_kernel_type;
using SVMCommon<T>::get_kernel_type;

View file

@ -60,14 +60,46 @@ TEST(MLOpTest, SVMRegressorNuSVC) {
TEST(MLOpTest, SVMRegressorNuSVCPolyKernel) {
OpTester test("SVMRegressor", 1, onnxruntime::kMLDomain);
std::vector<float> dual_coefficients = {-2.74322388e+01f, 5.81893108e+01f, -1.00000000e+02f, 6.91693781e+01f, 7.62161261e-02f, -2.66618042e-03f};
std::vector<float> support_vectors = {0.f, 0.5f, 32.f, 1.f, 1.5f, 1.f, 2.f, 2.9f, -32.f, 3.f, 13.3f, -11.f, 12.f, 12.9f, -312.f, 43.f, 413.3f, -114.f};
std::vector<float> dual_coefficients = {-2.74322388e+01f, 5.81893108e+01f, -1.00000000e+02f,
6.91693781e+01f, 7.62161261e-02f, -2.66618042e-03f};
std::vector<float> support_vectors = {0.f, 0.5f, 32.f,
1.f, 1.5f, 1.f,
2.f, 2.9f, -32.f,
3.f, 13.3f, -11.f,
12.f, 12.9f, -312.f,
43.f, 413.3f, -114.f};
std::vector<float> rho = {1.5004596f};
std::vector<float> kernel_params = {0.001f, 0.f, 3.f}; //gamma, coef0, degree
//three estimates, for 3 points each, so 9 predictions
std::vector<float> X = {1.f, 0.0f, 0.4f, 3.0f, 44.0f, -3.f, 12.0f, 12.9f, -312.f, 23.0f, 11.3f, -222.f, 23.0f, 11.3f, -222.f, 23.0f, 3311.3f, -222.f, 23.0f, 11.3f, -222.f, 43.0f, 413.3f, -114.f};
std::vector<float> predictions = {1.50041863e+00f, 3.49624795e-01f, 2.75850969e+00f, -2.28659294e+02f, -2.28659294e+02f, -6.09640826e+05f, -2.28659294e+02f, 3.89055773e+00f};
// 8 batches with 3 features in each
std::vector<float> X = {1.f, 0.0f, 0.4f,
3.0f, 44.0f, -3.f,
12.0f, 12.9f, 112.f,
23.0f, 11.3f, -222.f,
23.0f, 11.3f, -222.f,
23.0f, 3311.3f, -222.f,
23.0f, 11.3f, -222.f,
43.0f, 413.3f, -114.f};
/*
# batched_dot_product calculations
first = np.matmul(X, np.transpose(support_vectors))
first *= gamma
first += coef0
POLY_dot_product = np.power(first, degree)
# second GEMM call in SVMRegressor code
predictions = np.matmul(POLY_dot_product, np.transpose(coefficients))
predictions += rho
*/
std::vector<float> predictions = {1.50041862e+00f,
3.49624789e-01f,
-1.36680453e+02f,
-2.28659315e+02f,
-2.28659315e+02f,
-6.09640827e+05f,
-2.28659315e+02f,
3.89055458e+00f};
test.AddAttribute("kernel_type", std::string("POLY"));
test.AddAttribute("coefficients", dual_coefficients);