Get onnxruntime/core/providers/cuda/math/matmul_integer.cc from ort_training.

This commit is contained in:
Sergii Dymchenko 2020-04-09 18:09:46 -07:00
parent 1b465ba820
commit 507d2bb9b9

View file

@@ -6,7 +6,6 @@
#include "core/providers/cpu/math/matmul_helper.h"
#include "core/providers/cuda/shared_inc/fpgeneric.h"
#include "core/providers/cuda/cuda_allocator.h"
#include "core/providers/cuda/igemm.h"
#include "core/providers/common.h"
namespace onnxruntime {
@@ -107,28 +106,6 @@ Status MatMulInteger<int8_t, int8_t>::ComputeInternal(OpKernelContext* ctx) cons
beta = 1;
}
#if CUDA_VERSION >= 10010
if (DeviceProp::GetDeviceProps().major >= 7 && DeviceProp::GetDeviceProps().minor >= 5) {
for (size_t batch = 0; batch < helper.OutputOffsets().size(); batch++) {
LtIgemmTensor(
static_cast<int>(helper.M()),
static_cast<int>(helper.N()),
static_cast<int>(helper.K()),
alpha,
beta,
a_ptr + helper.LeftOffsets()[batch],
static_cast<int>(helper.K()),
b_ptr + helper.RightOffsets()[batch],
static_cast<int>(helper.N()),
output_ptr + helper.OutputOffsets()[batch],
static_cast<int>(helper.N()),
this,
Base::CublasLtHandle());
}
return Status::OK();
}
#endif
// pad A and B to make their leading dimension be multiples of 32
// because cublasGemmEx requires:
// 1. leading dimension is multiples of 4