mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-01 23:30:35 +00:00
View Op - new unit tests and add support for tensor memcpy by offset/size (#3439)
* view ops UTs * update per comments * PR comments - code clean up * code clean up per comments Co-authored-by: Ethan Tao <ettao@microsoft.com>
This commit is contained in:
parent
15e32b44fd
commit
b35468289a
4 changed files with 203 additions and 9 deletions
|
|
@ -193,6 +193,7 @@ class Tensor final {
|
|||
/**
|
||||
* Get the byte offset with respect to the p_data
|
||||
* @warning this is a temporary solution for reusing a buffer that is larger than needed.
|
||||
* @warning use with caution - make sure you do boundary check before calling this method (see view.cc)
|
||||
*/
|
||||
inline ptrdiff_t ByteOffset() const {
|
||||
return byte_offset_;
|
||||
|
|
|
|||
|
|
@ -11,12 +11,6 @@
|
|||
|
||||
namespace onnxruntime {
|
||||
|
||||
#if defined(USE_MIMALLOC_ARENA_ALLOCATOR)
|
||||
using TArenaAllocator = MiMallocArena;
|
||||
#else
|
||||
using TArenaAllocator = BFCArena;
|
||||
#endif
|
||||
|
||||
using namespace ::onnxruntime::common;
|
||||
|
||||
AllocatorPtr CreateAllocator(DeviceAllocatorRegistrationInfo info, OrtDevice::DeviceId device_id) {
|
||||
|
|
@ -24,7 +18,7 @@ AllocatorPtr CreateAllocator(DeviceAllocatorRegistrationInfo info, OrtDevice::De
|
|||
if (device_allocator->AllowsArena()) {
|
||||
#ifdef USE_MIMALLOC
|
||||
return std::shared_ptr<IArenaAllocator>(
|
||||
onnxruntime::make_unique<MiMallocArena>(std::move(device_allocator), info.max_mem, arena_extend_strategy));
|
||||
onnxruntime::make_unique<MiMallocArena>(std::move(device_allocator), info.max_mem));
|
||||
#else
|
||||
return std::shared_ptr<IArenaAllocator>(
|
||||
onnxruntime::make_unique<BFCArena>(std::move(device_allocator), info.max_mem, info.arena_extend_strategy));
|
||||
|
|
|
|||
195
orttraining/orttraining/test/training_ops/cuda/view_op_test.cc
Normal file
195
orttraining/orttraining/test/training_ops/cuda/view_op_test.cc
Normal file
|
|
@ -0,0 +1,195 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "test/providers/provider_test_utils.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace test {
|
||||
|
||||
template <class T>
|
||||
using ShapeAndData = std::pair<const std::vector<int64_t>, const std::vector<T>>;
|
||||
|
||||
using ShapeAndFloatData = ShapeAndData<float>;
|
||||
using ShapeAndDoubleData = ShapeAndData<double>;
|
||||
using ShapeAndHalfData = ShapeAndData<MLFloat16>;
|
||||
using ShapeData = ShapeAndData<int64_t>;
|
||||
using ExpectResult = OpTester::ExpectResult;
|
||||
|
||||
template <typename T>
|
||||
void RunTest(const ShapeAndData<T>& input,
|
||||
const std::vector<ShapeData>& shapes,
|
||||
const std::vector<ShapeAndData<T>>& outputs,
|
||||
bool expect_failure = false,
|
||||
const std::string& err_msg = {}) {
|
||||
OpTester test("View", 1, onnxruntime::kMSDomain);
|
||||
|
||||
test.AddInput<T>("input0", input.first, input.second);
|
||||
|
||||
int i = 1;
|
||||
for (auto& s : shapes) {
|
||||
auto& shape = s.first;
|
||||
auto& data = s.second;
|
||||
std::ostringstream oss;
|
||||
oss << "input" << i++;
|
||||
test.AddInput<int64_t>(oss.str().c_str(), shape, data);
|
||||
}
|
||||
|
||||
i = 0;
|
||||
for (auto& output : outputs) {
|
||||
auto& shape = output.first;
|
||||
auto& data = output.second;
|
||||
std::ostringstream oss;
|
||||
oss << "output" << i++;
|
||||
test.AddOutput<T>(oss.str().c_str(), shape, data);
|
||||
}
|
||||
|
||||
std::unordered_set<std::string> excluded_providers;
|
||||
|
||||
test.Run(expect_failure ? ExpectResult::kExpectFailure : ExpectResult::kExpectSuccess, err_msg, excluded_providers);
|
||||
}
|
||||
|
||||
// A {4, 2} float tensor viewed as two {2, 2} tensors: the first four values
// and the last four values.
TEST(ViewOperatorTest, TwoViewFloat_1) {
  const ShapeAndFloatData source{{4, 2},
                                 {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f}};

  // Both requested view shapes are the 1-D int64 tensor [2, 2].
  const std::vector<ShapeData> view_shapes(2, ShapeData{{2}, {2, 2}});

  std::vector<ShapeAndFloatData> expected;
  expected.emplace_back(std::vector<int64_t>{2, 2}, std::vector<float>{1.f, 2.f, 3.f, 4.f});
  expected.emplace_back(std::vector<int64_t>{2, 2}, std::vector<float>{5.f, 6.f, 7.f, 8.f});

  RunTest<float>(source, view_shapes, expected);
}
|
||||
|
||||
// A {4, 2} float tensor viewed as a {1, 2} tensor followed by a {3, 2} tensor.
TEST(ViewOperatorTest, TwoViewFloat_2) {
  const ShapeAndFloatData source{{4, 2},
                                 {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f}};

  // Requested view shapes, each given as a 1-D int64 tensor with two entries.
  std::vector<ShapeData> view_shapes;
  view_shapes.emplace_back(std::vector<int64_t>{2}, std::vector<int64_t>{1, 2});
  view_shapes.emplace_back(std::vector<int64_t>{2}, std::vector<int64_t>{3, 2});

  std::vector<ShapeAndFloatData> expected;
  expected.emplace_back(std::vector<int64_t>{1, 2}, std::vector<float>{1.f, 2.f});
  expected.emplace_back(std::vector<int64_t>{3, 2},
                        std::vector<float>{3.f, 4.f, 5.f, 6.f, 7.f, 8.f});

  RunTest<float>(source, view_shapes, expected);
}
|
||||
|
||||
// A {4, 2} float tensor viewed as a rank-2 {1, 2} tensor followed by a
// rank-3 {1, 3, 2} tensor.
TEST(ViewOperatorTest, TwoViewFloat_3) {
  const ShapeAndFloatData source{{4, 2},
                                 {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f}};

  // The second shape tensor has three entries because its view is rank 3.
  std::vector<ShapeData> view_shapes;
  view_shapes.emplace_back(std::vector<int64_t>{2}, std::vector<int64_t>{1, 2});
  view_shapes.emplace_back(std::vector<int64_t>{3}, std::vector<int64_t>{1, 3, 2});

  std::vector<ShapeAndFloatData> expected;
  expected.emplace_back(std::vector<int64_t>{1, 2}, std::vector<float>{1.f, 2.f});
  expected.emplace_back(std::vector<int64_t>{1, 3, 2},
                        std::vector<float>{3.f, 4.f, 5.f, 6.f, 7.f, 8.f});

  RunTest<float>(source, view_shapes, expected);
}
|
||||
|
||||
// A {4, 3} float tensor (12 values) viewed as three tensors of shapes
// {1, 2}, {1, 3, 2} and {4, 1}.
TEST(ViewOperatorTest, ThreeViewFloat) {
  const ShapeAndFloatData source{{4, 3},
                                 {1.f, 2.f, 3.f, 4.f, 5.f, 6.f,
                                  7.f, 8.f, 9.f, 10.f, 11.f, 12.f}};

  // One shape tensor per requested view.
  std::vector<ShapeData> view_shapes;
  view_shapes.emplace_back(std::vector<int64_t>{2}, std::vector<int64_t>{1, 2});
  view_shapes.emplace_back(std::vector<int64_t>{3}, std::vector<int64_t>{1, 3, 2});
  view_shapes.emplace_back(std::vector<int64_t>{2}, std::vector<int64_t>{4, 1});

  std::vector<ShapeAndFloatData> expected;
  expected.emplace_back(std::vector<int64_t>{1, 2}, std::vector<float>{1.f, 2.f});
  expected.emplace_back(std::vector<int64_t>{1, 3, 2},
                        std::vector<float>{3.f, 4.f, 5.f, 6.f, 7.f, 8.f});
  expected.emplace_back(std::vector<int64_t>{4, 1},
                        std::vector<float>{9.f, 10.f, 11.f, 12.f});

  RunTest<float>(source, view_shapes, expected);
}
|
||||
|
||||
// A {3, 2} double tensor viewed as a {2, 1} tensor followed by a {1, 2, 2} tensor.
TEST(ViewOperatorTest, TwoViewDouble) {
  const ShapeAndDoubleData source{{3, 2},
                                  {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}};

  // The second shape tensor has three entries because its view is rank 3.
  std::vector<ShapeData> view_shapes;
  view_shapes.emplace_back(std::vector<int64_t>{2}, std::vector<int64_t>{2, 1});
  view_shapes.emplace_back(std::vector<int64_t>{3}, std::vector<int64_t>{1, 2, 2});

  std::vector<ShapeAndDoubleData> expected;
  expected.emplace_back(std::vector<int64_t>{2, 1}, std::vector<double>{1.0, 2.0});
  expected.emplace_back(std::vector<int64_t>{1, 2, 2},
                        std::vector<double>{3.0, 4.0, 5.0, 6.0});

  RunTest<double>(source, view_shapes, expected);
}
|
||||
|
||||
// A {3, 2} half-precision tensor viewed as a {2, 1} tensor followed by a
// {1, 2, 2} tensor. Values are written as floats and converted to MLFloat16.
TEST(ViewOperatorTest, TwoViewHalf) {
  // Converts float values to MLFloat16. The element count comes from the
  // vector itself, so the buffer size and the conversion length cannot drift
  // apart from the data when a test vector is edited.
  const auto to_half = [](const std::vector<float>& values) {
    std::vector<MLFloat16> halves(values.size());
    ConvertFloatToMLFloat16(values.data(), halves.data(), static_cast<int>(values.size()));
    return halves;
  };

  // input shape and data
  const ShapeAndHalfData input{{3, 2}, to_half({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f})};

  // Requested view shapes; the second is rank 3, hence three entries.
  std::vector<ShapeData> shapes;
  shapes.push_back({{2}, {2, 1}});
  shapes.push_back({{3}, {1, 2, 2}});

  std::vector<ShapeAndHalfData> outputs;
  outputs.push_back({{2, 1}, to_half({1.0f, 2.0f})});
  outputs.push_back({{1, 2, 2}, to_half({3.0f, 4.0f, 5.0f, 6.0f})});

  RunTest<MLFloat16>(input, shapes, outputs);
}
|
||||
|
||||
} // namespace test
|
||||
} // namespace onnxruntime
|
||||
|
|
@ -73,9 +73,13 @@ Status View::ComputeInternal(OpKernelContext* context) const {
|
|||
Tensor* Y = context->Output(i, y_shapes[i]);
|
||||
if (Y != nullptr) {
|
||||
if (X_data != Y->MutableDataRaw()) {
|
||||
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "View output is not sharing the underlaying buffer of input");
|
||||
// View output is not sharing the underlying buffer of input, copy instead
|
||||
const void* source = static_cast<const char*>(X_data) + y_byte_offsets[i];
|
||||
void* target = Y->MutableDataRaw();
|
||||
CUDA_RETURN_IF_ERROR(cudaMemcpyAsync(target, source, Y->SizeInBytes(), cudaMemcpyDeviceToDevice));
|
||||
} else {
|
||||
Y->SetByteOffset(y_byte_offsets[i]);
|
||||
}
|
||||
Y->SetByteOffset(y_byte_offsets[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue