mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-04 23:59:56 +00:00
Remove copy of generator in Multinomial (#1611)
* Remove copy of generator in Multinomial so that different values are generated each time. Add ability to test
This commit is contained in:
parent
b5de1324ef
commit
b405482cfa
4 changed files with 120 additions and 118 deletions
|
|
@ -76,8 +76,6 @@ void GenerateData(std::default_random_engine& generator, TDistribution distribut
|
|||
static Status RandomNormalCompute(float mean, float scale, std::default_random_engine& generator, TensorProto::DataType dtype, Tensor& Y);
|
||||
static Status RandomUniformCompute(float high, float low, std::default_random_engine& generator, TensorProto::DataType dtype, Tensor& Y);
|
||||
|
||||
// Leaving in case we need to change to this approach
|
||||
//static Status CreateOutputTensorFromTensorValues(OpKernelContext* ctx, const Tensor& X,Tensor** Y);
|
||||
static Status CreateOutputTensorFromTensorShape(OpKernelContext* ctx, const Tensor& X, Tensor** Y);
|
||||
static TensorProto::DataType InferDataType(const Tensor& tensor);
|
||||
|
||||
|
|
@ -168,53 +166,48 @@ static Status MultinomialCompute(OpKernelContext* ctx,
|
|||
Eigen::array<int64_t, 2> Y_dims = {{batch_size, num_samples}};
|
||||
Matrix<OutputType> output = Matrix<OutputType>(Y.template MutableData<OutputType>(), Y_dims);
|
||||
|
||||
// TODO (perf optimization) - the idea behind making this a lambda is so that we can parallelize across batches.
|
||||
// When we do that this lambda will act as one task given to a thread
|
||||
auto DoWork = [ctx, num_samples, num_classes, &generator, &logits, &output](int64_t start_row,
|
||||
int64_t limit_row) {
|
||||
std::default_random_engine generator_copy = generator;
|
||||
// BEGIN create temporary tensor
|
||||
AllocatorPtr alloc;
|
||||
ctx->GetTempSpaceAllocator(&alloc);
|
||||
auto cdf_data = static_cast<double*>(alloc->Alloc(sizeof(double) * num_classes));
|
||||
BufferUniquePtr cdf_buffer(cdf_data, BufferDeleter(alloc));
|
||||
Eigen::array<int64_t, 1> cdf_dims = {{num_classes}};
|
||||
auto cdf = EigenVector<double>(cdf_data, cdf_dims);
|
||||
// END create temporary tensor
|
||||
// BEGIN create temporary tensor
|
||||
AllocatorPtr alloc;
|
||||
ORT_RETURN_IF_ERROR(ctx->GetTempSpaceAllocator(&alloc));
|
||||
auto cdf_data = static_cast<double*>(alloc->Alloc(sizeof(double) * num_classes));
|
||||
BufferUniquePtr cdf_buffer(cdf_data, BufferDeleter(alloc));
|
||||
Eigen::array<int64_t, 1> cdf_dims = {{num_classes}};
|
||||
auto cdf = EigenVector<double>(cdf_data, cdf_dims);
|
||||
// END create temporary tensor
|
||||
|
||||
std::uniform_real_distribution<double> dist(0.0, 1.0); // TODO: should this be initialized per batch?
|
||||
for (int64_t b = start_row; b < limit_row; ++b) {
|
||||
const float* logits_row = &(logits(b, 0));
|
||||
// Takes an along-class maximum (for numerical stability).
|
||||
float maxx = std::numeric_limits<float>::lowest();
|
||||
for (int64_t j = 0; j < num_classes; ++j) {
|
||||
if (Eigen::numext::isfinite(logits_row[j])) {
|
||||
maxx = std::max(maxx, logits_row[j]);
|
||||
}
|
||||
}
|
||||
const auto max_logit = static_cast<double>(maxx);
|
||||
std::uniform_real_distribution<double> dist(0.0, 1.0); // TODO: should this be initialized per batch?
|
||||
|
||||
// Precompute cumulative probability distribution across classes.
|
||||
// Note: This isn't normalized.
|
||||
cdf = (logits.chip<0>(b).cast<double>() - max_logit).exp();
|
||||
double running_total = 0;
|
||||
for (int64_t j = 0; j < num_classes; ++j) {
|
||||
if (Eigen::numext::isfinite(logits_row[j])) {
|
||||
running_total += cdf(j);
|
||||
}
|
||||
cdf(j) = running_total;
|
||||
}
|
||||
// Generate each sample.
|
||||
const double* cdf_begin = cdf.data();
|
||||
const double* cdf_end = cdf.data() + num_classes;
|
||||
for (int64_t j = 0; j < num_samples; ++j) {
|
||||
const double to_find = dist(generator_copy) * running_total;
|
||||
auto found_iter = std::upper_bound(cdf_begin, cdf_end, to_find);
|
||||
output(b, j) = static_cast<OutputType>(std::distance(cdf_begin, found_iter));
|
||||
for (int64_t b = 0; b < batch_size; ++b) {
|
||||
const float* logits_row = &(logits(b, 0));
|
||||
// Takes an along-class maximum (for numerical stability).
|
||||
float maxx = std::numeric_limits<float>::lowest();
|
||||
for (int64_t j = 0; j < num_classes; ++j) {
|
||||
if (Eigen::numext::isfinite(logits_row[j])) {
|
||||
maxx = std::max(maxx, logits_row[j]);
|
||||
}
|
||||
}
|
||||
};
|
||||
DoWork(0, batch_size);
|
||||
const auto max_logit = static_cast<double>(maxx);
|
||||
|
||||
// Precompute cumulative probability distribution across classes.
|
||||
// Note: This isn't normalized.
|
||||
cdf = (logits.chip<0>(b).cast<double>() - max_logit).exp();
|
||||
double running_total = 0;
|
||||
for (int64_t j = 0; j < num_classes; ++j) {
|
||||
if (Eigen::numext::isfinite(logits_row[j])) {
|
||||
running_total += cdf(j);
|
||||
}
|
||||
cdf(j) = running_total;
|
||||
}
|
||||
// Generate each sample.
|
||||
const double* cdf_begin = cdf.data();
|
||||
const double* cdf_end = cdf.data() + num_classes;
|
||||
for (int64_t j = 0; j < num_samples; ++j) {
|
||||
const double to_find = dist(generator) * running_total;
|
||||
auto found_iter = std::upper_bound(cdf_begin, cdf_end, to_find);
|
||||
output(b, j) = static_cast<OutputType>(std::distance(cdf_begin, found_iter));
|
||||
}
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
|
@ -262,32 +255,6 @@ Status Multinomial::Compute(OpKernelContext* ctx) const {
|
|||
return status;
|
||||
}
|
||||
|
||||
/*
|
||||
alternative interpretation of the spec is that the input tensor contains the dimensions as ints.
|
||||
Keeping this temporarily in case we go back to that.
|
||||
|
||||
// read shape information from input tensor and create output tensor with it
|
||||
static Status CreateOutputTensorFromTensorValues(OpKernelContext* ctx, const Tensor& X, Tensor** Y) {
|
||||
const TensorShape& shape = X.Shape();
|
||||
auto size = shape.Size();
|
||||
auto num_dims = shape.NumDimensions();
|
||||
|
||||
if (num_dims != 1) {
|
||||
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Expected 1 dimension tensor with shape information. Dimensions=", num_dims);
|
||||
}
|
||||
|
||||
std::vector<int64_t> dims;
|
||||
dims.reserve(shape.Size());
|
||||
|
||||
auto data = gsl::make_span(tensor.template Data<int64_t>(), shape.Size());
|
||||
dims.insert(dims.cbegin(), data.cbegin(), data.cend());
|
||||
|
||||
*Y = ctx->Output(0, TensorShape(dims));
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
*/
|
||||
|
||||
// create output tensor using shape of input tensor
|
||||
static Status CreateOutputTensorFromTensorShape(OpKernelContext* ctx, const Tensor& X, Tensor** Y) {
|
||||
const TensorShape& shape = X.Shape();
|
||||
|
|
@ -363,9 +330,11 @@ static Status RandomUniformCompute(float low, float high,
|
|||
|
||||
template <typename T, typename TDistribution>
|
||||
void GenerateData(std::default_random_engine& generator, TDistribution distribution, Tensor& tensor) {
|
||||
auto out = gsl::make_span(tensor.template MutableData<T>(), tensor.Shape().Size());
|
||||
|
||||
std::for_each(out.begin(), out.end(), [&generator, &distribution](T& value) { value = distribution(generator); });
|
||||
T* out = tensor.MutableData<T>();
|
||||
for (int64_t i = 0, end = tensor.Shape().Size(); i < end; ++i) {
|
||||
*out = distribution(generator);
|
||||
++out;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -246,7 +246,7 @@ TEST(Random, MultinomialGoodCase) {
|
|||
const std::vector<int64_t> output_dims{batch_size, num_samples};
|
||||
#ifdef _WIN32
|
||||
const std::vector<int64_t> expected_output{2, 0, 0, 2, 2, 2, 0, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 0};
|
||||
#elif defined(__MACH__) || defined (__ANDROID__)
|
||||
#elif defined(__MACH__) || defined(__ANDROID__)
|
||||
const std::vector<int64_t> expected_output{1, 1, 2, 2, 0, 2, 2, 2, 0, 2, 1, 1, 2, 0, 2, 2, 0, 2, 1, 1};
|
||||
#else
|
||||
const std::vector<int64_t> expected_output{2, 0, 0, 1, 0, 1, 2, 0, 1, 0, 0, 1, 1, 0, 1, 0, 2, 0, 2, 0};
|
||||
|
|
@ -257,31 +257,46 @@ TEST(Random, MultinomialGoodCase) {
|
|||
}
|
||||
|
||||
TEST(Random, MultinomialDefaultDType) {
|
||||
OpTester test("Multinomial");
|
||||
auto run_test = [](int num_run_calls, const std::vector<int32_t>& expected_output) {
|
||||
OpTester test("Multinomial");
|
||||
const int64_t num_samples = 10;
|
||||
const int batch_size = 2;
|
||||
const float seed = 1618.f;
|
||||
|
||||
const int64_t num_samples = 10;
|
||||
const int batch_size = 2;
|
||||
const float seed = 1618.f;
|
||||
const std::vector<int64_t> input_dims{2, 3};
|
||||
std::vector<float> input(TensorShape(input_dims).Size());
|
||||
std::fill(input.begin(), input.end(), -10.f);
|
||||
test.AddInput<float>("X", input_dims, input);
|
||||
|
||||
const std::vector<int64_t> input_dims{2, 3};
|
||||
std::vector<float> input(TensorShape(input_dims).Size());
|
||||
std::fill(input.begin(), input.end(), -10.f);
|
||||
test.AddInput<float>("X", input_dims, input);
|
||||
test.AddAttribute("sample_size", num_samples);
|
||||
test.AddAttribute("seed", seed);
|
||||
|
||||
test.AddAttribute("sample_size", num_samples);
|
||||
test.AddAttribute("seed", seed);
|
||||
const std::vector<int64_t> output_dims{batch_size, num_samples};
|
||||
test.AddOutput<int32_t>("Y", output_dims, expected_output);
|
||||
|
||||
// test.Run() re-loads the model each time, so we need to do multiple calls to InferenceSession::Run inside of it
|
||||
// to test that the second call to Compute produces different data
|
||||
test.SetNumRunCalls(num_run_calls);
|
||||
|
||||
test.Run();
|
||||
};
|
||||
|
||||
const std::vector<int64_t> output_dims{batch_size, num_samples};
|
||||
#ifdef _WIN32
|
||||
const std::vector<int32_t> expected_output{2, 0, 0, 2, 2, 2, 0, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 0};
|
||||
#elif defined(__MACH__) || defined (__ANDROID__)
|
||||
const std::vector<int32_t> expected_output{1, 1, 2, 2, 0, 2, 2, 2, 0, 2, 1, 1, 2, 0, 2, 2, 0, 2, 1, 1};
|
||||
const std::vector<int32_t> expected_output_1{2, 0, 0, 2, 2, 2, 0, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 0};
|
||||
const std::vector<int32_t> expected_output_2{0, 0, 1, 0, 2, 2, 2, 0, 2, 1, 2, 1, 0, 2, 0, 2, 2, 1, 2, 1};
|
||||
#elif defined(__MACH__) || defined(__ANDROID__)
|
||||
const std::vector<int32_t> expected_output_1{1, 1, 2, 2, 0, 2, 2, 2, 0, 2, 1, 1, 2, 0, 2, 2, 0, 2, 1, 1};
|
||||
const std::vector<int32_t> expected_output_2{1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 2, 0, 1, 1, 0, 2, 2, 2, 1};
|
||||
#else
|
||||
const std::vector<int32_t> expected_output{2, 0, 0, 1, 0, 1, 2, 0, 1, 0, 0, 1, 1, 0, 1, 0, 2, 0, 2, 0};
|
||||
const std::vector<int32_t> expected_output_1{2, 0, 0, 1, 0, 1, 2, 0, 1, 0, 0, 1, 1, 0, 1, 0, 2, 0, 2, 0};
|
||||
const std::vector<int32_t> expected_output_2{2, 2, 1, 1, 0, 2, 2, 1, 1, 2, 0, 0, 0, 2, 0, 1, 1, 1, 0, 0};
|
||||
#endif
|
||||
test.AddOutput<int32_t>("Y", output_dims, expected_output);
|
||||
|
||||
test.Run();
|
||||
// Test output from a single call to Multinomial::Compute
|
||||
run_test(1, expected_output_1);
|
||||
|
||||
// Test output from 2 calls to Multinomial::Compute
|
||||
run_test(2, expected_output_2);
|
||||
}
|
||||
|
||||
TEST(Random, MultinomialInvalidDtype) {
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ void Check(const OpTester::Data& expected_data, const Tensor& output_tensor, con
|
|||
auto size = output_tensor.Shape().Size();
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
EXPECT_EQ(expected[i], output[i]) << "provider_type: " << provider_type;
|
||||
EXPECT_EQ(expected[i], output[i]) << "i:" << i << ", provider_type: " << provider_type;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -51,19 +51,21 @@ void Check<double>(const OpTester::Data& expected_data, const Tensor& output_ten
|
|||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (std::isinf(expected[i])) { // Test infinity for equality
|
||||
EXPECT_EQ(expected[i], output[i]);
|
||||
EXPECT_EQ(expected[i], output[i]) << "i:" << i;
|
||||
} else if (std::isnan(expected[i])) {
|
||||
EXPECT_TRUE(std::isnan(output[i])) << "Expected output " << i << " to be NaN";
|
||||
} else {
|
||||
if (!has_abs_err && !has_rel_err) {
|
||||
// the default for existing tests
|
||||
EXPECT_NEAR(expected[i], output[i], threshold) << "provider_type: " << provider_type;
|
||||
EXPECT_NEAR(expected[i], output[i], threshold) << "i:" << i << ", provider_type: " << provider_type;
|
||||
} else {
|
||||
if (has_abs_err) {
|
||||
EXPECT_NEAR(expected[i], output[i], expected_data.absolute_error_.value()) << "provider_type: " << provider_type;
|
||||
EXPECT_NEAR(expected[i], output[i], expected_data.absolute_error_.value())
|
||||
<< "i:" << i << ", provider_type: " << provider_type;
|
||||
}
|
||||
if (has_rel_err) {
|
||||
EXPECT_NEAR(expected[i], output[i], expected_data.relative_error_.value() * std::abs(expected[i])) << "provider_type: " << provider_type;
|
||||
EXPECT_NEAR(expected[i], output[i], expected_data.relative_error_.value() * std::abs(expected[i]))
|
||||
<< "i:" << i << ", provider_type: " << provider_type;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -87,19 +89,21 @@ void Check<float>(const OpTester::Data& expected_data, const Tensor& output_tens
|
|||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (std::isinf(expected[i])) { // Test infinity for equality
|
||||
EXPECT_EQ(expected[i], output[i]);
|
||||
EXPECT_EQ(expected[i], output[i]) << "i:" << i;
|
||||
} else if (std::isnan(expected[i])) {
|
||||
EXPECT_TRUE(std::isnan(output[i])) << "Expected output " << i << " to be NaN";
|
||||
} else {
|
||||
if (!has_abs_err && !has_rel_err) {
|
||||
// the default for existing tests
|
||||
EXPECT_NEAR(expected[i], output[i], threshold) << "provider_type: " << provider_type;
|
||||
EXPECT_NEAR(expected[i], output[i], threshold) << "i:" << i << ", provider_type: " << provider_type;
|
||||
} else {
|
||||
if (has_abs_err) {
|
||||
EXPECT_NEAR(expected[i], output[i], expected_data.absolute_error_.value()) << "provider_type: " << provider_type;
|
||||
EXPECT_NEAR(expected[i], output[i], expected_data.absolute_error_.value())
|
||||
<< "i:" << i << ", provider_type: " << provider_type;
|
||||
}
|
||||
if (has_rel_err) {
|
||||
EXPECT_NEAR(expected[i], output[i], expected_data.relative_error_.value() * std::abs(expected[i])) << "provider_type: " << provider_type;
|
||||
EXPECT_NEAR(expected[i], output[i], expected_data.relative_error_.value() * std::abs(expected[i]))
|
||||
<< "i:" << i << ", provider_type: " << provider_type;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -121,10 +125,10 @@ void Check<MLFloat16>(const OpTester::Data& expected_data, const Tensor& output_
|
|||
float threshold = 0.001f;
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (std::isinf(f_expected[i])) // Test infinity for equality
|
||||
EXPECT_EQ(f_expected[i], f_output[i]);
|
||||
EXPECT_EQ(f_expected[i], f_output[i]) << "i:" << i;
|
||||
else {
|
||||
// the default for existing tests
|
||||
EXPECT_NEAR(f_expected[i], f_output[i], threshold) << "provider_type: " << provider_type;
|
||||
EXPECT_NEAR(f_expected[i], f_output[i], threshold) << "i:" << i << ", provider_type: " << provider_type;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -342,23 +346,27 @@ void OpTester::ExecuteModel(Model& model, InferenceSession& session_object, Expe
|
|||
default_run_options.run_log_verbosity_level = 1;
|
||||
|
||||
std::vector<OrtValue> fetches;
|
||||
status = session_object.Run(run_options ? *run_options : default_run_options, feeds, output_names, &fetches);
|
||||
if (status.IsOK()) {
|
||||
EXPECT_TRUE(expect_result == ExpectResult::kExpectSuccess) << "Expected failure but Run was successful";
|
||||
if (expect_result == ExpectResult::kExpectFailure) {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
if (expect_result == ExpectResult::kExpectFailure) {
|
||||
// Disable expected_failure_string checks for MKL-DNN and nGraph EP's
|
||||
if (provider_type != kMklDnnExecutionProvider && provider_type != kNGraphExecutionProvider) {
|
||||
EXPECT_THAT(status.ErrorMessage(), testing::HasSubstr(expected_failure_string));
|
||||
for (int i = 0; i < num_run_calls_; ++i) {
|
||||
fetches.clear();
|
||||
status = session_object.Run(run_options ? *run_options : default_run_options, feeds, output_names, &fetches);
|
||||
|
||||
if (status.IsOK()) {
|
||||
EXPECT_TRUE(expect_result == ExpectResult::kExpectSuccess) << "Expected failure but Run was successful";
|
||||
if (expect_result == ExpectResult::kExpectFailure) {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
LOGS_DEFAULT(ERROR) << "Run failed with status: " << status.ErrorMessage();
|
||||
EXPECT_TRUE(status.IsOK()) << status.ErrorMessage();
|
||||
if (expect_result == ExpectResult::kExpectFailure) {
|
||||
// Disable expected_failure_string checks for MKL-DNN and nGraph EP's
|
||||
if (provider_type != kMklDnnExecutionProvider && provider_type != kNGraphExecutionProvider) {
|
||||
EXPECT_THAT(status.ErrorMessage(), testing::HasSubstr(expected_failure_string));
|
||||
}
|
||||
} else {
|
||||
LOGS_DEFAULT(ERROR) << "Run failed with status: " << status.ErrorMessage();
|
||||
EXPECT_TRUE(status.IsOK()) << status.ErrorMessage();
|
||||
}
|
||||
return;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Verify the outputs
|
||||
|
|
@ -515,7 +523,9 @@ void OpTester::Run(ExpectResult expect_result,
|
|||
|
||||
//if node is not registered for the provider, skip
|
||||
node.SetExecutionProviderType(provider_type);
|
||||
if (provider_type == onnxruntime::kNGraphExecutionProvider || provider_type == onnxruntime::kTensorrtExecutionProvider || provider_type == onnxruntime::kOpenVINOExecutionProvider)
|
||||
if (provider_type == onnxruntime::kNGraphExecutionProvider ||
|
||||
provider_type == onnxruntime::kTensorrtExecutionProvider ||
|
||||
provider_type == onnxruntime::kOpenVINOExecutionProvider)
|
||||
continue;
|
||||
auto reg = execution_provider->GetKernelRegistry();
|
||||
const KernelCreateInfo* kci = reg->TryFindKernel(node, execution_provider->Type());
|
||||
|
|
|
|||
|
|
@ -227,6 +227,13 @@ class OpTester {
|
|||
void SetOutputAbsErr(const char* name, float v);
|
||||
void SetOutputRelErr(const char* name, float v);
|
||||
|
||||
// Number of times to call InferenceSession::Run. The same feeds are used each time.
|
||||
// e.g. used to verify the generator ops behave as expected
|
||||
void SetNumRunCalls(int n) {
|
||||
// Only positive run counts are accepted; the check is delegated to ORT_ENFORCE
// (NOTE(review): failure behaviour of ORT_ENFORCE is not visible here -- confirm).
ORT_ENFORCE(n > 0);
|
||||
// Stored count; it bounds the loop around session_object.Run in OpTester::ExecuteModel.
num_run_calls_ = n;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void AddAttribute(std::string name, T value) {
|
||||
// Generate the proper AddAttribute call for later
|
||||
|
|
@ -318,6 +325,7 @@ class OpTester {
|
|||
int opset_version_;
|
||||
bool add_shape_to_tensor_data_ = true;
|
||||
int add_symbolic_dim_to_tensor_data_ = -1;
|
||||
int num_run_calls_ = 1;
|
||||
std::vector<Data> input_data_;
|
||||
std::vector<Data> output_data_;
|
||||
std::vector<size_t> initializer_index_;
|
||||
|
|
|
|||
Loading…
Reference in a new issue