Eliminate stray vector and the contention it creates (#20377)

### Description
An unused vector allocated a large chunk of memory inside a concurrent routine,
which created heap contention. This change removes it.

### Motivation and Context
This partially addresses
https://github.com/microsoft/onnxruntime/issues/20373.
This commit is contained in:
Dmitri Smirnov 2024-04-19 10:27:42 -07:00 committed by GitHub
parent 4d98f06f93
commit 42b700d463
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -3,12 +3,13 @@
#include "tfidfvectorizer.h"
#include "core/common/common.h"
#include "core/common/inlined_containers.h"
#include <core/common/safeint.h>
#include "core/framework/tensor.h"
#include "core/platform/threadpool.h"
#include <functional>
#include <unordered_map>
#include <core/common/safeint.h>
#include <string_view>
namespace onnxruntime {
@ -41,10 +42,15 @@ using NgramPartInt = NgramPart<int64_t>;
using NgramPartString = NgramPart<std::string>;
// Avoid recursive class definitions using unique_ptr + forward declaration
using IntMap = std::unordered_map<int64_t, std::unique_ptr<NgramPartInt>>;
using IntMap = InlinedHashMap<int64_t, std::unique_ptr<NgramPartInt>>;
#ifndef DISABLE_ABSEIL
using StrMap = absl::flat_hash_map<std::reference_wrapper<const std::string>, std::unique_ptr<NgramPartString>,
std::hash<std::string>, std::equal_to<std::string>>;
#else
using StrMap = std::unordered_map<std::reference_wrapper<const std::string>, std::unique_ptr<NgramPartString>,
std::hash<std::string>, std::equal_to<std::string>>;
#endif
template <>
struct NgramPart<int64_t> {
@ -412,7 +418,6 @@ Status TfIdfVectorizer::Compute(OpKernelContext* ctx) const {
is_input_string, num_batches, num_rows, &fn_weight](ptrdiff_t batch_num) {
// Frequency holder allocate [B..output_size_] and init all to zero.
auto work = concurrency::ThreadPool::PartitionWork(batch_num, num_batches, static_cast<size_t>(num_rows));
std::vector<uint32_t> frequencies(this->impl_->output_size_);
for (auto row_num = work.start; row_num < work.end; ++row_num) {
auto out = gsl::span<float>(output_data + row_num * this->impl_->output_size_, this->impl_->output_size_);
std::fill(out.begin(), out.end(), 0.0f);