mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-03 03:58:54 +00:00
Eliminate stray vector and the contention it creates (#20377)
### Description
An unused vector that allocated a large chunk of memory inside a concurrently executed routine caused heap contention; this change removes it.

### Motivation and Context
This partially addresses https://github.com/microsoft/onnxruntime/issues/20373.
This commit is contained in:
parent
4d98f06f93
commit
42b700d463
1 changed file with 9 additions and 4 deletions
|
|
@@ -3,12 +3,13 @@
|
|||
|
||||
#include "tfidfvectorizer.h"
|
||||
#include "core/common/common.h"
|
||||
#include "core/common/inlined_containers.h"
|
||||
#include <core/common/safeint.h>
|
||||
#include "core/framework/tensor.h"
|
||||
#include "core/platform/threadpool.h"
|
||||
|
||||
#include <functional>
|
||||
#include <unordered_map>
|
||||
#include <core/common/safeint.h>
|
||||
#include <string_view>
|
||||
|
||||
namespace onnxruntime {
|
||||
|
||||
|
|
@@ -41,10 +42,15 @@ using NgramPartInt = NgramPart<int64_t>;
|
|||
using NgramPartString = NgramPart<std::string>;
|
||||
|
||||
// Avoid recursive class definitions using unique_ptr + forward declaration
|
||||
using IntMap = std::unordered_map<int64_t, std::unique_ptr<NgramPartInt>>;
|
||||
using IntMap = InlinedHashMap<int64_t, std::unique_ptr<NgramPartInt>>;
|
||||
|
||||
#ifndef DISABLE_ABSEIL
|
||||
using StrMap = absl::flat_hash_map<std::reference_wrapper<const std::string>, std::unique_ptr<NgramPartString>,
|
||||
std::hash<std::string>, std::equal_to<std::string>>;
|
||||
#else
|
||||
using StrMap = std::unordered_map<std::reference_wrapper<const std::string>, std::unique_ptr<NgramPartString>,
|
||||
std::hash<std::string>, std::equal_to<std::string>>;
|
||||
#endif
|
||||
|
||||
template <>
|
||||
struct NgramPart<int64_t> {
|
||||
|
|
@@ -412,7 +418,6 @@ Status TfIdfVectorizer::Compute(OpKernelContext* ctx) const {
|
|||
is_input_string, num_batches, num_rows, &fn_weight](ptrdiff_t batch_num) {
|
||||
// Frequency holder allocate [B..output_size_] and init all to zero.
|
||||
auto work = concurrency::ThreadPool::PartitionWork(batch_num, num_batches, static_cast<size_t>(num_rows));
|
||||
std::vector<uint32_t> frequencies(this->impl_->output_size_);
|
||||
for (auto row_num = work.start; row_num < work.end; ++row_num) {
|
||||
auto out = gsl::span<float>(output_data + row_num * this->impl_->output_size_, this->impl_->output_size_);
|
||||
std::fill(out.begin(), out.end(), 0.0f);
|
||||
|
|
|
|||
Loading…
Reference in a new issue