mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-26 22:35:43 +00:00
Fix memory leak caused by improper handling of std::string typed output buffer (#227)
Tensor returns a buffer of fully constructed std::strings, and we should treat them as such: assign to the existing strings instead of placement-new constructing over them.
This commit is contained in:
parent
e97caa7787
commit
255ee39af6
2 changed files with 15 additions and 16 deletions
|
|
@ -42,7 +42,7 @@ class Locale {
|
|||
loc_ = _create_locale(LC_CTYPE, name.c_str());
|
||||
if (loc_ == nullptr) {
|
||||
ORT_THROW("Failed to construct locale with name:",
|
||||
name, ":", ":Please, install necessary language-pack-XX and configure locales");
|
||||
name, ":", ":Please, install necessary language-pack-XX and configure locales");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -78,7 +78,7 @@ class Locale {
|
|||
explicit Locale(const std::string& name) try : loc_(name) {
|
||||
} catch (const std::runtime_error& e) {
|
||||
ORT_THROW("Failed to construct locale with name:",
|
||||
name, ":", e.what(), ":Please, install necessary language-pack-XX and configure locales");
|
||||
name, ":", e.what(), ":Please, install necessary language-pack-XX and configure locales");
|
||||
}
|
||||
|
||||
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Locale);
|
||||
|
|
@ -118,9 +118,8 @@ Status CopyCaseAction(ForwardIter first, ForwardIter end, OpKernelContext* ctx,
|
|||
if (C == 0) {
|
||||
output_dims.push_back(1);
|
||||
TensorShape output_shape(output_dims);
|
||||
auto output_ten = ctx->Output(0, output_shape);
|
||||
auto output_default = output_ten->template MutableData<std::string>();
|
||||
new (output_default) std::string();
|
||||
// This will create one empty string
|
||||
ctx->Output(0, output_shape);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
|
@ -141,11 +140,11 @@ Status CopyCaseAction(ForwardIter first, ForwardIter end, OpKernelContext* ctx,
|
|||
}
|
||||
// In place transform
|
||||
loc.ChangeCase(caseaction, wstr);
|
||||
new (output_data + output_idx) std::string(converter.to_bytes(wstr));
|
||||
*(output_data + output_idx) = converter.to_bytes(wstr);
|
||||
} else {
|
||||
assert(caseaction == StringNormalizer::NONE);
|
||||
// Simple copy or move if the iterator points to a non-const string
|
||||
new (output_data + output_idx) std::string(std::move(s));
|
||||
*(output_data + output_idx) = std::move(s);
|
||||
}
|
||||
++output_idx;
|
||||
++first;
|
||||
|
|
|
|||
|
|
@ -218,7 +218,7 @@ Tokenizer::Tokenizer(const OpKernelInfo& info) : OpKernel(info) {
|
|||
separators[0].empty());
|
||||
|
||||
ORT_ENFORCE(!char_tokenezation_ || mincharnum_ < 2,
|
||||
"mincharnum is too big for char level tokenezation");
|
||||
"mincharnum is too big for char level tokenezation");
|
||||
|
||||
// Create TST and insert separators
|
||||
if (!char_tokenezation_) {
|
||||
|
|
@ -284,7 +284,7 @@ Status Tokenizer::CharTokenize(OpKernelContext* ctx, size_t N, size_t C,
|
|||
while (curr_input != last) {
|
||||
const auto& s = *curr_input;
|
||||
if (mark_) {
|
||||
new (output_data + output_index) std::string(&start_text, 1);
|
||||
(output_data + output_index)->assign(&start_text, 1);
|
||||
++output_index;
|
||||
}
|
||||
size_t tokens = 0;
|
||||
|
|
@ -295,20 +295,20 @@ Status Tokenizer::CharTokenize(OpKernelContext* ctx, size_t N, size_t C,
|
|||
assert(result);
|
||||
(void)result;
|
||||
assert(token_idx + tlen <= str_len);
|
||||
new (output_data + output_index) std::string(s.substr(token_idx, tlen));
|
||||
*(output_data + output_index) = s.substr(token_idx, tlen);
|
||||
++output_index;
|
||||
token_idx += tlen;
|
||||
++tokens;
|
||||
}
|
||||
if (mark_) {
|
||||
new (output_data + output_index) std::string(&end_text, 1);
|
||||
(output_data + output_index)->assign(&end_text, 1);
|
||||
++output_index;
|
||||
}
|
||||
// Padding strings
|
||||
assert(tokens + (mark_ * 2) <= max_tokens);
|
||||
const size_t pads = max_tokens - (mark_ * 2) - tokens;
|
||||
for (size_t p = 0; p < pads; ++p) {
|
||||
new (output_data + output_index) std::string(pad_value_);
|
||||
*(output_data + output_index) = pad_value_;
|
||||
++output_index;
|
||||
}
|
||||
++curr_input;
|
||||
|
|
@ -422,21 +422,21 @@ Status Tokenizer::SeparatorTokenize(OpKernelContext* ctx,
|
|||
size_t c_idx = output_index;
|
||||
#endif
|
||||
if (mark_) {
|
||||
new (output_data + output_index) std::string(&start_text, 1);
|
||||
(output_data + output_index)->assign(&start_text, 1);
|
||||
++output_index;
|
||||
}
|
||||
// Output tokens for this row
|
||||
for (auto& token : row) {
|
||||
new (output_data + output_index) std::string(converter.to_bytes(token));
|
||||
*(output_data + output_index) = converter.to_bytes(token);
|
||||
++output_index;
|
||||
}
|
||||
if (mark_) {
|
||||
new (output_data + output_index) std::string(&end_text, 1);
|
||||
(output_data + output_index)->assign(&end_text, 1);
|
||||
++output_index;
|
||||
}
|
||||
const size_t pads = max_tokens - (mark_ * 2) - row.size();
|
||||
for (size_t p = 0; p < pads; ++p) {
|
||||
new (output_data + output_index) std::string(pad_value_);
|
||||
*(output_data + output_index) = pad_value_;
|
||||
++output_index;
|
||||
}
|
||||
#ifdef _DEBUG
|
||||
|
|
|
|||
Loading…
Reference in a new issue