Fix memory leak caused by improper handling of std::string typed (#227)

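output buffer. Tensor returns a buffer of fully constructed
  std::strings, and we should treat them as such: assign to the existing
  elements instead of placement-new constructing over them, which leaks
  whatever the overwritten strings had allocated.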
This commit is contained in:
Dmitri Smirnov 2018-12-19 17:46:21 -08:00 committed by GitHub
parent e97caa7787
commit 255ee39af6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 15 additions and 16 deletions

View file

@ -42,7 +42,7 @@ class Locale {
loc_ = _create_locale(LC_CTYPE, name.c_str());
if (loc_ == nullptr) {
ORT_THROW("Failed to construct locale with name:",
name, ":", ":Please, install necessary language-pack-XX and configure locales");
name, ":", ":Please, install necessary language-pack-XX and configure locales");
}
}
@ -78,7 +78,7 @@ class Locale {
explicit Locale(const std::string& name) try : loc_(name) {
} catch (const std::runtime_error& e) {
ORT_THROW("Failed to construct locale with name:",
name, ":", e.what(), ":Please, install necessary language-pack-XX and configure locales");
name, ":", e.what(), ":Please, install necessary language-pack-XX and configure locales");
}
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Locale);
@ -118,9 +118,8 @@ Status CopyCaseAction(ForwardIter first, ForwardIter end, OpKernelContext* ctx,
if (C == 0) {
output_dims.push_back(1);
TensorShape output_shape(output_dims);
auto output_ten = ctx->Output(0, output_shape);
auto output_default = output_ten->template MutableData<std::string>();
new (output_default) std::string();
// This will create one empty string
ctx->Output(0, output_shape);
return Status::OK();
}
@ -141,11 +140,11 @@ Status CopyCaseAction(ForwardIter first, ForwardIter end, OpKernelContext* ctx,
}
// In place transform
loc.ChangeCase(caseaction, wstr);
new (output_data + output_idx) std::string(converter.to_bytes(wstr));
*(output_data + output_idx) = converter.to_bytes(wstr);
} else {
assert(caseaction == StringNormalizer::NONE);
// Simple copy or move if the iterator points to a non-const string
new (output_data + output_idx) std::string(std::move(s));
*(output_data + output_idx) = std::move(s);
}
++output_idx;
++first;

View file

@ -218,7 +218,7 @@ Tokenizer::Tokenizer(const OpKernelInfo& info) : OpKernel(info) {
separators[0].empty());
ORT_ENFORCE(!char_tokenezation_ || mincharnum_ < 2,
"mincharnum is too big for char level tokenezation");
"mincharnum is too big for char level tokenezation");
// Create TST and insert separators
if (!char_tokenezation_) {
@ -284,7 +284,7 @@ Status Tokenizer::CharTokenize(OpKernelContext* ctx, size_t N, size_t C,
while (curr_input != last) {
const auto& s = *curr_input;
if (mark_) {
new (output_data + output_index) std::string(&start_text, 1);
(output_data + output_index)->assign(&start_text, 1);
++output_index;
}
size_t tokens = 0;
@ -295,20 +295,20 @@ Status Tokenizer::CharTokenize(OpKernelContext* ctx, size_t N, size_t C,
assert(result);
(void)result;
assert(token_idx + tlen <= str_len);
new (output_data + output_index) std::string(s.substr(token_idx, tlen));
*(output_data + output_index) = s.substr(token_idx, tlen);
++output_index;
token_idx += tlen;
++tokens;
}
if (mark_) {
new (output_data + output_index) std::string(&end_text, 1);
(output_data + output_index)->assign(&end_text, 1);
++output_index;
}
// Padding strings
assert(tokens + (mark_ * 2) <= max_tokens);
const size_t pads = max_tokens - (mark_ * 2) - tokens;
for (size_t p = 0; p < pads; ++p) {
new (output_data + output_index) std::string(pad_value_);
*(output_data + output_index) = pad_value_;
++output_index;
}
++curr_input;
@ -422,21 +422,21 @@ Status Tokenizer::SeparatorTokenize(OpKernelContext* ctx,
size_t c_idx = output_index;
#endif
if (mark_) {
new (output_data + output_index) std::string(&start_text, 1);
(output_data + output_index)->assign(&start_text, 1);
++output_index;
}
// Output tokens for this row
for (auto& token : row) {
new (output_data + output_index) std::string(converter.to_bytes(token));
*(output_data + output_index) = converter.to_bytes(token);
++output_index;
}
if (mark_) {
new (output_data + output_index) std::string(&end_text, 1);
(output_data + output_index)->assign(&end_text, 1);
++output_index;
}
const size_t pads = max_tokens - (mark_ * 2) - row.size();
for (size_t p = 0; p < pads; ++p) {
new (output_data + output_index) std::string(pad_value_);
*(output_data + output_index) = pad_value_;
++output_index;
}
#ifdef _DEBUG