diff --git a/onnxruntime/contrib_ops/cpu/string_normalizer.cc b/onnxruntime/contrib_ops/cpu/string_normalizer.cc index 2b76a7bbb4..a20eaade10 100644 --- a/onnxruntime/contrib_ops/cpu/string_normalizer.cc +++ b/onnxruntime/contrib_ops/cpu/string_normalizer.cc @@ -42,7 +42,7 @@ class Locale { loc_ = _create_locale(LC_CTYPE, name.c_str()); if (loc_ == nullptr) { ORT_THROW("Failed to construct locale with name:", - name, ":", ":Please, install necessary language-pack-XX and configure locales"); + name, ":", ":Please, install necessary language-pack-XX and configure locales"); } } @@ -78,7 +78,7 @@ class Locale { explicit Locale(const std::string& name) try : loc_(name) { } catch (const std::runtime_error& e) { ORT_THROW("Failed to construct locale with name:", - name, ":", e.what(), ":Please, install necessary language-pack-XX and configure locales"); + name, ":", e.what(), ":Please, install necessary language-pack-XX and configure locales"); } ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Locale); @@ -118,9 +118,8 @@ Status CopyCaseAction(ForwardIter first, ForwardIter end, OpKernelContext* ctx, if (C == 0) { output_dims.push_back(1); TensorShape output_shape(output_dims); - auto output_ten = ctx->Output(0, output_shape); - auto output_default = output_ten->template MutableData(); - new (output_default) std::string(); + // This will create one empty string + ctx->Output(0, output_shape); return Status::OK(); } @@ -141,11 +140,11 @@ Status CopyCaseAction(ForwardIter first, ForwardIter end, OpKernelContext* ctx, } // In place transform loc.ChangeCase(caseaction, wstr); - new (output_data + output_idx) std::string(converter.to_bytes(wstr)); + *(output_data + output_idx) = converter.to_bytes(wstr); } else { assert(caseaction == StringNormalizer::NONE); // Simple copy or move if the iterator points to a non-const string - new (output_data + output_idx) std::string(std::move(s)); + *(output_data + output_idx) = std::move(s); } ++output_idx; ++first; diff --git a/onnxruntime/contrib_ops/cpu/tokenizer.cc b/onnxruntime/contrib_ops/cpu/tokenizer.cc index 90aaeb9ab8..4c31ec20e5 100644 --- a/onnxruntime/contrib_ops/cpu/tokenizer.cc +++ b/onnxruntime/contrib_ops/cpu/tokenizer.cc @@ -218,7 +218,7 @@ Tokenizer::Tokenizer(const OpKernelInfo& info) : OpKernel(info) { separators[0].empty()); ORT_ENFORCE(!char_tokenezation_ || mincharnum_ < 2, - "mincharnum is too big for char level tokenezation"); + "mincharnum is too big for char level tokenezation"); // Create TST and insert separators if (!char_tokenezation_) { @@ -284,7 +284,7 @@ Status Tokenizer::CharTokenize(OpKernelContext* ctx, size_t N, size_t C, while (curr_input != last) { const auto& s = *curr_input; if (mark_) { - new (output_data + output_index) std::string(&start_text, 1); + (output_data + output_index)->assign(&start_text, 1); ++output_index; } size_t tokens = 0; @@ -295,20 +295,20 @@ Status Tokenizer::CharTokenize(OpKernelContext* ctx, size_t N, size_t C, assert(result); (void)result; assert(token_idx + tlen <= str_len); - new (output_data + output_index) std::string(s.substr(token_idx, tlen)); + *(output_data + output_index) = s.substr(token_idx, tlen); ++output_index; token_idx += tlen; ++tokens; } if (mark_) { - new (output_data + output_index) std::string(&end_text, 1); + (output_data + output_index)->assign(&end_text, 1); ++output_index; } // Padding strings assert(tokens + (mark_ * 2) <= max_tokens); const size_t pads = max_tokens - (mark_ * 2) - tokens; for (size_t p = 0; p < pads; ++p) { - new (output_data + output_index) std::string(pad_value_); + *(output_data + output_index) = pad_value_; ++output_index; } ++curr_input; @@ -422,21 +422,21 @@ Status Tokenizer::SeparatorTokenize(OpKernelContext* ctx, size_t c_idx = output_index; #endif if (mark_) { - new (output_data + output_index) std::string(&start_text, 1); + (output_data + output_index)->assign(&start_text, 1); ++output_index; } // Output tokens for this row for (auto& token : row) { - new (output_data + output_index) std::string(converter.to_bytes(token)); + *(output_data + output_index) = converter.to_bytes(token); ++output_index; } if (mark_) { - new (output_data + output_index) std::string(&end_text, 1); + (output_data + output_index)->assign(&end_text, 1); ++output_index; } const size_t pads = max_tokens - (mark_ * 2) - row.size(); for (size_t p = 0; p < pads; ++p) { - new (output_data + output_index) std::string(pad_value_); + *(output_data + output_index) = pad_value_; ++output_index; } #ifdef _DEBUG