Tokenizer: stop forcing POSIX syntax on the tokenexp regex

Hunk 1 (contrib_ops/cpu/tokenizer.cc): drops options.set_posix_syntax(true)
from the RE2 options built in the Tokenizer constructor. longest_match stays
enabled. Per the RE2 documentation, Perl character classes such as \w are
only rejected when posix_syntax is on, so tokenexp patterns that use them no
longer fail the regex->ok() check.

Hunk 2 (test/contrib_ops/tokenizer_test.cc): adds TokenizerExpression_RegChar,
which runs the Tokenizer with tokenexp "\w" on the single input "a;;;b" and
expects { start_mark, "a", "b", end_mark } with output dims {1, 4}.

NOTE(review): this patch was reconstructed from a copy whose line breaks were
collapsed and whose template argument lists were stripped. The leading
indentation of context lines and the restored <re2::RE2>, <int64_t> and
<std::string> arguments should be confirmed against the target tree before
applying (or apply with whitespace checks relaxed).

diff --git a/onnxruntime/contrib_ops/cpu/tokenizer.cc b/onnxruntime/contrib_ops/cpu/tokenizer.cc
index 36c99909c5..404b395c96 100644
--- a/onnxruntime/contrib_ops/cpu/tokenizer.cc
+++ b/onnxruntime/contrib_ops/cpu/tokenizer.cc
@@ -277,7 +277,6 @@ Tokenizer::Tokenizer(const OpKernelInfo& info) : OpKernel(info) {
     // Use tokenexp
     re2::RE2::Options options;
     options.set_longest_match(true);
-    options.set_posix_syntax(true);
     std::unique_ptr<re2::RE2> regex(new re2::RE2(tokenexp, options));
     if (!regex->ok()) {
       ORT_THROW("Can not digest regex: ", regex->error());
diff --git a/onnxruntime/test/contrib_ops/tokenizer_test.cc b/onnxruntime/test/contrib_ops/tokenizer_test.cc
index 8823b53eb2..0375b0f76e 100644
--- a/onnxruntime/test/contrib_ops/tokenizer_test.cc
+++ b/onnxruntime/test/contrib_ops/tokenizer_test.cc
@@ -809,5 +809,26 @@ TEST(ContribOpTest, TokenizerExpression_RegDot) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
+TEST(ContribOpTest, TokenizerExpression_RegChar) {
+  OpTester test("Tokenizer", opset_ver, domain);
+  const std::string tokenexp(u8"\\w");
+  InitTestAttr(test, true, {}, 1, tokenexp);
+
+  std::vector<int64_t> dims{1};
+  std::vector<std::string> input{u8"a;;;b"};
+  test.AddInput<std::string>("T", dims, input);
+
+  std::vector<int64_t> output_dims(dims);
+  output_dims.push_back(int64_t(4));
+  std::vector<std::string> output{
+      start_mark,
+      u8"a",
+      u8"b",
+      end_mark};
+
+  test.AddOutput<std::string>("Y", output_dims, output);
+  test.Run(OpTester::ExpectResult::kExpectSuccess);
+}
+
 }  // namespace test
 }  // namespace onnxruntime