mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-18 21:21:17 +00:00
* Remove posix option
* Add unit test for regular expression
This commit is contained in:
parent
da9af592d9
commit
4cc7121368
2 changed files with 21 additions and 1 deletions
|
|
@@ -277,7 +277,6 @@ Tokenizer::Tokenizer(const OpKernelInfo& info) : OpKernel(info) {
|
|||
// Use tokenexp
|
||||
re2::RE2::Options options;
|
||||
options.set_longest_match(true);
|
||||
options.set_posix_syntax(true);
|
||||
std::unique_ptr<re2::RE2> regex(new re2::RE2(tokenexp, options));
|
||||
if (!regex->ok()) {
|
||||
ORT_THROW("Can not digest regex: ", regex->error());
|
||||
|
|
|
|||
|
|
@@ -809,5 +809,26 @@ TEST(ContribOpTest, TokenizerExpression_RegDot) {
|
|||
test.Run(OpTester::ExpectResult::kExpectSuccess);
|
||||
}
|
||||
|
||||
// Runs the Tokenizer op with the regular expression "\w" as its token
// expression and expects the single input string "a;;;b" to come back as the
// tokens "a" and "b", surrounded by the start and end marks.
// NOTE(review): the meaning of the positional InitTestAttr arguments
// (true, {}, 1) is defined by that helper elsewhere in this file — confirm
// there before changing them.
TEST(ContribOpTest, TokenizerExpression_RegChar) {
  OpTester test("Tokenizer", opset_ver, domain);

  // The token pattern is a character-class escape rather than a literal.
  const std::string word_char_pattern(u8"\\w");
  InitTestAttr(test, true, {}, 1, word_char_pattern);

  // One input string, given as a 1-element tensor.
  std::vector<int64_t> input_shape{1};
  std::vector<std::string> input_strings{u8"a;;;b"};
  test.AddInput<std::string>("T", input_shape, input_strings);

  // The output appends one dimension of size 4 to the input shape:
  // start mark, "a", "b", end mark.
  std::vector<int64_t> expected_shape(input_shape);
  expected_shape.push_back(static_cast<int64_t>(4));
  std::vector<std::string> expected_tokens{start_mark, u8"a", u8"b", end_mark};
  test.AddOutput<std::string>("Y", expected_shape, expected_tokens);

  test.Run(OpTester::ExpectResult::kExpectSuccess);
}
|
||||
|
||||
} // namespace test
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
Loading…
Reference in a new issue