mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-28 22:56:32 +00:00
* Implement StringNormalizer. Add mixed-language tests, test case-insensitive path. * Create a locale on the fly. The default locale does not seem to be created reliably. * Add CI language-pack-en to make the default locale available. Catch and translate the locale creation exception to make the message meaningful. * Make sure locales are configured on Ubuntu.
39 lines
891 B
C++
39 lines
891 B
C++
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
// Licensed under the MIT License.
|
|
|
|
#pragma once
|
|
|
|
#include "core/framework/op_kernel.h"
|
|
|
|
#include <locale>
|
|
#include <string>
|
|
#include <unordered_set>
|
|
|
|
namespace onnxruntime {
|
|
namespace contrib {
|
|
|
|
class StringNormalizer : public OpKernel {
|
|
public:
|
|
enum CaseAction {
|
|
NONE = 0,
|
|
LOWER = 1,
|
|
UPPER = 2,
|
|
};
|
|
|
|
explicit StringNormalizer(const OpKernelInfo& info);
|
|
~StringNormalizer() = default;
|
|
|
|
Status Compute(OpKernelContext* ctx) const override;
|
|
|
|
private:
|
|
bool is_case_sensitive_;
|
|
CaseAction casechangeaction_;
|
|
CaseAction compare_caseaction_; // used for case-insensitive compare
|
|
std::string locale_name_;
|
|
// Either if these are populated but not both
|
|
std::unordered_set<std::string> stopwords_;
|
|
std::unordered_set<std::wstring> wstopwords_;
|
|
};
|
|
|
|
} // namespace contrib
|
|
} // namespace onnxruntime
|