onnxruntime/onnxruntime/contrib_ops/cpu/string_normalizer.h
Dmitri Smirnov fbb23a9ed0
Implement StringNormalizer (#69)
* Implement StringNormalizer
  Add mixed language tests, test case insensitive path.
* Create a locale on the fly. Default locale does not seem to create well.
* Add CI language-pack-en to make default locale available.
  Catch and translate locale creation exception to make the message
  meaningful.
* Make sure locales are configured on Ubuntu.
2018-12-04 13:47:08 -08:00

39 lines
891 B
C++

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include "core/framework/op_kernel.h"
#include <locale>
#include <string>
#include <unordered_set>
namespace onnxruntime {
namespace contrib {
/// Kernel class for the StringNormalizer contrib operator.
///
/// Only the declaration lives in this header: the constructor and Compute()
/// are defined out of line, so how the members below get filled in cannot be
/// seen from here.
class StringNormalizer : public OpKernel {
 public:
  /// Selector for a case transformation. Judging by the enumerator names this
  /// means "leave as is", "lower-case" and "upper-case" respectively.
  /// NOTE(review): confirm against the implementation file.
  enum CaseAction {
    NONE = 0,
    LOWER = 1,
    UPPER = 2,
  };

  /// Builds the kernel from the supplied kernel info.
  /// @param info Kernel construction info; presumably the source of the
  ///             configuration stored in the private members -- TODO confirm.
  explicit StringNormalizer(const OpKernelInfo& info);
  ~StringNormalizer() = default;

  /// Runs the operator for one invocation.
  /// @param ctx Kernel context supplied by the framework.
  /// @return Status describing the outcome of the computation.
  Status Compute(OpKernelContext* ctx) const override;

 private:
  // The scalar members carry in-class initializers so that they never hold an
  // indeterminate value, even if a constructor path fails to assign one of
  // them. The constructor is expected to overwrite these defaults.
  bool is_case_sensitive_{false};
  CaseAction casechangeaction_{NONE};
  CaseAction compare_caseaction_{NONE};  // used for case-insensitive compare
  // Name of the locale to use. NOTE(review): exact usage lives in the
  // implementation file.
  std::string locale_name_;
  // Either of these is populated, but not both.
  std::unordered_set<std::string> stopwords_;
  std::unordered_set<std::wstring> wstopwords_;
};
} // namespace contrib
} // namespace onnxruntime