From 829b2a5e8104477df18e3f18902b1bd2b694d826 Mon Sep 17 00:00:00 2001
From: Dmitri Smirnov <yuslepukhin@users.noreply.github.com>
Date: Thu, 24 Jan 2019 10:11:26 -0800
Subject: [PATCH] Promote TfIdfVectorizer to ONNX opset 9 (#373)

* Advance ONNX commit, move Ngram files under ONNX and rename to TfIdfVectorizer

* Rename Ngram to TfIdfVectorizer and redeclare in ONNX domain

* Restore tfidfvectorizer tests

* Remove ML definition.
---
 onnxruntime/contrib_ops/contrib_kernels.cc    |   6 -
 .../core/graph/contrib_ops/contrib_defs.cc    | 177 +++---------------
 .../providers/cpu/cpu_execution_provider.cc   |   6 +
 .../providers/cpu/nn/tfidfvectorizer.cc}      |  56 +++---
 .../providers/cpu/nn/tfidfvectorizer.h}       |  10 +-
 onnxruntime/test/onnx/main.cc                 |   9 +-
 .../cpu/nn/tfidfvectorizer_test.cc}           |  94 +++++-----
 7 files changed, 106 insertions(+), 252 deletions(-)
 rename onnxruntime/{contrib_ops/cpu/ngram.cc => core/providers/cpu/nn/tfidfvectorizer.cc} (93%)
 rename onnxruntime/{contrib_ops/cpu/ngram.h => core/providers/cpu/nn/tfidfvectorizer.h} (76%)
 rename onnxruntime/test/{contrib_ops/ngram_test.cc => providers/cpu/nn/tfidfvectorizer_test.cc} (87%)

diff --git a/onnxruntime/contrib_ops/contrib_kernels.cc b/onnxruntime/contrib_ops/contrib_kernels.cc
index 27f38ee417..1cf3b39c67 100644
--- a/onnxruntime/contrib_ops/contrib_kernels.cc
+++ b/onnxruntime/contrib_ops/contrib_kernels.cc
@@ -13,9 +13,6 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1,
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, FusedGemm);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, AttnLSTM);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, string, Tokenizer);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, string, Ngram);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int32_t, Ngram);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int64_t, Ngram);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, DequantizeLinear);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, DequantizeLinear);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, QuantizeLinear);
@@ -42,9 +39,6 @@ void RegisterContribKernels(KernelRegistry& kernel_registry) {
   kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, FusedGemm)>());
   kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, AttnLSTM)>());
   kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, string, Tokenizer)>());
-  kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, string, Ngram)>());
-  kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int32_t, Ngram)>());
-  kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int64_t, Ngram)>());
   kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, DequantizeLinear)>());
   kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, DequantizeLinear)>());
   kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, QuantizeLinear)>());
diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
index 6d1ac0c700..596465000e 100644
--- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
+++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
@@ -275,28 +275,28 @@ activation and leaky_relu_alpha.)DOC")
           "",
           AttributeProto::FLOAT,
           OPTIONAL)
-       .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
-         propagateElemTypeFromInputToOutput(ctx, 0, 0);
-         if (hasNInputShapes(ctx, 2)) {
-           auto transAAttr = ctx.getAttribute("transA");
-           bool transA =
-               transAAttr ? static_cast<int>(transAAttr->i()) != 0 : false;
-           auto transBAttr = ctx.getAttribute("transB");
-           bool transB =
-               transBAttr ? static_cast<int>(transBAttr->i()) != 0 : false;
-           auto& first_input_shape = getInputShape(ctx, 0);
-           auto& second_input_shape = getInputShape(ctx, 1);
-           if (first_input_shape.dim_size() != 2)
-             fail_shape_inference("First input does not have rank 2");
-           if (second_input_shape.dim_size() != 2)
-             fail_shape_inference("Second input does not have rank 2");
-           updateOutputShape(
-               ctx,
-               0,
-               {first_input_shape.dim(transA ? 1 : 0),
-                second_input_shape.dim(transB ? 0 : 1)});
-         }
-       });
+      .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
+        propagateElemTypeFromInputToOutput(ctx, 0, 0);
+        if (hasNInputShapes(ctx, 2)) {
+          auto transAAttr = ctx.getAttribute("transA");
+          bool transA =
+              transAAttr ? static_cast<int>(transAAttr->i()) != 0 : false;
+          auto transBAttr = ctx.getAttribute("transB");
+          bool transB =
+              transBAttr ? static_cast<int>(transBAttr->i()) != 0 : false;
+          auto& first_input_shape = getInputShape(ctx, 0);
+          auto& second_input_shape = getInputShape(ctx, 1);
+          if (first_input_shape.dim_size() != 2)
+            fail_shape_inference("First input does not have rank 2");
+          if (second_input_shape.dim_size() != 2)
+            fail_shape_inference("Second input does not have rank 2");
+          updateOutputShape(
+              ctx,
+              0,
+              {first_input_shape.dim(transA ? 1 : 0),
+               second_input_shape.dim(transB ? 0 : 1)});
+        }
+      });
 
   ONNX_CONTRIB_OPERATOR_SCHEMA(ExpandDims)
       .SetDomain(kMSDomain)
@@ -374,139 +374,6 @@ activation and leaky_relu_alpha.)DOC")
       })
       .SetDoc(R"DOC(Tokenizer divides each string in X into a vector of strings along the last axis. All input strings including attributes are UTF-8 encoded.)DOC");
 
-  ONNX_CONTRIB_OPERATOR_SCHEMA(Ngram)
-      .SetDomain(kMSDomain)
-      .SinceVersion(1)
-      .Input(0, "X", "Input for n-gram extraction", "T")
-      .Output(0, "Y", "Ngram results", "T1")
-      .TypeConstraint(
-          "T",
-          {"tensor(string)", "tensor(int32)", "tensor(int64)"},
-          "Input is ether string UTF-8 or int32/int64")
-      .TypeConstraint(
-          "T1",
-          {"tensor(float)"},
-          "1-D tensor of floats")
-      .Attr(
-          "max_gram_length",
-          "Maximum n-gram length. If this value is 3, 3-grams will be used to generate the output.",
-          AttributeProto::INT)
-      .Attr(
-          "min_gram_length",
-          "Minimum n-gram length. If this value is 2 and max_gram_length is 3, output may contain counts of 2-grams and 3-grams.",
-          AttributeProto::INT)
-      .Attr(
-          "max_skip_count",
-          "Maximum number of items (integers/strings) to be skipped when constructing an n-gram from X."
-          "If max_skip_count=1, min_gram_length=2, max_gram_length=3, this operator may generate 2-grams"
-          "with skip_count=0 and skip_count=1, and 3-grams with skip_count=0 and skip_count=1",
-          AttributeProto::INT)
-      .Attr(
-          "pool_strings",
-          "List of strings n-grams learned from the training set. Either this or pool_int64s attributes must be present but not both."
-          "It's an 1-D tensor starting with the collections of all 1-grams and ending with the collections of n-grams."
-          "The i-th element in pool stores the n-gram that should be mapped to index ngram_indexes[i] in the output vector.",
-          AttributeProto::STRINGS,
-          OPTIONAL)
-      .Attr(
-          "pool_int64s",
-          "List of int64 n-grams learned from the training set. Either this or pool_strings attributes must be present but not both."
-          "It's an 1-D tensor starting with the collections of all 1-grams and ending with the collections of n-grams."
-          "The i-th element in pool stores the n-gram that should be mapped to index ngram_indexes[i] in the output vector.",
-          AttributeProto::INTS,
-          OPTIONAL)
-      .Attr(
-          "ngram_counts",
-          "The starting indexes of 1-grams, 2-grams, and so on in pool."
-          "It is useful when determining the boundary between two consecutive collections of n-grams."
-          "For example, if ngram_counts is [0, 17, 36], the first index (zero-based) of 1-gram/2-gram/3-gram"
-          "in pool are 0/17/36. This format is essentially identical to CSR (or CSC) sparse matrix format, "
-          "and we choose to keep this due to its popularity.",
-          AttributeProto::INTS)
-      .Attr(
-          "ngram_indexes",
-          "list of int64s (type: AttributeProto::INTS). This list is parallel to the specified 'pool_*' attribute."
-          "The i-th element in ngram_indexes indicate the coordinate of the i-th n-gram in the output tensor.",
-          AttributeProto::INTS)
-      .Attr(
-          "weights",
-          "list of floats. This attribute stores the weight of each n-gram in pool. The i-th element in weights"
-          "is the weight of the i-th n-gram in pool. Its length equals to the size of ngram_indexes."
-          "By default, weights is an all-one tensor.This attribute is used when mode is \"IDF\" or \"TFIDF\""
-          "to scale the associated word counts.",
-          AttributeProto::FLOATS,
-          OPTIONAL)
-      .Attr(
-          "mode",
-          "The weighting criteria. It can be one of \"TF\" (term frequency),"
-          "\"IDF\" (inverse document frequency), and \"TFIDF\" (the combination of TF and IDF)",
-          AttributeProto::STRING)
-      .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
-        auto output_elem_type = ctx.getOutputType(0)->mutable_tensor_type();
-        output_elem_type->set_elem_type(ONNX_NAMESPACE::TensorProto::FLOAT);
-
-        if (hasInputShape(ctx, 0)) {
-          std::vector<int64_t> ngram_indexes;
-          ONNX_NAMESPACE::getRepeatedAttribute(ctx, "ngram_indexes", ngram_indexes);
-          if (ngram_indexes.empty() || !std::all_of(ngram_indexes.cbegin(), ngram_indexes.cend(),
-                                                    [](int64_t i) { return i >= 0; })) {
-            fail_shape_inference(
-                "ngram_indexes must be non-empty with no negative values");
-          }
-
-          auto greatest_hit = std::max_element(ngram_indexes.cbegin(), ngram_indexes.cend());
-          auto max_last_axis = *greatest_hit + 1;
-
-          ONNX_NAMESPACE::TensorShapeProto output_shape;
-          auto& input_shape = ctx.getInputType(0)->tensor_type().shape();
-          auto dim_size = input_shape.dim_size();
-          if (dim_size == 0 || dim_size == 1) {
-            output_shape.add_dim()->set_dim_value(max_last_axis);
-          } else if (dim_size == 2) {
-            auto& B_dim = input_shape.dim(0);
-            if (!B_dim.has_dim_value()) {
-              fail_shape_inference(
-                  "Input shape does not have first dimension value");
-            }
-            output_shape.add_dim()->set_dim_value(B_dim.dim_value());
-            output_shape.add_dim()->set_dim_value(max_last_axis);
-          } else {
-            fail_shape_inference(
-                "Input shape must have either [C] or [B,C] dimensions where C > 0 and B > 0");
-          }
-          updateOutputShape(ctx, 0, output_shape);
-        }
-      })
-      .SetDoc(R"DOC(
-This transform extracts n-grams from the input sequence and save them as a vector. Input can
-be either a 1-D or 2-D tensor. For 1-D input, output is the n-gram representation of that input.
-For 2-D input, the output is also a  2-D tensor whose i-th row is the n-gram representation of the i-th input row.
-More specifically, if input shape is [C], the corresponding output shape would be [max(ngram_indexes) + 1].
-If input shape is [N, C], this operator produces a [N, max(ngram_indexes) + 1]-tensor.
-
-In contrast to standard n-gram extraction, here, the indexes of extracting an n-gram from the original
-sequence are not necessarily consecutive numbers. The discontinuity between indexes are controlled by the number of skips.
-If the number of skips is 2, we should skip two tokens when scanning through the original sequence.
-Let's consider an example. Assume that input sequence is [94, 17, 36, 12, 28] and the number of skips is 2.
-The associated 2-grams are [94, 12] and [17, 28] respectively indexed by [0, 3] and [1, 4].
-If the number of skips becomes 0, the 2-grams generated are [94, 17], [17, 36], [36, 12], [12, 28]
-indexed by [0, 1], [1, 2], [2, 3], [3, 4], respectively.
-
-The output vector stores the count of each n-gram;
-Y[i] indicates the times that the i-th n-gram is found. The attribute ngram_indexes is used to determine the mapping
-between index i and the corresponding n-gram. If pool_int64s is [94 , 17 ,17, 36], ngram_indexes is [1, 0],
-ngram_counts=[0, 0], then the Y[0] (first element in Y) and Y[1] (second element in Y) are the counts of [17, 36] and [94, 17],
-respectively. An n-gram which cannot be found in pool_strings/pool_int64s should be ignored and has no effect on the output.
-Note that we may consider all skips up to S when generating the n-grams.
-
-The examples used above are true if mode is "TF". If mode is "IDF", all the counts larger than 1 would be truncated to 1 and
-the i-th element in weights would be used to scale (by multiplication) the count of the i-th n-gram in pool. If mode is "TFIDF",
-this operator first computes the counts of all n-grams and then scale them by the associated values in the weights attribute.
-
-Only one of pool_strings and pool_int64s can be set. If pool_int64s is set, the input should be an integer tensor.
-If pool_strings is set, the input must be a string tensor.
-)DOC");
-
   // Operators for linear 8 bit quanitzation support.
   ONNX_CONTRIB_OPERATOR_SCHEMA(QuantizeLinear)
       .SetDomain(kMSDomain)
diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
index 8355c7c7dc..c13ff4ca23 100644
--- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
+++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
@@ -248,6 +248,9 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Asi
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Acosh);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Atanh);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Scan);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, string, TfIdfVectorizer);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, int32_t, TfIdfVectorizer);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, int64_t, TfIdfVectorizer);
 
 void RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
   kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, Clip)>());
@@ -489,6 +492,9 @@ void RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
   kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Acosh)>());
   kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Atanh)>());
   kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Scan)>());
+  kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, string, TfIdfVectorizer)>());
+  kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, int32_t, TfIdfVectorizer)>());
+  kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, int64_t, TfIdfVectorizer)>());
 }
 
 // Forward declarations of ml op kernels
diff --git a/onnxruntime/contrib_ops/cpu/ngram.cc b/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc
similarity index 93%
rename from onnxruntime/contrib_ops/cpu/ngram.cc
rename to onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc
index 74117806f5..d927e927cc 100644
--- a/onnxruntime/contrib_ops/cpu/ngram.cc
+++ b/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-#include "ngram.h"
+#include "tfidfvectorizer.h"
 #include "onnx/defs/schema.h"
 #include "core/common/common.h"
 #include "core/framework/tensor.h"
@@ -12,34 +12,33 @@
 #include <iterator>
 
 namespace onnxruntime {
-namespace contrib {
 
-ONNX_CPU_OPERATOR_TYPED_MS_KERNEL(
-    Ngram,
-    1,
+ONNX_CPU_OPERATOR_TYPED_KERNEL(
+    TfIdfVectorizer,
+    9,
     string,
     KernelDefBuilder()
         .TypeConstraint("T", DataTypeImpl::GetTensorType<std::string>())
         .TypeConstraint("T1", DataTypeImpl::GetTensorType<float>()),
-    contrib::Ngram);
+    TfIdfVectorizer);
 
-ONNX_CPU_OPERATOR_TYPED_MS_KERNEL(
-    Ngram,
-    1,
+ONNX_CPU_OPERATOR_TYPED_KERNEL(
+    TfIdfVectorizer,
+    9,
     int32_t,
     KernelDefBuilder()
         .TypeConstraint("T", DataTypeImpl::GetTensorType<int32_t>())
         .TypeConstraint("T1", DataTypeImpl::GetTensorType<float>()),
-    contrib::Ngram);
+    TfIdfVectorizer);
 
-ONNX_CPU_OPERATOR_TYPED_MS_KERNEL(
-    Ngram,
-    1,
+ONNX_CPU_OPERATOR_TYPED_KERNEL(
+    TfIdfVectorizer,
+    9,
     int64_t,
     KernelDefBuilder()
         .TypeConstraint("T", DataTypeImpl::GetTensorType<int64_t>())
         .TypeConstraint("T1", DataTypeImpl::GetTensorType<float>()),
-    contrib::Ngram);
+    TfIdfVectorizer);
 
 namespace ngram_details {
 
@@ -169,10 +168,9 @@ inline void Emplace(ForwardIter first, size_t ngrams, size_t ngram_size, size_t&
 }
 
 }  // namespace ngram_details
-}  // namespace contrib
 }  // namespace onnxruntime
 
-using namespace onnxruntime::contrib::ngram_details;
+using namespace onnxruntime::ngram_details;
 
 namespace std {
 template <typename T>
@@ -186,7 +184,6 @@ struct hash<NgramEntry<T>> {
 }  // namespace std
 
 namespace onnxruntime {
-namespace contrib {
 
 // The weighting criteria.
 // "TF"(term frequency),
@@ -206,7 +203,7 @@ enum WeightingCriteria {
   kTFIDF = 3
 };
 
-struct Ngram::Impl {
+struct TfIdfVectorizer::Impl {
   WeightingCriteria weighting_criteria_ = kNone;
   int64_t max_gram_length_ = 0;
   int64_t min_gram_length_ = 0;
@@ -251,36 +248,36 @@ struct Ngram::Impl {
 };
 
 template <>
-inline auto Ngram::Impl::PoolEnd<int64_t>() const {
+inline auto TfIdfVectorizer::Impl::PoolEnd<int64_t>() const {
   return int64_set_.cend();
 }
 
 template <>
-inline auto Ngram::Impl::PoolEnd<int32_t>() const {
+inline auto TfIdfVectorizer::Impl::PoolEnd<int32_t>() const {
   return PoolEnd<int64_t>();
 }
 
 template <>
-inline auto Ngram::Impl::PoolEnd<std::string>() const {
+inline auto TfIdfVectorizer::Impl::PoolEnd<std::string>() const {
   return str_set_.cend();
 }
 
 template <>
-inline auto Ngram::Impl::PoolFind<int64_t>(const NgramEntry<int64_t>& i) const {
+inline auto TfIdfVectorizer::Impl::PoolFind<int64_t>(const NgramEntry<int64_t>& i) const {
   return int64_set_.find(i);
 }
 
 template <>
-inline auto Ngram::Impl::PoolFind<int32_t>(const NgramEntry<int32_t>& i) const {
+inline auto TfIdfVectorizer::Impl::PoolFind<int32_t>(const NgramEntry<int32_t>& i) const {
   return int64_set_.find(i);
 }
 
 template <>
-inline auto Ngram::Impl::PoolFind<std::string>(const NgramEntry<std::string>& i) const {
+inline auto TfIdfVectorizer::Impl::PoolFind<std::string>(const NgramEntry<std::string>& i) const {
   return str_set_.find(i);
 }
 
-Ngram::Ngram(const OpKernelInfo& info) : OpKernel(info), impl_(new Impl) {
+TfIdfVectorizer::TfIdfVectorizer(const OpKernelInfo& info) : OpKernel(info), impl_(new Impl) {
   std::string mode;
   Status status = info.GetAttr("mode", &mode);
   ORT_ENFORCE(status.IsOK(), "mode is required");
@@ -381,10 +378,10 @@ Ngram::Ngram(const OpKernelInfo& info) : OpKernel(info), impl_(new Impl) {
   }
 }
 
-Ngram::~Ngram() {
+TfIdfVectorizer::~TfIdfVectorizer() {
 }
 
-void Ngram::OutputResult(OpKernelContext* ctx, size_t B, const std::vector<uint32_t>& frequences) const {
+void TfIdfVectorizer::OutputResult(OpKernelContext* ctx, size_t B, const std::vector<uint32_t>& frequences) const {
   const Impl& impl = *impl_;
   std::vector<int64_t> output_dims;
   if (B == 0) {
@@ -437,7 +434,7 @@ void Ngram::OutputResult(OpKernelContext* ctx, size_t B, const std::vector<uint3
 }
 
 template <typename T>
-Status Ngram::ComputeImpl(OpKernelContext* ctx) const {
+Status TfIdfVectorizer::ComputeImpl(OpKernelContext* ctx) const {
   const auto& impl = *impl_;
   auto const set_end = impl.PoolEnd<T>();
 
@@ -559,7 +556,7 @@ Status Ngram::ComputeImpl(OpKernelContext* ctx) const {
   return Status::OK();
 }
 
-Status Ngram::Compute(OpKernelContext* ctx) const {
+Status TfIdfVectorizer::Compute(OpKernelContext* ctx) const {
   Status s;
 
   auto X = ctx->Input<Tensor>(0);
@@ -578,5 +575,4 @@ Status Ngram::Compute(OpKernelContext* ctx) const {
   return s;
 }
 
-}  // namespace contrib
 }  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cpu/ngram.h b/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.h
similarity index 76%
rename from onnxruntime/contrib_ops/cpu/ngram.h
rename to onnxruntime/core/providers/cpu/nn/tfidfvectorizer.h
index adf95ebcb8..025933e13f 100644
--- a/onnxruntime/contrib_ops/cpu/ngram.h
+++ b/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.h
@@ -9,13 +9,12 @@
 #include <vector>
 
 namespace onnxruntime {
-namespace contrib {
 
-class Ngram final : public OpKernel {
+class TfIdfVectorizer final : public OpKernel {
  public:
-  explicit Ngram(const OpKernelInfo& info);
-  ~Ngram();
-  ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Ngram);
+  explicit TfIdfVectorizer(const OpKernelInfo& info);
+  ~TfIdfVectorizer();
+  ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(TfIdfVectorizer);
 
   Status Compute(OpKernelContext* ctx) const override;
 
@@ -30,5 +29,4 @@ class Ngram final : public OpKernel {
   std::unique_ptr<Impl> impl_;
 };
 
-}  // namespace contrib
 }  // namespace onnxruntime
diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc
index 631e56eecb..03ef85115a 100644
--- a/onnxruntime/test/onnx/main.cc
+++ b/onnxruntime/test/onnx/main.cc
@@ -344,14 +344,7 @@ int real_main(int argc, char* argv[]) {
       {"cast_FLOAT_to_STRING", "Cast opset 9 not supported yet"},
       {"cast_FLOAT_to_FLOAT16", "Cast opset 9 not supported yet"},
       {"cast_FLOAT16_to_DOUBLE", "Cast opset 9 not supported yet"},
-      {"nonzero_example", "NonZero opset 9 not supported yet"},
-      {"tfidfvectorizer_tf_uniandbigrams_skip5", "TfIdfVectorizer opset 9 not supported yet"},
-      {"tfidfvectorizer_tf_batch_onlybigrams_skip0", "TfIdfVectorizer opset 9 not supported yet"},
-      {"tfidfvectorizer_tf_onlybigrams_skip5", "TfIdfVectorizer opset 9 not supported yet"},
-      {"tfidfvectorizer_tf_only_bigrams_skip0", "TfIdfVectorizer opset 9 not supported yet"},
-      {"tfidfvectorizer_tf_onlybigrams_levelempty", "TfIdfVectorizer opset 9 not supported yet"},
-      {"tfidfvectorizer_tf_batch_uniandbigrams_skip5", "TfIdfVectorizer opset 9 not supported yet"},
-      {"tfidfvectorizer_tf_batch_onlybigrams_skip5", "TfIdfVectorizer opset 9 not supported yet"}};
+      {"nonzero_example", "NonZero opset 9 not supported yet"}};
 
 #ifdef USE_CUDA
   broken_tests["maxpool_2d_default"] = "cudnn pooling only support input dimension >= 3";
diff --git a/onnxruntime/test/contrib_ops/ngram_test.cc b/onnxruntime/test/providers/cpu/nn/tfidfvectorizer_test.cc
similarity index 87%
rename from onnxruntime/test/contrib_ops/ngram_test.cc
rename to onnxruntime/test/providers/cpu/nn/tfidfvectorizer_test.cc
index 727fdd4521..2a6015c655 100644
--- a/onnxruntime/test/contrib_ops/ngram_test.cc
+++ b/onnxruntime/test/providers/cpu/nn/tfidfvectorizer_test.cc
@@ -8,10 +8,10 @@
 
 namespace onnxruntime {
 namespace test {
-namespace ngram_test {
+namespace tfidfvectorizer_test {
 
-constexpr const char* domain = onnxruntime::kMSDomain;
-const int opset_ver = 1;
+constexpr const char* domain = kOnnxDomain;
+const int opset_ver = 9;
 
 void InitTestAttr(OpTester& test, const std::string& mode,
                   int64_t min_gram_length, int64_t max_gram_length, int64_t max_skip_count,
@@ -36,9 +36,9 @@ void InitTestAttr(OpTester& test, const std::string& mode,
     test.AddAttribute("pool_strings", pool_strings);
   }
 }
-}  // namespace ngram_test
+}  // namespace tfidfvectorizer_test
 
-using namespace ngram_test;
+using namespace tfidfvectorizer_test;
 
 // Here is what takes place in general and in particular
 // in this unit test.There are 7 n - grams : 4 unigrams and 3 bigrams
@@ -48,8 +48,8 @@ using namespace ngram_test;
 // However, attribute all controls whether we consider all of the supplied ngram[M..N] sizes
 // into consideration or not.With all = false, we only consider N - grams.
 
-TEST(ContribOpNgramTest, Int32_TF_onlyBigrams_Skip0) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, Int32_TF_onlyBigrams_Skip0) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=0, Min=Max=2, weights empty, int32
   InitTestAttr(test, "TF", 2, 2, 0,
                {0, 4},
@@ -70,8 +70,8 @@ TEST(ContribOpNgramTest, Int32_TF_onlyBigrams_Skip0) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, Int32_TF_BatchOnlyBigrams_Skip0) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, Int32_TF_BatchOnlyBigrams_Skip0) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=0, Min=Max=2, weights empty, int32
   InitTestAttr(test, "TF", 2, 2, 0,
                {0, 4},
@@ -95,8 +95,8 @@ TEST(ContribOpNgramTest, Int32_TF_BatchOnlyBigrams_Skip0) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, String_TF_OnlyBigrams_Skip0) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, String_TF_OnlyBigrams_Skip0) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=0, Min=Max=2, weights empty, string
   InitTestAttr(test, "TF", 2, 2, 0,
                {0, 4},
@@ -118,8 +118,8 @@ TEST(ContribOpNgramTest, String_TF_OnlyBigrams_Skip0) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, String_TF_BatchOnlyBigrams_Skip0) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, String_TF_BatchOnlyBigrams_Skip0) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=0, Min=Max=2, weights empty, string
   InitTestAttr(test, "TF", 2, 2, 0,
                {0, 4},
@@ -145,8 +145,8 @@ TEST(ContribOpNgramTest, String_TF_BatchOnlyBigrams_Skip0) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, Int32_TF_onlyBigrams_LevelEmpty) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, Int32_TF_onlyBigrams_LevelEmpty) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=0, Min=Max=2, weights empty, int32
   InitTestAttr(test, "TF", 2, 2, 0,
                {0, 0},  // no unigrams, bi-grams start immediately
@@ -171,8 +171,8 @@ TEST(ContribOpNgramTest, Int32_TF_onlyBigrams_LevelEmpty) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, Int32_TF_onlyBigrams_Skip5) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, Int32_TF_onlyBigrams_Skip5) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=5, Min=Max=2, weights empty, int32
   InitTestAttr(test, "TF", 2, 2, 5,
                {0, 4},
@@ -195,8 +195,8 @@ TEST(ContribOpNgramTest, Int32_TF_onlyBigrams_Skip5) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, Int32_TF_BatchOnlyBigrams_Skip5) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, Int32_TF_BatchOnlyBigrams_Skip5) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=5, , Min=Max=2, weights empty, int32
   InitTestAttr(test, "TF", 2, 2, 5,
                {0, 4},
@@ -221,8 +221,8 @@ TEST(ContribOpNgramTest, Int32_TF_BatchOnlyBigrams_Skip5) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, String_TF_onlyBigrams_Skip5) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, String_TF_onlyBigrams_Skip5) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=5, , Min=Max=2, weights empty, string
   InitTestAttr(test, "TF", 2, 2, 5,
                {0, 4},
@@ -246,8 +246,8 @@ TEST(ContribOpNgramTest, String_TF_onlyBigrams_Skip5) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, String_TF_BatchOnlyBigrams_Skip5) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, String_TF_BatchOnlyBigrams_Skip5) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=5, , Min=Max=2, weights empty, string
   InitTestAttr(test, "TF", 2, 2, 5,
                {0, 4},
@@ -270,8 +270,8 @@ TEST(ContribOpNgramTest, String_TF_BatchOnlyBigrams_Skip5) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, Int32_TF_UniAndBigrams_Skip5) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, Int32_TF_UniAndBigrams_Skip5) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=5, , Min=1, Max=2, weights empty, int32
   InitTestAttr(test, "TF", 1, 2, 5,
                {0, 4},
@@ -293,8 +293,8 @@ TEST(ContribOpNgramTest, Int32_TF_UniAndBigrams_Skip5) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, Int32_TF_BatchUniAndBigrams_Skip5) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, Int32_TF_BatchUniAndBigrams_Skip5) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=5, Min=1, Max=2, weights empty, int32
   InitTestAttr(test, "TF", 1, 2, 5,
                {0, 4},
@@ -318,8 +318,8 @@ TEST(ContribOpNgramTest, Int32_TF_BatchUniAndBigrams_Skip5) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, String_TF_UniAndBigrams_Skip5) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, String_TF_UniAndBigrams_Skip5) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=5, Min=1, Max=2, weights empty, string
   InitTestAttr(test, "TF", 1, 2, 5,
                {0, 4},
@@ -341,8 +341,8 @@ TEST(ContribOpNgramTest, String_TF_UniAndBigrams_Skip5) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, String_TF_BatchUniAndBigrams_Skip5) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, String_TF_BatchUniAndBigrams_Skip5) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=5, Min=1, Max=2, weights empty, string
   InitTestAttr(test, "TF", 1, 2, 5,
                {0, 4},
@@ -366,8 +366,8 @@ TEST(ContribOpNgramTest, String_TF_BatchUniAndBigrams_Skip5) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, Int32_IDF_onlyBigrams_Skip5) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, Int32_IDF_onlyBigrams_Skip5) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=5, Min=Max=2, weights empty, int32
   // We change to IDF but do not supply weights so
   // we should get all 1.0f where count is not zero
@@ -390,8 +390,8 @@ TEST(ContribOpNgramTest, Int32_IDF_onlyBigrams_Skip5) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, String_IDF_onlyBigrams_Skip5) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, String_IDF_onlyBigrams_Skip5) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=5, Min=Max=2, weights empty, string
   InitTestAttr(test, "IDF", 2, 2, 5,
                {0, 4},
@@ -413,8 +413,8 @@ TEST(ContribOpNgramTest, String_IDF_onlyBigrams_Skip5) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, Int32_TFIDF_onlyBigrams_Skip5) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, Int32_TFIDF_onlyBigrams_Skip5) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=5, Min=Max=2, weights empty, int32
   // We change to TFIDF but do not supply weights so
   // we should all get the original values as weights are 1.0f by
@@ -438,8 +438,8 @@ TEST(ContribOpNgramTest, Int32_TFIDF_onlyBigrams_Skip5) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, String_TFIDF_onlyBigrams_Skip5) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, String_TFIDF_onlyBigrams_Skip5) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=5, Min=Max=2, weights empty, string
   InitTestAttr(test, "TFIDF", 2, 2, 5,
                {0, 4},
@@ -461,8 +461,8 @@ TEST(ContribOpNgramTest, String_TFIDF_onlyBigrams_Skip5) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, Int32_IDFWeights_onlyBigrams_Skip5) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, Int32_IDFWeights_onlyBigrams_Skip5) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=5, Min=Max=2, weights specified, int32
   // We change to IDF with supplied weights. All
   // with non-zero counts must be replaced with the supplied weights
@@ -485,8 +485,8 @@ TEST(ContribOpNgramTest, Int32_IDFWeights_onlyBigrams_Skip5) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, String_IDFWeights_onlyBigrams_Skip5) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, String_IDFWeights_onlyBigrams_Skip5) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=5, Min=Max=2, weights specified, string
   InitTestAttr(test, "IDF", 2, 2, 5,
                {0, 4},
@@ -508,8 +508,8 @@ TEST(ContribOpNgramTest, String_IDFWeights_onlyBigrams_Skip5) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, Int32_TFIDFWeights_onlyBigrams_Skip5) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, Int32_TFIDFWeights_onlyBigrams_Skip5) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=5, Min=Max=2, weights specified, int32
   // We change to TFIDF with supplied weights.
   // We should have all counts scaled by weights
@@ -532,8 +532,8 @@ TEST(ContribOpNgramTest, Int32_TFIDFWeights_onlyBigrams_Skip5) {
   test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
-TEST(ContribOpNgramTest, String_TFIDFWeights_onlyBigrams_Skip5) {
-  OpTester test("Ngram", opset_ver, domain);
+TEST(TfIdfVectorizerTest, String_TFIDFWeights_onlyBigrams_Skip5) {
+  OpTester test("TfIdfVectorizer", opset_ver, domain);
   // s=5, Min=Max=2, weights specified, string
   InitTestAttr(test, "TFIDF", 2, 2, 5,
                {0, 4},