From 829b2a5e8104477df18e3f18902b1bd2b694d826 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 24 Jan 2019 10:11:26 -0800 Subject: [PATCH] Promote TfIdfvectorizer to ONNX ver 9 (#373) * Advance ONNX commit, move Ngram files under ONNX and rename to TfIdfVectorizer * Rename Ngram to TfIdfVectorizer and redeclare in ONNX domain * Restore tfidfvectorizer tests * Remove ML definition. --- onnxruntime/contrib_ops/contrib_kernels.cc | 6 - .../core/graph/contrib_ops/contrib_defs.cc | 177 +++--------------- .../providers/cpu/cpu_execution_provider.cc | 6 + .../providers/cpu/nn/tfidfvectorizer.cc} | 56 +++--- .../providers/cpu/nn/tfidfvectorizer.h} | 10 +- onnxruntime/test/onnx/main.cc | 9 +- .../cpu/nn/tfidfvectorizer_test.cc} | 94 +++++----- 7 files changed, 106 insertions(+), 252 deletions(-) rename onnxruntime/{contrib_ops/cpu/ngram.cc => core/providers/cpu/nn/tfidfvectorizer.cc} (93%) rename onnxruntime/{contrib_ops/cpu/ngram.h => core/providers/cpu/nn/tfidfvectorizer.h} (76%) rename onnxruntime/test/{contrib_ops/ngram_test.cc => providers/cpu/nn/tfidfvectorizer_test.cc} (87%) diff --git a/onnxruntime/contrib_ops/contrib_kernels.cc b/onnxruntime/contrib_ops/contrib_kernels.cc index 27f38ee417..1cf3b39c67 100644 --- a/onnxruntime/contrib_ops/contrib_kernels.cc +++ b/onnxruntime/contrib_ops/contrib_kernels.cc @@ -13,9 +13,6 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, FusedGemm); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, AttnLSTM); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, string, Tokenizer); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, string, Ngram); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int32_t, Ngram); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int64_t, Ngram); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, DequantizeLinear); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, DequantizeLinear); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, QuantizeLinear); @@ -42,9 +39,6 @@ void RegisterContribKernels(KernelRegistry& kernel_registry) { kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); - kernel_registry.Register(BuildKernelCreateInfo()); - kernel_registry.Register(BuildKernelCreateInfo()); - kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc index 6d1ac0c700..596465000e 100644 --- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc @@ -275,28 +275,28 @@ activation and leaky_relu_alpha.)DOC") "", AttributeProto::FLOAT, OPTIONAL) - .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) { - propagateElemTypeFromInputToOutput(ctx, 0, 0); - if (hasNInputShapes(ctx, 2)) { - auto transAAttr = ctx.getAttribute("transA"); - bool transA = - transAAttr ? static_cast(transAAttr->i()) != 0 : false; - auto transBAttr = ctx.getAttribute("transB"); - bool transB = - transBAttr ? static_cast(transBAttr->i()) != 0 : false; - auto& first_input_shape = getInputShape(ctx, 0); - auto& second_input_shape = getInputShape(ctx, 1); - if (first_input_shape.dim_size() != 2) - fail_shape_inference("First input does not have rank 2"); - if (second_input_shape.dim_size() != 2) - fail_shape_inference("Second input does not have rank 2"); - updateOutputShape( - ctx, - 0, - {first_input_shape.dim(transA ? 1 : 0), - second_input_shape.dim(transB ? 0 : 1)}); - } - }); + .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) { + propagateElemTypeFromInputToOutput(ctx, 0, 0); + if (hasNInputShapes(ctx, 2)) { + auto transAAttr = ctx.getAttribute("transA"); + bool transA = + transAAttr ? static_cast(transAAttr->i()) != 0 : false; + auto transBAttr = ctx.getAttribute("transB"); + bool transB = + transBAttr ? static_cast(transBAttr->i()) != 0 : false; + auto& first_input_shape = getInputShape(ctx, 0); + auto& second_input_shape = getInputShape(ctx, 1); + if (first_input_shape.dim_size() != 2) + fail_shape_inference("First input does not have rank 2"); + if (second_input_shape.dim_size() != 2) + fail_shape_inference("Second input does not have rank 2"); + updateOutputShape( + ctx, + 0, + {first_input_shape.dim(transA ? 1 : 0), + second_input_shape.dim(transB ? 0 : 1)}); + } + }); ONNX_CONTRIB_OPERATOR_SCHEMA(ExpandDims) .SetDomain(kMSDomain) @@ -374,139 +374,6 @@ activation and leaky_relu_alpha.)DOC") }) .SetDoc(R"DOC(Tokenizer divides each string in X into a vector of strings along the last axis. All input strings including attributes are UTF-8 encoded.)DOC"); - ONNX_CONTRIB_OPERATOR_SCHEMA(Ngram) - .SetDomain(kMSDomain) - .SinceVersion(1) - .Input(0, "X", "Input for n-gram extraction", "T") - .Output(0, "Y", "Ngram results", "T1") - .TypeConstraint( - "T", - {"tensor(string)", "tensor(int32)", "tensor(int64)"}, - "Input is ether string UTF-8 or int32/int64") - .TypeConstraint( - "T1", - {"tensor(float)"}, - "1-D tensor of floats") - .Attr( - "max_gram_length", - "Maximum n-gram length. If this value is 3, 3-grams will be used to generate the output.", - AttributeProto::INT) - .Attr( - "min_gram_length", - "Minimum n-gram length. If this value is 2 and max_gram_length is 3, output may contain counts of 2-grams and 3-grams.", - AttributeProto::INT) - .Attr( - "max_skip_count", - "Maximum number of items (integers/strings) to be skipped when constructing an n-gram from X." - "If max_skip_count=1, min_gram_length=2, max_gram_length=3, this operator may generate 2-grams" - "with skip_count=0 and skip_count=1, and 3-grams with skip_count=0 and skip_count=1", - AttributeProto::INT) - .Attr( - "pool_strings", - "List of strings n-grams learned from the training set. Either this or pool_int64s attributes must be present but not both." - "It's an 1-D tensor starting with the collections of all 1-grams and ending with the collections of n-grams." - "The i-th element in pool stores the n-gram that should be mapped to index ngram_indexes[i] in the output vector.", - AttributeProto::STRINGS, - OPTIONAL) - .Attr( - "pool_int64s", - "List of int64 n-grams learned from the training set. Either this or pool_strings attributes must be present but not both." - "It's an 1-D tensor starting with the collections of all 1-grams and ending with the collections of n-grams." - "The i-th element in pool stores the n-gram that should be mapped to index ngram_indexes[i] in the output vector.", - AttributeProto::INTS, - OPTIONAL) - .Attr( - "ngram_counts", - "The starting indexes of 1-grams, 2-grams, and so on in pool." - "It is useful when determining the boundary between two consecutive collections of n-grams." - "For example, if ngram_counts is [0, 17, 36], the first index (zero-based) of 1-gram/2-gram/3-gram" - "in pool are 0/17/36. This format is essentially identical to CSR (or CSC) sparse matrix format, " - "and we choose to keep this due to its popularity.", - AttributeProto::INTS) - .Attr( - "ngram_indexes", - "list of int64s (type: AttributeProto::INTS). This list is parallel to the specified 'pool_*' attribute." - "The i-th element in ngram_indexes indicate the coordinate of the i-th n-gram in the output tensor.", - AttributeProto::INTS) - .Attr( - "weights", - "list of floats. This attribute stores the weight of each n-gram in pool. The i-th element in weights" - "is the weight of the i-th n-gram in pool. Its length equals to the size of ngram_indexes." - "By default, weights is an all-one tensor.This attribute is used when mode is \"IDF\" or \"TFIDF\"" - "to scale the associated word counts.", - AttributeProto::FLOATS, - OPTIONAL) - .Attr( - "mode", - "The weighting criteria. It can be one of \"TF\" (term frequency)," - "\"IDF\" (inverse document frequency), and \"TFIDF\" (the combination of TF and IDF)", - AttributeProto::STRING) - .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) { - auto output_elem_type = ctx.getOutputType(0)->mutable_tensor_type(); - output_elem_type->set_elem_type(ONNX_NAMESPACE::TensorProto::FLOAT); - - if (hasInputShape(ctx, 0)) { - std::vector ngram_indexes; - ONNX_NAMESPACE::getRepeatedAttribute(ctx, "ngram_indexes", ngram_indexes); - if (ngram_indexes.empty() || !std::all_of(ngram_indexes.cbegin(), ngram_indexes.cend(), - [](int64_t i) { return i >= 0; })) { - fail_shape_inference( - "ngram_indexes must be non-empty with no negative values"); - } - - auto greatest_hit = std::max_element(ngram_indexes.cbegin(), ngram_indexes.cend()); - auto max_last_axis = *greatest_hit + 1; - - ONNX_NAMESPACE::TensorShapeProto output_shape; - auto& input_shape = ctx.getInputType(0)->tensor_type().shape(); - auto dim_size = input_shape.dim_size(); - if (dim_size == 0 || dim_size == 1) { - output_shape.add_dim()->set_dim_value(max_last_axis); - } else if (dim_size == 2) { - auto& B_dim = input_shape.dim(0); - if (!B_dim.has_dim_value()) { - fail_shape_inference( - "Input shape does not have first dimension value"); - } - output_shape.add_dim()->set_dim_value(B_dim.dim_value()); - output_shape.add_dim()->set_dim_value(max_last_axis); - } else { - fail_shape_inference( - "Input shape must have either [C] or [B,C] dimensions where C > 0 and B > 0"); - } - updateOutputShape(ctx, 0, output_shape); - } - }) - .SetDoc(R"DOC( -This transform extracts n-grams from the input sequence and save them as a vector. Input can -be either a 1-D or 2-D tensor. For 1-D input, output is the n-gram representation of that input. -For 2-D input, the output is also a 2-D tensor whose i-th row is the n-gram representation of the i-th input row. -More specifically, if input shape is [C], the corresponding output shape would be [max(ngram_indexes) + 1]. -If input shape is [N, C], this operator produces a [N, max(ngram_indexes) + 1]-tensor. - -In contrast to standard n-gram extraction, here, the indexes of extracting an n-gram from the original -sequence are not necessarily consecutive numbers. The discontinuity between indexes are controlled by the number of skips. -If the number of skips is 2, we should skip two tokens when scanning through the original sequence. -Let's consider an example. Assume that input sequence is [94, 17, 36, 12, 28] and the number of skips is 2. -The associated 2-grams are [94, 12] and [17, 28] respectively indexed by [0, 3] and [1, 4]. -If the number of skips becomes 0, the 2-grams generated are [94, 17], [17, 36], [36, 12], [12, 28] -indexed by [0, 1], [1, 2], [2, 3], [3, 4], respectively. - -The output vector stores the count of each n-gram; -Y[i] indicates the times that the i-th n-gram is found. The attribute ngram_indexes is used to determine the mapping -between index i and the corresponding n-gram. If pool_int64s is [94 , 17 ,17, 36], ngram_indexes is [1, 0], -ngram_counts=[0, 0], then the Y[0] (first element in Y) and Y[1] (second element in Y) are the counts of [17, 36] and [94, 17], -respectively. An n-gram which cannot be found in pool_strings/pool_int64s should be ignored and has no effect on the output. -Note that we may consider all skips up to S when generating the n-grams. - -The examples used above are true if mode is "TF". If mode is "IDF", all the counts larger than 1 would be truncated to 1 and -the i-th element in weights would be used to scale (by multiplication) the count of the i-th n-gram in pool. If mode is "TFIDF", -this operator first computes the counts of all n-grams and then scale them by the associated values in the weights attribute. - -Only one of pool_strings and pool_int64s can be set. If pool_int64s is set, the input should be an integer tensor. -If pool_strings is set, the input must be a string tensor. -)DOC"); - // Operators for linear 8 bit quanitzation support. ONNX_CONTRIB_OPERATOR_SCHEMA(QuantizeLinear) .SetDomain(kMSDomain) diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc index 8355c7c7dc..c13ff4ca23 100644 --- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc +++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc @@ -248,6 +248,9 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Asi class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Acosh); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Atanh); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Scan); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, string, TfIdfVectorizer); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, int32_t, TfIdfVectorizer); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, int64_t, TfIdfVectorizer); void RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { kernel_registry.Register(BuildKernelCreateInfo()); @@ -489,6 +492,9 @@ void RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); kernel_registry.Register(BuildKernelCreateInfo()); + kernel_registry.Register(BuildKernelCreateInfo()); + kernel_registry.Register(BuildKernelCreateInfo()); + kernel_registry.Register(BuildKernelCreateInfo()); } // Forward declarations of ml op kernels diff --git a/onnxruntime/contrib_ops/cpu/ngram.cc b/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc similarity index 93% rename from onnxruntime/contrib_ops/cpu/ngram.cc rename to onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc index 74117806f5..d927e927cc 100644 --- a/onnxruntime/contrib_ops/cpu/ngram.cc +++ b/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "ngram.h" +#include "tfidfvectorizer.h" #include "onnx/defs/schema.h" #include "core/common/common.h" #include "core/framework/tensor.h" @@ -12,34 +12,33 @@ #include namespace onnxruntime { -namespace contrib { -ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( - Ngram, - 1, +ONNX_CPU_OPERATOR_TYPED_KERNEL( + TfIdfVectorizer, + 9, string, KernelDefBuilder() .TypeConstraint("T", DataTypeImpl::GetTensorType()) .TypeConstraint("T1", DataTypeImpl::GetTensorType()), - contrib::Ngram); + TfIdfVectorizer); -ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( - Ngram, - 1, +ONNX_CPU_OPERATOR_TYPED_KERNEL( + TfIdfVectorizer, + 9, int32_t, KernelDefBuilder() .TypeConstraint("T", DataTypeImpl::GetTensorType()) .TypeConstraint("T1", DataTypeImpl::GetTensorType()), - contrib::Ngram); + TfIdfVectorizer); -ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( - Ngram, - 1, +ONNX_CPU_OPERATOR_TYPED_KERNEL( + TfIdfVectorizer, + 9, int64_t, KernelDefBuilder() .TypeConstraint("T", DataTypeImpl::GetTensorType()) .TypeConstraint("T1", DataTypeImpl::GetTensorType()), - contrib::Ngram); + TfIdfVectorizer); namespace ngram_details { @@ -169,10 +168,9 @@ inline void Emplace(ForwardIter first, size_t ngrams, size_t ngram_size, size_t& } } // namespace ngram_details -} // namespace contrib } // namespace onnxruntime -using namespace onnxruntime::contrib::ngram_details; +using namespace onnxruntime::ngram_details; namespace std { template @@ -186,7 +184,6 @@ struct hash> { } // namespace std namespace onnxruntime { -namespace contrib { // The weighting criteria. // "TF"(term frequency), @@ -206,7 +203,7 @@ enum WeightingCriteria { kTFIDF = 3 }; -struct Ngram::Impl { +struct TfIdfVectorizer::Impl { WeightingCriteria weighting_criteria_ = kNone; int64_t max_gram_length_ = 0; int64_t min_gram_length_ = 0; @@ -251,36 +248,36 @@ struct Ngram::Impl { }; template <> -inline auto Ngram::Impl::PoolEnd() const { +inline auto TfIdfVectorizer::Impl::PoolEnd() const { return int64_set_.cend(); } template <> -inline auto Ngram::Impl::PoolEnd() const { +inline auto TfIdfVectorizer::Impl::PoolEnd() const { return PoolEnd(); } template <> -inline auto Ngram::Impl::PoolEnd() const { +inline auto TfIdfVectorizer::Impl::PoolEnd() const { return str_set_.cend(); } template <> -inline auto Ngram::Impl::PoolFind(const NgramEntry& i) const { +inline auto TfIdfVectorizer::Impl::PoolFind(const NgramEntry& i) const { return int64_set_.find(i); } template <> -inline auto Ngram::Impl::PoolFind(const NgramEntry& i) const { +inline auto TfIdfVectorizer::Impl::PoolFind(const NgramEntry& i) const { return int64_set_.find(i); } template <> -inline auto Ngram::Impl::PoolFind(const NgramEntry& i) const { +inline auto TfIdfVectorizer::Impl::PoolFind(const NgramEntry& i) const { return str_set_.find(i); } -Ngram::Ngram(const OpKernelInfo& info) : OpKernel(info), impl_(new Impl) { +TfIdfVectorizer::TfIdfVectorizer(const OpKernelInfo& info) : OpKernel(info), impl_(new Impl) { std::string mode; Status status = info.GetAttr("mode", &mode); ORT_ENFORCE(status.IsOK(), "mode is required"); @@ -381,10 +378,10 @@ Ngram::Ngram(const OpKernelInfo& info) : OpKernel(info), impl_(new Impl) { } } -Ngram::~Ngram() { +TfIdfVectorizer::~TfIdfVectorizer() { } -void Ngram::OutputResult(OpKernelContext* ctx, size_t B, const std::vector& frequences) const { +void TfIdfVectorizer::OutputResult(OpKernelContext* ctx, size_t B, const std::vector& frequences) const { const Impl& impl = *impl_; std::vector output_dims; if (B == 0) { @@ -437,7 +434,7 @@ void Ngram::OutputResult(OpKernelContext* ctx, size_t B, const std::vector -Status Ngram::ComputeImpl(OpKernelContext* ctx) const { +Status TfIdfVectorizer::ComputeImpl(OpKernelContext* ctx) const { const auto& impl = *impl_; auto const set_end = impl.PoolEnd(); @@ -559,7 +556,7 @@ Status Ngram::ComputeImpl(OpKernelContext* ctx) const { return Status::OK(); } -Status Ngram::Compute(OpKernelContext* ctx) const { +Status TfIdfVectorizer::Compute(OpKernelContext* ctx) const { Status s; auto X = ctx->Input(0); @@ -578,5 +575,4 @@ Status Ngram::Compute(OpKernelContext* ctx) const { return s; } -} // namespace contrib } // namespace onnxruntime diff --git a/onnxruntime/contrib_ops/cpu/ngram.h b/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.h similarity index 76% rename from onnxruntime/contrib_ops/cpu/ngram.h rename to onnxruntime/core/providers/cpu/nn/tfidfvectorizer.h index adf95ebcb8..025933e13f 100644 --- a/onnxruntime/contrib_ops/cpu/ngram.h +++ b/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.h @@ -9,13 +9,12 @@ #include namespace onnxruntime { -namespace contrib { -class Ngram final : public OpKernel { +class TfIdfVectorizer final : public OpKernel { public: - explicit Ngram(const OpKernelInfo& info); - ~Ngram(); - ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Ngram); + explicit TfIdfVectorizer(const OpKernelInfo& info); + ~TfIdfVectorizer(); + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(TfIdfVectorizer); Status Compute(OpKernelContext* ctx) const override; @@ -30,5 +29,4 @@ class Ngram final : public OpKernel { std::unique_ptr impl_; }; -} // namespace contrib } // namespace onnxruntime diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index 631e56eecb..03ef85115a 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -344,14 +344,7 @@ int real_main(int argc, char* argv[]) { {"cast_FLOAT_to_STRING", "Cast opset 9 not supported yet"}, {"cast_FLOAT_to_FLOAT16", "Cast opset 9 not supported yet"}, {"cast_FLOAT16_to_DOUBLE", "Cast opset 9 not supported yet"}, - {"nonzero_example", "NonZero opset 9 not supported yet"}, - {"tfidfvectorizer_tf_uniandbigrams_skip5", "TfIdfVectorizer opset 9 not supported yet"}, - {"tfidfvectorizer_tf_batch_onlybigrams_skip0", "TfIdfVectorizer opset 9 not supported yet"}, - {"tfidfvectorizer_tf_onlybigrams_skip5", "TfIdfVectorizer opset 9 not supported yet"}, - {"tfidfvectorizer_tf_only_bigrams_skip0", "TfIdfVectorizer opset 9 not supported yet"}, - {"tfidfvectorizer_tf_onlybigrams_levelempty", "TfIdfVectorizer opset 9 not supported yet"}, - {"tfidfvectorizer_tf_batch_uniandbigrams_skip5", "TfIdfVectorizer opset 9 not supported yet"}, - {"tfidfvectorizer_tf_batch_onlybigrams_skip5", "TfIdfVectorizer opset 9 not supported yet"}}; + {"nonzero_example", "NonZero opset 9 not supported yet"}}; #ifdef USE_CUDA broken_tests["maxpool_2d_default"] = "cudnn pooling only support input dimension >= 3"; diff --git a/onnxruntime/test/contrib_ops/ngram_test.cc b/onnxruntime/test/providers/cpu/nn/tfidfvectorizer_test.cc similarity index 87% rename from onnxruntime/test/contrib_ops/ngram_test.cc rename to onnxruntime/test/providers/cpu/nn/tfidfvectorizer_test.cc index 727fdd4521..2a6015c655 100644 --- a/onnxruntime/test/contrib_ops/ngram_test.cc +++ b/onnxruntime/test/providers/cpu/nn/tfidfvectorizer_test.cc @@ -8,10 +8,10 @@ namespace onnxruntime { namespace test { -namespace ngram_test { +namespace tfidfvectorizer_test { -constexpr const char* domain = onnxruntime::kMSDomain; -const int opset_ver = 1; +constexpr const char* domain = kOnnxDomain; +const int opset_ver = 9; void InitTestAttr(OpTester& test, const std::string& mode, int64_t min_gram_length, int64_t max_gram_length, int64_t max_skip_count, @@ -36,9 +36,9 @@ void InitTestAttr(OpTester& test, const std::string& mode, test.AddAttribute("pool_strings", pool_strings); } } -} // namespace ngram_test +} // namespace tfidfvectorizer_test -using namespace ngram_test; +using namespace tfidfvectorizer_test; // Here is what takes place in general and in particular // in this unit test.There are 7 n - grams : 4 unigrams and 3 bigrams @@ -48,8 +48,8 @@ using namespace ngram_test; // However, attribute all controls whether we consider all of the supplied ngram[M..N] sizes // into consideration or not.With all = false, we only consider N - grams. -TEST(ContribOpNgramTest, Int32_TF_onlyBigrams_Skip0) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, Int32_TF_onlyBigrams_Skip0) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=0, Min=Max=2, weights empty, int32 InitTestAttr(test, "TF", 2, 2, 0, {0, 4}, @@ -70,8 +70,8 @@ TEST(ContribOpNgramTest, Int32_TF_onlyBigrams_Skip0) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, Int32_TF_BatchOnlyBigrams_Skip0) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, Int32_TF_BatchOnlyBigrams_Skip0) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=0, Min=Max=2, weights empty, int32 InitTestAttr(test, "TF", 2, 2, 0, {0, 4}, @@ -95,8 +95,8 @@ TEST(ContribOpNgramTest, Int32_TF_BatchOnlyBigrams_Skip0) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, String_TF_OnlyBigrams_Skip0) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, String_TF_OnlyBigrams_Skip0) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=0, Min=Max=2, weights empty, string InitTestAttr(test, "TF", 2, 2, 0, {0, 4}, @@ -118,8 +118,8 @@ TEST(ContribOpNgramTest, String_TF_OnlyBigrams_Skip0) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, String_TF_BatchOnlyBigrams_Skip0) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, String_TF_BatchOnlyBigrams_Skip0) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=0, Min=Max=2, weights empty, string InitTestAttr(test, "TF", 2, 2, 0, {0, 4}, @@ -145,8 +145,8 @@ TEST(ContribOpNgramTest, String_TF_BatchOnlyBigrams_Skip0) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, Int32_TF_onlyBigrams_LevelEmpty) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, Int32_TF_onlyBigrams_LevelEmpty) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=0, Min=Max=2, weights empty, int32 InitTestAttr(test, "TF", 2, 2, 0, {0, 0}, // no unigrams, bi-grams start immediately @@ -171,8 +171,8 @@ TEST(ContribOpNgramTest, Int32_TF_onlyBigrams_LevelEmpty) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, Int32_TF_onlyBigrams_Skip5) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, Int32_TF_onlyBigrams_Skip5) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=5, Min=Max=2, weights empty, int32 InitTestAttr(test, "TF", 2, 2, 5, {0, 4}, @@ -195,8 +195,8 @@ TEST(ContribOpNgramTest, Int32_TF_onlyBigrams_Skip5) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, Int32_TF_BatchOnlyBigrams_Skip5) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, Int32_TF_BatchOnlyBigrams_Skip5) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=5, , Min=Max=2, weights empty, int32 InitTestAttr(test, "TF", 2, 2, 5, {0, 4}, @@ -221,8 +221,8 @@ TEST(ContribOpNgramTest, Int32_TF_BatchOnlyBigrams_Skip5) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, String_TF_onlyBigrams_Skip5) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, String_TF_onlyBigrams_Skip5) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=5, , Min=Max=2, weights empty, string InitTestAttr(test, "TF", 2, 2, 5, {0, 4}, @@ -246,8 +246,8 @@ TEST(ContribOpNgramTest, String_TF_onlyBigrams_Skip5) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, String_TF_BatchOnlyBigrams_Skip5) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, String_TF_BatchOnlyBigrams_Skip5) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=5, , Min=Max=2, weights empty, string InitTestAttr(test, "TF", 2, 2, 5, {0, 4}, @@ -270,8 +270,8 @@ TEST(ContribOpNgramTest, String_TF_BatchOnlyBigrams_Skip5) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, Int32_TF_UniAndBigrams_Skip5) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, Int32_TF_UniAndBigrams_Skip5) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=5, , Min=1, Max=2, weights empty, int32 InitTestAttr(test, "TF", 1, 2, 5, {0, 4}, @@ -293,8 +293,8 @@ TEST(ContribOpNgramTest, Int32_TF_UniAndBigrams_Skip5) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, Int32_TF_BatchUniAndBigrams_Skip5) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, Int32_TF_BatchUniAndBigrams_Skip5) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=5, Min=1, Max=2, weights empty, int32 InitTestAttr(test, "TF", 1, 2, 5, {0, 4}, @@ -318,8 +318,8 @@ TEST(ContribOpNgramTest, Int32_TF_BatchUniAndBigrams_Skip5) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, String_TF_UniAndBigrams_Skip5) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, String_TF_UniAndBigrams_Skip5) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=5, Min=1, Max=2, weights empty, string InitTestAttr(test, "TF", 1, 2, 5, {0, 4}, @@ -341,8 +341,8 @@ TEST(ContribOpNgramTest, String_TF_UniAndBigrams_Skip5) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, String_TF_BatchUniAndBigrams_Skip5) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, String_TF_BatchUniAndBigrams_Skip5) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=5, Min=1, Max=2, weights empty, string InitTestAttr(test, "TF", 1, 2, 5, {0, 4}, @@ -366,8 +366,8 @@ TEST(ContribOpNgramTest, String_TF_BatchUniAndBigrams_Skip5) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, Int32_IDF_onlyBigrams_Skip5) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, Int32_IDF_onlyBigrams_Skip5) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=5, Min=Max=2, weights empty, int32 // We change to IDF but do not supply weights so // we should get all 1.0f where count is not zero @@ -390,8 +390,8 @@ TEST(ContribOpNgramTest, Int32_IDF_onlyBigrams_Skip5) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, String_IDF_onlyBigrams_Skip5) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, String_IDF_onlyBigrams_Skip5) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=5, Min=Max=2, weights empty, string InitTestAttr(test, "IDF", 2, 2, 5, {0, 4}, @@ -413,8 +413,8 @@ TEST(ContribOpNgramTest, String_IDF_onlyBigrams_Skip5) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, Int32_TFIDF_onlyBigrams_Skip5) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, Int32_TFIDF_onlyBigrams_Skip5) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=5, Min=Max=2, weights empty, int32 // We change to TFIDF but do not supply weights so // we should all get the original values as weights are 1.0f by @@ -438,8 +438,8 @@ TEST(ContribOpNgramTest, Int32_TFIDF_onlyBigrams_Skip5) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, String_TFIDF_onlyBigrams_Skip5) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, String_TFIDF_onlyBigrams_Skip5) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=5, Min=Max=2, weights empty, string InitTestAttr(test, "TFIDF", 2, 2, 5, {0, 4}, @@ -461,8 +461,8 @@ TEST(ContribOpNgramTest, String_TFIDF_onlyBigrams_Skip5) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, Int32_IDFWeights_onlyBigrams_Skip5) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, Int32_IDFWeights_onlyBigrams_Skip5) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=5, Min=Max=2, weights specified, int32 // We change to IDF with supplied weights. All // with non-zero counts must be replaced with the supplied weights @@ -485,8 +485,8 @@ TEST(ContribOpNgramTest, Int32_IDFWeights_onlyBigrams_Skip5) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, String_IDFWeights_onlyBigrams_Skip5) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, String_IDFWeights_onlyBigrams_Skip5) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=5, Min=Max=2, weights specified, string InitTestAttr(test, "IDF", 2, 2, 5, {0, 4}, @@ -508,8 +508,8 @@ TEST(ContribOpNgramTest, String_IDFWeights_onlyBigrams_Skip5) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, Int32_TFIDFWeights_onlyBigrams_Skip5) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, Int32_TFIDFWeights_onlyBigrams_Skip5) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=5, Min=Max=2, weights specified, int32 // We change to TFIDF with supplied weights. // We should have all counts scaled by weights @@ -532,8 +532,8 @@ TEST(ContribOpNgramTest, Int32_TFIDFWeights_onlyBigrams_Skip5) { test.Run(OpTester::ExpectResult::kExpectSuccess); } -TEST(ContribOpNgramTest, String_TFIDFWeights_onlyBigrams_Skip5) { - OpTester test("Ngram", opset_ver, domain); +TEST(TfIdfVectorizerTest, String_TFIDFWeights_onlyBigrams_Skip5) { + OpTester test("TfIdfVectorizer", opset_ver, domain); // s=5, Min=Max=2, weights specified, string InitTestAttr(test, "TFIDF", 2, 2, 5, {0, 4},