Promote TfIdfvectorizer to ONNX ver 9 (#373)

* Advance ONNX commit, move Ngram files under ONNX and rename to TfIdfVectorizer

* Rename Ngram to TfIdfVectorizer and redeclare in ONNX domain

* Restore tfidfvectorizer tests

* Remove ML definition.
This commit is contained in:
Dmitri Smirnov 2019-01-24 10:11:26 -08:00 committed by GitHub
parent 89f643f04b
commit 829b2a5e81
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 106 additions and 252 deletions

View file

@ -13,9 +13,6 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1,
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, FusedGemm);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, AttnLSTM);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, string, Tokenizer);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, string, Ngram);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int32_t, Ngram);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int64_t, Ngram);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, DequantizeLinear);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, DequantizeLinear);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, QuantizeLinear);
@ -42,9 +39,6 @@ void RegisterContribKernels(KernelRegistry& kernel_registry) {
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, FusedGemm)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, AttnLSTM)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, string, Tokenizer)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, string, Ngram)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int32_t, Ngram)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int64_t, Ngram)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, DequantizeLinear)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, DequantizeLinear)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, QuantizeLinear)>());

View file

@ -275,28 +275,28 @@ activation and leaky_relu_alpha.)DOC")
"",
AttributeProto::FLOAT,
OPTIONAL)
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
propagateElemTypeFromInputToOutput(ctx, 0, 0);
if (hasNInputShapes(ctx, 2)) {
auto transAAttr = ctx.getAttribute("transA");
bool transA =
transAAttr ? static_cast<int>(transAAttr->i()) != 0 : false;
auto transBAttr = ctx.getAttribute("transB");
bool transB =
transBAttr ? static_cast<int>(transBAttr->i()) != 0 : false;
auto& first_input_shape = getInputShape(ctx, 0);
auto& second_input_shape = getInputShape(ctx, 1);
if (first_input_shape.dim_size() != 2)
fail_shape_inference("First input does not have rank 2");
if (second_input_shape.dim_size() != 2)
fail_shape_inference("Second input does not have rank 2");
updateOutputShape(
ctx,
0,
{first_input_shape.dim(transA ? 1 : 0),
second_input_shape.dim(transB ? 0 : 1)});
}
});
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
propagateElemTypeFromInputToOutput(ctx, 0, 0);
if (hasNInputShapes(ctx, 2)) {
auto transAAttr = ctx.getAttribute("transA");
bool transA =
transAAttr ? static_cast<int>(transAAttr->i()) != 0 : false;
auto transBAttr = ctx.getAttribute("transB");
bool transB =
transBAttr ? static_cast<int>(transBAttr->i()) != 0 : false;
auto& first_input_shape = getInputShape(ctx, 0);
auto& second_input_shape = getInputShape(ctx, 1);
if (first_input_shape.dim_size() != 2)
fail_shape_inference("First input does not have rank 2");
if (second_input_shape.dim_size() != 2)
fail_shape_inference("Second input does not have rank 2");
updateOutputShape(
ctx,
0,
{first_input_shape.dim(transA ? 1 : 0),
second_input_shape.dim(transB ? 0 : 1)});
}
});
ONNX_CONTRIB_OPERATOR_SCHEMA(ExpandDims)
.SetDomain(kMSDomain)
@ -374,139 +374,6 @@ activation and leaky_relu_alpha.)DOC")
})
.SetDoc(R"DOC(Tokenizer divides each string in X into a vector of strings along the last axis. All input strings including attributes are UTF-8 encoded.)DOC");
// Schema for the Ngram contrib operator (kMSDomain, since version 1).
// Input X is a string/int32/int64 tensor; output Y is a float tensor whose last
// axis has max(ngram_indexes) + 1 entries.
// NOTE: attribute descriptions are built from adjacent string literals, which the
// compiler concatenates verbatim. Every fragment that continues a sentence must
// therefore end with an explicit space, otherwise words run together in the docs.
ONNX_CONTRIB_OPERATOR_SCHEMA(Ngram)
    .SetDomain(kMSDomain)
    .SinceVersion(1)
    .Input(0, "X", "Input for n-gram extraction", "T")
    .Output(0, "Y", "Ngram results", "T1")
    .TypeConstraint(
        "T",
        {"tensor(string)", "tensor(int32)", "tensor(int64)"},
        "Input is either string UTF-8 or int32/int64")
    .TypeConstraint(
        "T1",
        {"tensor(float)"},
        "1-D tensor of floats")
    .Attr(
        "max_gram_length",
        "Maximum n-gram length. If this value is 3, 3-grams will be used to generate the output.",
        AttributeProto::INT)
    .Attr(
        "min_gram_length",
        "Minimum n-gram length. If this value is 2 and max_gram_length is 3, output may contain counts of 2-grams and 3-grams.",
        AttributeProto::INT)
    .Attr(
        "max_skip_count",
        "Maximum number of items (integers/strings) to be skipped when constructing an n-gram from X. "
        "If max_skip_count=1, min_gram_length=2, max_gram_length=3, this operator may generate 2-grams "
        "with skip_count=0 and skip_count=1, and 3-grams with skip_count=0 and skip_count=1",
        AttributeProto::INT)
    .Attr(
        "pool_strings",
        "List of strings n-grams learned from the training set. Either this or pool_int64s attributes must be present but not both. "
        "It's an 1-D tensor starting with the collections of all 1-grams and ending with the collections of n-grams. "
        "The i-th element in pool stores the n-gram that should be mapped to index ngram_indexes[i] in the output vector.",
        AttributeProto::STRINGS,
        OPTIONAL)
    .Attr(
        "pool_int64s",
        "List of int64 n-grams learned from the training set. Either this or pool_strings attributes must be present but not both. "
        "It's an 1-D tensor starting with the collections of all 1-grams and ending with the collections of n-grams. "
        "The i-th element in pool stores the n-gram that should be mapped to index ngram_indexes[i] in the output vector.",
        AttributeProto::INTS,
        OPTIONAL)
    .Attr(
        "ngram_counts",
        "The starting indexes of 1-grams, 2-grams, and so on in pool. "
        "It is useful when determining the boundary between two consecutive collections of n-grams. "
        "For example, if ngram_counts is [0, 17, 36], the first index (zero-based) of 1-gram/2-gram/3-gram "
        "in pool are 0/17/36. This format is essentially identical to CSR (or CSC) sparse matrix format, "
        "and we choose to keep this due to its popularity.",
        AttributeProto::INTS)
    .Attr(
        "ngram_indexes",
        "list of int64s (type: AttributeProto::INTS). This list is parallel to the specified 'pool_*' attribute. "
        "The i-th element in ngram_indexes indicate the coordinate of the i-th n-gram in the output tensor.",
        AttributeProto::INTS)
    .Attr(
        "weights",
        "list of floats. This attribute stores the weight of each n-gram in pool. The i-th element in weights "
        "is the weight of the i-th n-gram in pool. Its length equals to the size of ngram_indexes. "
        "By default, weights is an all-one tensor. This attribute is used when mode is \"IDF\" or \"TFIDF\" "
        "to scale the associated word counts.",
        AttributeProto::FLOATS,
        OPTIONAL)
    .Attr(
        "mode",
        "The weighting criteria. It can be one of \"TF\" (term frequency), "
        "\"IDF\" (inverse document frequency), and \"TFIDF\" (the combination of TF and IDF)",
        AttributeProto::STRING)
    .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
      // The output element type is always float, regardless of the input type.
      auto output_elem_type = ctx.getOutputType(0)->mutable_tensor_type();
      output_elem_type->set_elem_type(ONNX_NAMESPACE::TensorProto::FLOAT);

      // Shape can only be inferred when the input shape is known.
      if (hasInputShape(ctx, 0)) {
        std::vector<int64_t> ngram_indexes;
        ONNX_NAMESPACE::getRepeatedAttribute(ctx, "ngram_indexes", ngram_indexes);
        if (ngram_indexes.empty() || !std::all_of(ngram_indexes.cbegin(), ngram_indexes.cend(),
                                                  [](int64_t i) { return i >= 0; })) {
          fail_shape_inference(
              "ngram_indexes must be non-empty with no negative values");
        }

        // The last output axis must be able to hold the largest output coordinate.
        auto greatest_hit = std::max_element(ngram_indexes.cbegin(), ngram_indexes.cend());
        auto max_last_axis = *greatest_hit + 1;

        ONNX_NAMESPACE::TensorShapeProto output_shape;
        auto& input_shape = ctx.getInputType(0)->tensor_type().shape();
        auto dim_size = input_shape.dim_size();
        if (dim_size == 0 || dim_size == 1) {
          // Rank 0 or 1 input -> output shape [max(ngram_indexes) + 1]
          output_shape.add_dim()->set_dim_value(max_last_axis);
        } else if (dim_size == 2) {
          // Rank 2 input [B, C] -> output shape [B, max(ngram_indexes) + 1];
          // the batch dimension must be a concrete value.
          auto& B_dim = input_shape.dim(0);
          if (!B_dim.has_dim_value()) {
            fail_shape_inference(
                "Input shape does not have first dimension value");
          }
          output_shape.add_dim()->set_dim_value(B_dim.dim_value());
          output_shape.add_dim()->set_dim_value(max_last_axis);
        } else {
          fail_shape_inference(
              "Input shape must have either [C] or [B,C] dimensions where C > 0 and B > 0");
        }
        updateOutputShape(ctx, 0, output_shape);
      }
    })
    .SetDoc(R"DOC(
This transform extracts n-grams from the input sequence and save them as a vector. Input can
be either a 1-D or 2-D tensor. For 1-D input, output is the n-gram representation of that input.
For 2-D input, the output is also a 2-D tensor whose i-th row is the n-gram representation of the i-th input row.
More specifically, if input shape is [C], the corresponding output shape would be [max(ngram_indexes) + 1].
If input shape is [N, C], this operator produces a [N, max(ngram_indexes) + 1]-tensor.
In contrast to standard n-gram extraction, here, the indexes of extracting an n-gram from the original
sequence are not necessarily consecutive numbers. The discontinuity between indexes are controlled by the number of skips.
If the number of skips is 2, we should skip two tokens when scanning through the original sequence.
Let's consider an example. Assume that input sequence is [94, 17, 36, 12, 28] and the number of skips is 2.
The associated 2-grams are [94, 12] and [17, 28] respectively indexed by [0, 3] and [1, 4].
If the number of skips becomes 0, the 2-grams generated are [94, 17], [17, 36], [36, 12], [12, 28]
indexed by [0, 1], [1, 2], [2, 3], [3, 4], respectively.
The output vector stores the count of each n-gram;
Y[i] indicates the times that the i-th n-gram is found. The attribute ngram_indexes is used to determine the mapping
between index i and the corresponding n-gram. If pool_int64s is [94 , 17 ,17, 36], ngram_indexes is [1, 0],
ngram_counts=[0, 0], then the Y[0] (first element in Y) and Y[1] (second element in Y) are the counts of [17, 36] and [94, 17],
respectively. An n-gram which cannot be found in pool_strings/pool_int64s should be ignored and has no effect on the output.
Note that we may consider all skips up to S when generating the n-grams.
The examples used above are true if mode is "TF". If mode is "IDF", all the counts larger than 1 would be truncated to 1 and
the i-th element in weights would be used to scale (by multiplication) the count of the i-th n-gram in pool. If mode is "TFIDF",
this operator first computes the counts of all n-grams and then scale them by the associated values in the weights attribute.
Only one of pool_strings and pool_int64s can be set. If pool_int64s is set, the input should be an integer tensor.
If pool_strings is set, the input must be a string tensor.
)DOC");
// Operators for linear 8 bit quantization support.
ONNX_CONTRIB_OPERATOR_SCHEMA(QuantizeLinear)
.SetDomain(kMSDomain)

View file

@ -248,6 +248,9 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Asi
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Acosh);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Atanh);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Scan);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, string, TfIdfVectorizer);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, int32_t, TfIdfVectorizer);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, int64_t, TfIdfVectorizer);
void RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, Clip)>());
@ -489,6 +492,9 @@ void RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Acosh)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Atanh)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Scan)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, string, TfIdfVectorizer)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, int32_t, TfIdfVectorizer)>());
kernel_registry.Register(BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, int64_t, TfIdfVectorizer)>());
}
// Forward declarations of ml op kernels

View file

@ -1,7 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "ngram.h"
#include "tfidfvectorizer.h"
#include "onnx/defs/schema.h"
#include "core/common/common.h"
#include "core/framework/tensor.h"
@ -12,34 +12,33 @@
#include <iterator>
namespace onnxruntime {
namespace contrib {
ONNX_CPU_OPERATOR_TYPED_MS_KERNEL(
Ngram,
1,
ONNX_CPU_OPERATOR_TYPED_KERNEL(
TfIdfVectorizer,
9,
string,
KernelDefBuilder()
.TypeConstraint("T", DataTypeImpl::GetTensorType<std::string>())
.TypeConstraint("T1", DataTypeImpl::GetTensorType<float>()),
contrib::Ngram);
TfIdfVectorizer);
ONNX_CPU_OPERATOR_TYPED_MS_KERNEL(
Ngram,
1,
ONNX_CPU_OPERATOR_TYPED_KERNEL(
TfIdfVectorizer,
9,
int32_t,
KernelDefBuilder()
.TypeConstraint("T", DataTypeImpl::GetTensorType<int32_t>())
.TypeConstraint("T1", DataTypeImpl::GetTensorType<float>()),
contrib::Ngram);
TfIdfVectorizer);
ONNX_CPU_OPERATOR_TYPED_MS_KERNEL(
Ngram,
1,
ONNX_CPU_OPERATOR_TYPED_KERNEL(
TfIdfVectorizer,
9,
int64_t,
KernelDefBuilder()
.TypeConstraint("T", DataTypeImpl::GetTensorType<int64_t>())
.TypeConstraint("T1", DataTypeImpl::GetTensorType<float>()),
contrib::Ngram);
TfIdfVectorizer);
namespace ngram_details {
@ -169,10 +168,9 @@ inline void Emplace(ForwardIter first, size_t ngrams, size_t ngram_size, size_t&
}
} // namespace ngram_details
} // namespace contrib
} // namespace onnxruntime
using namespace onnxruntime::contrib::ngram_details;
using namespace onnxruntime::ngram_details;
namespace std {
template <typename T>
@ -186,7 +184,6 @@ struct hash<NgramEntry<T>> {
} // namespace std
namespace onnxruntime {
namespace contrib {
// The weighting criteria.
// "TF"(term frequency),
@ -206,7 +203,7 @@ enum WeightingCriteria {
kTFIDF = 3
};
struct Ngram::Impl {
struct TfIdfVectorizer::Impl {
WeightingCriteria weighting_criteria_ = kNone;
int64_t max_gram_length_ = 0;
int64_t min_gram_length_ = 0;
@ -251,36 +248,36 @@ struct Ngram::Impl {
};
template <>
inline auto Ngram::Impl::PoolEnd<int64_t>() const {
inline auto TfIdfVectorizer::Impl::PoolEnd<int64_t>() const {
return int64_set_.cend();
}
template <>
inline auto Ngram::Impl::PoolEnd<int32_t>() const {
inline auto TfIdfVectorizer::Impl::PoolEnd<int32_t>() const {
return PoolEnd<int64_t>();
}
template <>
inline auto Ngram::Impl::PoolEnd<std::string>() const {
inline auto TfIdfVectorizer::Impl::PoolEnd<std::string>() const {
return str_set_.cend();
}
template <>
inline auto Ngram::Impl::PoolFind<int64_t>(const NgramEntry<int64_t>& i) const {
inline auto TfIdfVectorizer::Impl::PoolFind<int64_t>(const NgramEntry<int64_t>& i) const {
return int64_set_.find(i);
}
template <>
inline auto Ngram::Impl::PoolFind<int32_t>(const NgramEntry<int32_t>& i) const {
inline auto TfIdfVectorizer::Impl::PoolFind<int32_t>(const NgramEntry<int32_t>& i) const {
return int64_set_.find(i);
}
template <>
inline auto Ngram::Impl::PoolFind<std::string>(const NgramEntry<std::string>& i) const {
inline auto TfIdfVectorizer::Impl::PoolFind<std::string>(const NgramEntry<std::string>& i) const {
return str_set_.find(i);
}
Ngram::Ngram(const OpKernelInfo& info) : OpKernel(info), impl_(new Impl) {
TfIdfVectorizer::TfIdfVectorizer(const OpKernelInfo& info) : OpKernel(info), impl_(new Impl) {
std::string mode;
Status status = info.GetAttr("mode", &mode);
ORT_ENFORCE(status.IsOK(), "mode is required");
@ -381,10 +378,10 @@ Ngram::Ngram(const OpKernelInfo& info) : OpKernel(info), impl_(new Impl) {
}
}
Ngram::~Ngram() {
TfIdfVectorizer::~TfIdfVectorizer() {
}
void Ngram::OutputResult(OpKernelContext* ctx, size_t B, const std::vector<uint32_t>& frequences) const {
void TfIdfVectorizer::OutputResult(OpKernelContext* ctx, size_t B, const std::vector<uint32_t>& frequences) const {
const Impl& impl = *impl_;
std::vector<int64_t> output_dims;
if (B == 0) {
@ -437,7 +434,7 @@ void Ngram::OutputResult(OpKernelContext* ctx, size_t B, const std::vector<uint3
}
template <typename T>
Status Ngram::ComputeImpl(OpKernelContext* ctx) const {
Status TfIdfVectorizer::ComputeImpl(OpKernelContext* ctx) const {
const auto& impl = *impl_;
auto const set_end = impl.PoolEnd<T>();
@ -559,7 +556,7 @@ Status Ngram::ComputeImpl(OpKernelContext* ctx) const {
return Status::OK();
}
Status Ngram::Compute(OpKernelContext* ctx) const {
Status TfIdfVectorizer::Compute(OpKernelContext* ctx) const {
Status s;
auto X = ctx->Input<Tensor>(0);
@ -578,5 +575,4 @@ Status Ngram::Compute(OpKernelContext* ctx) const {
return s;
}
} // namespace contrib
} // namespace onnxruntime

View file

@ -9,13 +9,12 @@
#include <vector>
namespace onnxruntime {
namespace contrib {
class Ngram final : public OpKernel {
class TfIdfVectorizer final : public OpKernel {
public:
explicit Ngram(const OpKernelInfo& info);
~Ngram();
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Ngram);
explicit TfIdfVectorizer(const OpKernelInfo& info);
~TfIdfVectorizer();
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(TfIdfVectorizer);
Status Compute(OpKernelContext* ctx) const override;
@ -30,5 +29,4 @@ class Ngram final : public OpKernel {
std::unique_ptr<Impl> impl_;
};
} // namespace contrib
} // namespace onnxruntime

View file

@ -344,14 +344,7 @@ int real_main(int argc, char* argv[]) {
{"cast_FLOAT_to_STRING", "Cast opset 9 not supported yet"},
{"cast_FLOAT_to_FLOAT16", "Cast opset 9 not supported yet"},
{"cast_FLOAT16_to_DOUBLE", "Cast opset 9 not supported yet"},
{"nonzero_example", "NonZero opset 9 not supported yet"},
{"tfidfvectorizer_tf_uniandbigrams_skip5", "TfIdfVectorizer opset 9 not supported yet"},
{"tfidfvectorizer_tf_batch_onlybigrams_skip0", "TfIdfVectorizer opset 9 not supported yet"},
{"tfidfvectorizer_tf_onlybigrams_skip5", "TfIdfVectorizer opset 9 not supported yet"},
{"tfidfvectorizer_tf_only_bigrams_skip0", "TfIdfVectorizer opset 9 not supported yet"},
{"tfidfvectorizer_tf_onlybigrams_levelempty", "TfIdfVectorizer opset 9 not supported yet"},
{"tfidfvectorizer_tf_batch_uniandbigrams_skip5", "TfIdfVectorizer opset 9 not supported yet"},
{"tfidfvectorizer_tf_batch_onlybigrams_skip5", "TfIdfVectorizer opset 9 not supported yet"}};
{"nonzero_example", "NonZero opset 9 not supported yet"}};
#ifdef USE_CUDA
broken_tests["maxpool_2d_default"] = "cudnn pooling only support input dimension >= 3";

View file

@ -8,10 +8,10 @@
namespace onnxruntime {
namespace test {
namespace ngram_test {
namespace tfidfvectorizer_test {
constexpr const char* domain = onnxruntime::kMSDomain;
const int opset_ver = 1;
constexpr const char* domain = kOnnxDomain;
const int opset_ver = 9;
void InitTestAttr(OpTester& test, const std::string& mode,
int64_t min_gram_length, int64_t max_gram_length, int64_t max_skip_count,
@ -36,9 +36,9 @@ void InitTestAttr(OpTester& test, const std::string& mode,
test.AddAttribute("pool_strings", pool_strings);
}
}
} // namespace ngram_test
} // namespace tfidfvectorizer_test
using namespace ngram_test;
using namespace tfidfvectorizer_test;
// Here is what takes place in general and in particular
// in this unit test. There are 7 n-grams: 4 unigrams and 3 bigrams
@ -48,8 +48,8 @@ using namespace ngram_test;
// However, attribute all controls whether we take all of the supplied ngram[M..N] sizes
// into consideration or not. With all = false, we only consider N-grams.
TEST(ContribOpNgramTest, Int32_TF_onlyBigrams_Skip0) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, Int32_TF_onlyBigrams_Skip0) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=0, Min=Max=2, weights empty, int32
InitTestAttr(test, "TF", 2, 2, 0,
{0, 4},
@ -70,8 +70,8 @@ TEST(ContribOpNgramTest, Int32_TF_onlyBigrams_Skip0) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, Int32_TF_BatchOnlyBigrams_Skip0) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, Int32_TF_BatchOnlyBigrams_Skip0) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=0, Min=Max=2, weights empty, int32
InitTestAttr(test, "TF", 2, 2, 0,
{0, 4},
@ -95,8 +95,8 @@ TEST(ContribOpNgramTest, Int32_TF_BatchOnlyBigrams_Skip0) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, String_TF_OnlyBigrams_Skip0) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, String_TF_OnlyBigrams_Skip0) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=0, Min=Max=2, weights empty, string
InitTestAttr(test, "TF", 2, 2, 0,
{0, 4},
@ -118,8 +118,8 @@ TEST(ContribOpNgramTest, String_TF_OnlyBigrams_Skip0) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, String_TF_BatchOnlyBigrams_Skip0) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, String_TF_BatchOnlyBigrams_Skip0) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=0, Min=Max=2, weights empty, string
InitTestAttr(test, "TF", 2, 2, 0,
{0, 4},
@ -145,8 +145,8 @@ TEST(ContribOpNgramTest, String_TF_BatchOnlyBigrams_Skip0) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, Int32_TF_onlyBigrams_LevelEmpty) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, Int32_TF_onlyBigrams_LevelEmpty) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=0, Min=Max=2, weights empty, int32
InitTestAttr(test, "TF", 2, 2, 0,
{0, 0}, // no unigrams, bi-grams start immediately
@ -171,8 +171,8 @@ TEST(ContribOpNgramTest, Int32_TF_onlyBigrams_LevelEmpty) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, Int32_TF_onlyBigrams_Skip5) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, Int32_TF_onlyBigrams_Skip5) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=5, Min=Max=2, weights empty, int32
InitTestAttr(test, "TF", 2, 2, 5,
{0, 4},
@ -195,8 +195,8 @@ TEST(ContribOpNgramTest, Int32_TF_onlyBigrams_Skip5) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, Int32_TF_BatchOnlyBigrams_Skip5) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, Int32_TF_BatchOnlyBigrams_Skip5) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=5, , Min=Max=2, weights empty, int32
InitTestAttr(test, "TF", 2, 2, 5,
{0, 4},
@ -221,8 +221,8 @@ TEST(ContribOpNgramTest, Int32_TF_BatchOnlyBigrams_Skip5) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, String_TF_onlyBigrams_Skip5) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, String_TF_onlyBigrams_Skip5) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=5, , Min=Max=2, weights empty, string
InitTestAttr(test, "TF", 2, 2, 5,
{0, 4},
@ -246,8 +246,8 @@ TEST(ContribOpNgramTest, String_TF_onlyBigrams_Skip5) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, String_TF_BatchOnlyBigrams_Skip5) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, String_TF_BatchOnlyBigrams_Skip5) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=5, , Min=Max=2, weights empty, string
InitTestAttr(test, "TF", 2, 2, 5,
{0, 4},
@ -270,8 +270,8 @@ TEST(ContribOpNgramTest, String_TF_BatchOnlyBigrams_Skip5) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, Int32_TF_UniAndBigrams_Skip5) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, Int32_TF_UniAndBigrams_Skip5) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=5, , Min=1, Max=2, weights empty, int32
InitTestAttr(test, "TF", 1, 2, 5,
{0, 4},
@ -293,8 +293,8 @@ TEST(ContribOpNgramTest, Int32_TF_UniAndBigrams_Skip5) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, Int32_TF_BatchUniAndBigrams_Skip5) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, Int32_TF_BatchUniAndBigrams_Skip5) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=5, Min=1, Max=2, weights empty, int32
InitTestAttr(test, "TF", 1, 2, 5,
{0, 4},
@ -318,8 +318,8 @@ TEST(ContribOpNgramTest, Int32_TF_BatchUniAndBigrams_Skip5) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, String_TF_UniAndBigrams_Skip5) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, String_TF_UniAndBigrams_Skip5) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=5, Min=1, Max=2, weights empty, string
InitTestAttr(test, "TF", 1, 2, 5,
{0, 4},
@ -341,8 +341,8 @@ TEST(ContribOpNgramTest, String_TF_UniAndBigrams_Skip5) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, String_TF_BatchUniAndBigrams_Skip5) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, String_TF_BatchUniAndBigrams_Skip5) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=5, Min=1, Max=2, weights empty, string
InitTestAttr(test, "TF", 1, 2, 5,
{0, 4},
@ -366,8 +366,8 @@ TEST(ContribOpNgramTest, String_TF_BatchUniAndBigrams_Skip5) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, Int32_IDF_onlyBigrams_Skip5) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, Int32_IDF_onlyBigrams_Skip5) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=5, Min=Max=2, weights empty, int32
// We change to IDF but do not supply weights so
// we should get all 1.0f where count is not zero
@ -390,8 +390,8 @@ TEST(ContribOpNgramTest, Int32_IDF_onlyBigrams_Skip5) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, String_IDF_onlyBigrams_Skip5) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, String_IDF_onlyBigrams_Skip5) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=5, Min=Max=2, weights empty, string
InitTestAttr(test, "IDF", 2, 2, 5,
{0, 4},
@ -413,8 +413,8 @@ TEST(ContribOpNgramTest, String_IDF_onlyBigrams_Skip5) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, Int32_TFIDF_onlyBigrams_Skip5) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, Int32_TFIDF_onlyBigrams_Skip5) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=5, Min=Max=2, weights empty, int32
// We change to TFIDF but do not supply weights so
// we should all get the original values as weights are 1.0f by
@ -438,8 +438,8 @@ TEST(ContribOpNgramTest, Int32_TFIDF_onlyBigrams_Skip5) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, String_TFIDF_onlyBigrams_Skip5) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, String_TFIDF_onlyBigrams_Skip5) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=5, Min=Max=2, weights empty, string
InitTestAttr(test, "TFIDF", 2, 2, 5,
{0, 4},
@ -461,8 +461,8 @@ TEST(ContribOpNgramTest, String_TFIDF_onlyBigrams_Skip5) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, Int32_IDFWeights_onlyBigrams_Skip5) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, Int32_IDFWeights_onlyBigrams_Skip5) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=5, Min=Max=2, weights specified, int32
// We change to IDF with supplied weights. All
// with non-zero counts must be replaced with the supplied weights
@ -485,8 +485,8 @@ TEST(ContribOpNgramTest, Int32_IDFWeights_onlyBigrams_Skip5) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, String_IDFWeights_onlyBigrams_Skip5) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, String_IDFWeights_onlyBigrams_Skip5) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=5, Min=Max=2, weights specified, string
InitTestAttr(test, "IDF", 2, 2, 5,
{0, 4},
@ -508,8 +508,8 @@ TEST(ContribOpNgramTest, String_IDFWeights_onlyBigrams_Skip5) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, Int32_TFIDFWeights_onlyBigrams_Skip5) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, Int32_TFIDFWeights_onlyBigrams_Skip5) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=5, Min=Max=2, weights specified, int32
// We change to TFIDF with supplied weights.
// We should have all counts scaled by weights
@ -532,8 +532,8 @@ TEST(ContribOpNgramTest, Int32_TFIDFWeights_onlyBigrams_Skip5) {
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(ContribOpNgramTest, String_TFIDFWeights_onlyBigrams_Skip5) {
OpTester test("Ngram", opset_ver, domain);
TEST(TfIdfVectorizerTest, String_TFIDFWeights_onlyBigrams_Skip5) {
OpTester test("TfIdfVectorizer", opset_ver, domain);
// s=5, Min=Max=2, weights specified, string
InitTestAttr(test, "TFIDF", 2, 2, 5,
{0, 4},