Integrate featurizers (#1573)

Added Sample Featurizer and Infrastructure
  Make featurizers and unit tests compile and run with GTest.
  Create definitions for the first featurizer kernel.
  Add new operator domain.
  Create datetime_transformer kernel and build.
  Move OPAQUE types definitions for featurizers kerneles out to a separate cc.
  Register them with the type system.
 Provide unit tests for new AutoML DateTimeTransformer kernel.
  Make necessary adjustments to the test infrastructure to make it run
  with new types.
This commit is contained in:
Dmitri Smirnov 2019-08-15 13:59:59 -07:00 committed by GitHub
parent 7545b795df
commit 17c8fe44e3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
36 changed files with 1607 additions and 6 deletions

View file

@ -53,6 +53,7 @@ option(onnxruntime_USE_NNAPI "Build with DNNLibrary for Android NNAPI support" O
option(onnxruntime_USE_MLAS "Use optimized blas library for GEMM and 2D Convolution" ON)
option(onnxruntime_USE_MKLDNN "Build with MKL-DNN support" OFF)
option(onnxruntime_USE_MKLML "Build MKL-DNN with MKL-ML binary dependency" OFF)
option(onnxruntime_USE_AUTOML "Build AutoML support" ON)
option(onnxruntime_USE_NGRAPH "Build with nGraph support" OFF)
option(onnxruntime_USE_OPENBLAS "Use openblas" OFF)
option(onnxruntime_DEV_MODE "Enable developer warnings and treat most of them as error." OFF)
@ -646,6 +647,12 @@ include(onnxruntime_optimizer.cmake)
include(onnxruntime_session.cmake)
include(onnxruntime_mlas.cmake)
if(onnxruntime_USE_AUTOML)
add_definitions(-DMICROSOFT_AUTOML)
# Build shared featurizer library
include(onnxruntime_automl_featurizers.cmake)
endif()
if(WIN32)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES Shlwapi)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES debug Dbghelp)

View file

@ -0,0 +1,44 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# This source code should not depend on the onnxruntime and may be built independently
file(GLOB automl_featurizers_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/automl/featurizers/src/FeaturizerPrep/*.h"
"${ONNXRUNTIME_ROOT}/core/automl/featurizers/src/FeaturizerPrep/Featurizers/*.h"
"${ONNXRUNTIME_ROOT}/core/automl/featurizers/src/FeaturizerPrep/Featurizers/*.cpp"
)
source_group(TREE ${ONNXRUNTIME_ROOT}/core/automl/ FILES ${onnxruntime_automl_featurizers_srcs})
add_library(automl_featurizers ${automl_featurizers_srcs})
target_include_directories(automl_featurizers PRIVATE ${ONNXRUNTIME_ROOT} PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
set_target_properties(automl_featurizers PROPERTIES FOLDER "AutoMLFeaturizers")
# Individual featurizers unit tests added at bulk
file(GLOB automl_featurizers_tests_srcs
"${ONNXRUNTIME_ROOT}/core/automl/featurizers/src/FeaturizerPrep/Featurizers/UnitTests/*.cpp"
)
list(APPEND automl_featurizers_tests_srcs
"${ONNXRUNTIME_ROOT}/core/automl/featurizers/src/FeaturizerPrep/UnitTests/Traits_UnitTests.cpp"
"${ONNXRUNTIME_ROOT}/core/automl/featurizers/src/FeaturizerPrep/UnitTests/Featurizer_UnitTest.cpp"
"${ONNXRUNTIME_ROOT}/core/automl/featurizers/src/FeaturizerPrep/UnitTests/test_main.cpp"
)
add_executable(automl_featurizers_unittests ${automl_featurizers_tests_srcs})
add_dependencies(automl_featurizers_unittests automl_featurizers)
target_link_libraries(automl_featurizers_unittests PRIVATE gtest automl_featurizers)
source_group(TREE ${ONNXRUNTIME_ROOT}/core/automl/ FILES ${automl_featurizers_tests_srcs})
set_target_properties(automl_featurizers_unittests PROPERTIES FOLDER "AutoMLFeaturizers")
add_test(NAME automl_featurizers_unittests
COMMAND automl_featurizers_unittests
WORKING_DIRECTORY $<TARGET_FILE_DIR:automl_featurizers_unittests>
)
if (WIN32)
# Add Code Analysis properties to enable C++ Core checks. Have to do it via a props file include.
set_target_properties(automl_featurizers PROPERTIES VS_USER_PROPS ${PROJECT_SOURCE_DIR}/ConfigureVisualStudioCodeAnalysis.props)
endif()

View file

@ -14,6 +14,13 @@ if (onnxruntime_DISABLE_CONTRIB_OPS)
)
endif()
if(NOT onnxruntime_USE_AUTOML)
list(REMOVE_ITEM onnxruntime_graph_src
"${ONNXRUNTIME_ROOT}/core/graph/automl_ops/*.h"
"${ONNXRUNTIME_ROOT}/core/graph/automl_ops/*.cc"
)
endif()
file(GLOB_RECURSE onnxruntime_ir_defs_src CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/defs/*.cc"
)
@ -21,6 +28,7 @@ file(GLOB_RECURSE onnxruntime_ir_defs_src CONFIGURE_DEPENDS
add_library(onnxruntime_graph ${onnxruntime_graph_src} ${onnxruntime_ir_defs_src})
add_dependencies(onnxruntime_graph onnx_proto gsl)
onnxruntime_add_include_to_target(onnxruntime_graph onnxruntime_common gsl onnx onnx_proto protobuf::libprotobuf)
target_include_directories(onnxruntime_graph PRIVATE ${ONNXRUNTIME_ROOT})
set_target_properties(onnxruntime_graph PROPERTIES FOLDER "ONNXRuntime")
set_target_properties(onnxruntime_graph PROPERTIES LINKER_LANGUAGE CXX)

View file

@ -25,6 +25,16 @@ file(GLOB_RECURSE onnxruntime_cuda_contrib_ops_cu_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/contrib_ops/cuda/*.cuh"
)
file(GLOB onnxruntime_cpu_automl_cc_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/automl_ops/cpu_automl_kernels.h"
"${ONNXRUNTIME_ROOT}/automl_ops/cpu_automl_kernels.cc"
"${ONNXRUNTIME_ROOT}/automl_ops/automl_types.h"
"${ONNXRUNTIME_ROOT}/automl_ops/automl_types.cc"
"${ONNXRUNTIME_ROOT}/automl_ops/automl_featurizers.h"
"${ONNXRUNTIME_ROOT}/automl_ops/cpu/*.h"
"${ONNXRUNTIME_ROOT}/automl_ops/cpu/*.cc"
)
file(GLOB onnxruntime_providers_common_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/providers/*.h"
"${ONNXRUNTIME_ROOT}/core/providers/*.cc"
@ -55,17 +65,30 @@ if(onnxruntime_USE_NNAPI)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES nnapi)
endif()
source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_common_srcs} ${onnxruntime_providers_srcs})
# add using ONNXRUNTIME_ROOT so they show up under the 'contrib_ops' folder in Visual Studio
source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_cpu_contrib_ops_srcs})
set(onnxruntime_providers_src ${onnxruntime_providers_common_srcs} ${onnxruntime_providers_srcs})
# disable contrib ops conditionally
if(onnxruntime_DISABLE_CONTRIB_OPS)
add_library(onnxruntime_providers ${onnxruntime_providers_common_srcs} ${onnxruntime_providers_srcs})
else()
add_library(onnxruntime_providers ${onnxruntime_providers_common_srcs} ${onnxruntime_providers_srcs} ${onnxruntime_cpu_contrib_ops_srcs})
if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
# add using ONNXRUNTIME_ROOT so they show up under the 'contrib_ops' folder in Visual Studio
source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_cpu_contrib_ops_srcs})
list(APPEND onnxruntime_providers_src ${onnxruntime_cpu_contrib_ops_srcs})
endif()
if (onnxruntime_USE_AUTOML)
source_group(TREE ${ONNXRUNTIME_ROOT}/ FILES ${onnxruntime_cpu_automl_cc_srcs})
list(APPEND onnxruntime_providers_src ${onnxruntime_cpu_automl_cc_srcs})
endif()
add_library(onnxruntime_providers ${onnxruntime_providers_src})
onnxruntime_add_include_to_target(onnxruntime_providers onnxruntime_common onnxruntime_framework gsl onnx onnx_proto protobuf::libprotobuf)
if (onnxruntime_USE_AUTOML)
add_dependencies(onnxruntime_providers automl_featurizers)
onnxruntime_add_include_to_target(onnxruntime_providers automl_featurizers)
target_link_libraries(onnxruntime_providers automl_featurizers)
endif()
if(HAS_DEPRECATED_COPY)
#temporarily ignore this warning
#see: https://en.wikipedia.org/wiki/Rule_of_three_(C%2B%2B_programming)

View file

@ -126,6 +126,12 @@ if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
"${TEST_SRC_DIR}/contrib_ops/*.cc")
endif()
if(onnxruntime_USE_AUTOML)
list(APPEND onnxruntime_test_providers_src_patterns
"${TEST_SRC_DIR}/automl_ops/*.h"
"${TEST_SRC_DIR}/automl_ops/*.cc")
endif()
file(GLOB onnxruntime_test_providers_src CONFIGURE_DEPENDS
${onnxruntime_test_providers_src_patterns})
file(GLOB_RECURSE onnxruntime_test_providers_cpu_src CONFIGURE_DEPENDS
@ -209,6 +215,10 @@ if(onnxruntime_USE_NNAPI)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_nnapi)
endif()
if(onnxruntime_USE_AUTOML)
list(APPEND onnxruntime_test_providers_dependencies automl_featurizers)
endif()
file(GLOB_RECURSE onnxruntime_test_tvm_src CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/test/tvm/*.h"
"${ONNXRUNTIME_ROOT}/test/tvm/*.cc"

View file

@ -231,6 +231,11 @@ template <typename T>
KernelCreateInfo BuildKernelCreateInfo();
} // namespace contrib
namespace automl {
template <typename T>
KernelCreateInfo BuildKernelCreateInfo();
} // namespace automl
namespace contrib {
namespace cuda {
template <typename T>

View file

@ -19,6 +19,7 @@ constexpr const char* kOnnxDomainAlias = "ai.onnx";
constexpr const char* kMLDomain = "ai.onnx.ml";
constexpr const char* kMSDomain = "com.microsoft";
constexpr const char* kMSNchwcDomain = "com.microsoft.nchwc";
constexpr const char* kMSAutoMLDomain = "com.microsoft.automl";
constexpr const char* kNGraphDomain = "com.intel.ai";
constexpr const char* kCpuExecutionProvider = "CPUExecutionProvider";
constexpr const char* kCudaExecutionProvider = "CUDAExecutionProvider";

View file

@ -0,0 +1,8 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// Cumulative header with automl featurizers includes exposed to
// ORT
#pragma once
#include "core/automl/featurizers/src/FeaturizerPrep/Featurizers/DateTimeFeaturizer.h"

View file

@ -0,0 +1,39 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "core/common/common.h"
#include "core/framework/data_types.h"
#include "core/framework/op_kernel.h"
#include "automl_ops/automl_types.h"
#include "automl_ops/automl_featurizers.h"
namespace dtf = Microsoft::Featurizer::DateTimeFeaturizer;
namespace onnxruntime {
// This temporary to register custom types so ORT is aware of it
// although it still can not serialize such a type.
// These character arrays must be extern so the resulting instantiated template
// is globally unique
extern const char kMsAutoMLDomain[] = "com.microsoft.automl";
extern const char kTimepointName[] = "DateTimeFeaturizer_TimePoint";
// This has to be under onnxruntime to properly specialize a function template
ORT_REGISTER_OPAQUE_TYPE(dtf::TimePoint, kMsAutoMLDomain, kTimepointName);
namespace automl {
#define REGISTER_CUSTOM_PROTO(TYPE, reg_fn) \
{ \
MLDataType mltype = DataTypeImpl::GetType<TYPE>(); \
reg_fn(mltype); \
}
void RegisterAutoMLTypes(const std::function<void(MLDataType)>& reg_fn) {
REGISTER_CUSTOM_PROTO(dtf::TimePoint, reg_fn);
}
#undef REGISTER_CUSTOM_PROTO
} // namespace automl
} // namespace onnxruntime

View file

@ -0,0 +1,13 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include "core/framework/data_types.h"
#include <functional>
namespace onnxruntime {
namespace automl {
void RegisterAutoMLTypes(const std::function<void(MLDataType)>& reg_fn);
} // namespace automl
} // namespace onnxruntime

View file

@ -0,0 +1,42 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "core/common/common.h"
#include "core/framework/data_types.h"
#include "core/framework/op_kernel.h"
#include "core/automl/featurizers/src/FeaturizerPrep/Featurizers/DateTimeFeaturizer.h"
namespace dtf = Microsoft::Featurizer::DateTimeFeaturizer;
namespace onnxruntime {
namespace automl {
class DateTimeTransformer final : public OpKernel {
public:
explicit DateTimeTransformer(const OpKernelInfo& info) : OpKernel(info) {}
Status Compute(OpKernelContext* context) const override;
};
Status DateTimeTransformer::Compute(OpKernelContext* ctx) const {
Status s;
auto input_tensor = ctx->Input<Tensor>(0);
dtf::TimePoint* output = ctx->Output<dtf::TimePoint>(0);
int64_t tp = *input_tensor->Data<int64_t>();
std::chrono::system_clock::time_point sys_time{std::chrono::seconds(tp)};
*output = std::move(dtf::SystemToDPTimePoint(sys_time));
return s;
}
ONNX_OPERATOR_KERNEL_EX(
DateTimeTransformer,
kMSAutoMLDomain,
1,
kCpuExecutionProvider,
KernelDefBuilder()
.TypeConstraint("T1", DataTypeImpl::GetTensorType<int64_t>())
.TypeConstraint("T2", DataTypeImpl::GetType<Microsoft::Featurizer::DateTimeFeaturizer::TimePoint>()),
DateTimeTransformer);
} // namespace automl
} // namespace onnxruntime

View file

@ -0,0 +1,25 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "automl_ops/cpu_automl_kernels.h"
#include "core/graph/constants.h"
#include "core/framework/data_types.h"
namespace onnxruntime {
namespace automl {
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSAutoMLDomain, 1, DateTimeTransformer);
void RegisterCpuAutoMLKernels(KernelRegistry& kernel_registry) {
static const BuildKernelCreateInfoFn function_table[] = {
// add more kernels here
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSAutoMLDomain, 1, DateTimeTransformer)>
};
for (auto& function_table_entry : function_table) {
kernel_registry.Register(function_table_entry());
}
}
} // namespace automl
} // namespace onnxruntime

View file

@ -0,0 +1,13 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include "core/framework/op_kernel.h"
#include "core/framework/kernel_registry.h"
namespace onnxruntime {
namespace automl {
void RegisterCpuAutoMLKernels(KernelRegistry& kernel_registry);
} // namespace automl
} // namespace onnxruntime

View file

@ -0,0 +1,163 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#pragma once
#include <memory>
#include <tuple>
namespace Microsoft {
namespace Featurizer {
/////////////////////////////////////////////////////////////////////////
/// \class Transformer
/// \brief Transforms a single "value" and output the result.
/// A value can be anything from an integer to a collection
/// of integers.
///
template <typename ReturnT, typename ArgT>
class Transformer {
public:
// ----------------------------------------------------------------------
// | Public Types
using return_type = ReturnT;
using arg_type = ArgT;
using transformer_type = Transformer<ReturnT, ArgT>;
// ----------------------------------------------------------------------
// | Public Methods
Transformer(void) = default;
virtual ~Transformer(void) = default;
Transformer(Transformer const &) = delete;
Transformer & operator =(Transformer const &) = delete;
Transformer(Transformer &&) = default;
Transformer & operator =(Transformer &&) = delete;
virtual return_type transform(arg_type const &arg) const = 0;
private:
// ----------------------------------------------------------------------
// | Private Methods
template <typename ArchiveT>
void serialize(ArchiveT &, unsigned int const /*version*/);
};
/////////////////////////////////////////////////////////////////////////
/// \class Estimator
/// \brief Collects state over a collection of data, then produces
/// a `Transformer` that is able to operate on that collected
/// state.
///
template <typename ReturnT, typename ArgT>
class Estimator {
public:
// ----------------------------------------------------------------------
// | Public Types
using transformer_type = Transformer<ReturnT, ArgT>;
using TransformerUniquePtr = std::unique_ptr<transformer_type>;
using estimator_type = Estimator<ReturnT, ArgT>;
using apache_arrow = unsigned long; // TODO: Temp type as we figure out what will eventually be here
// ----------------------------------------------------------------------
// | Public Methods
Estimator(void) = default;
virtual ~Estimator(void) = default;
Estimator(Estimator const &) = delete;
Estimator & operator =(Estimator const &) = delete;
Estimator(Estimator &&) = default;
Estimator & operator =(Estimator &&) = delete;
// This method can be called repeatedly in the support of streaming scenarios
Estimator & fit(apache_arrow const &data);
// Calls to `commit` are destructive - all previously generated state should
// be reset. `Estimator` objects that want to share state prior to calls to commit
// should implement a `copy` method.
TransformerUniquePtr commit(void);
private:
// ----------------------------------------------------------------------
// | Private Data
bool _committed = false;
// ----------------------------------------------------------------------
// | Private Methods
template <typename ArchiveT>
void serialize(ArchiveT &, unsigned int const /*version*/);
virtual Estimator & fit_impl(apache_arrow const &data) = 0;
virtual TransformerUniquePtr commit_impl(void) = 0;
};
template <typename EstimatorT, typename... EstimatorConstructorArgsT>
typename EstimatorT::TransformerUniquePtr fit_and_commit(typename EstimatorT::apache_arrow const &data, EstimatorConstructorArgsT &&...args);
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// |
// | Implementation
// |
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// |
// | Transformer
// |
// ----------------------------------------------------------------------
template <typename ReturnT, typename ArgT>
template <typename ArchiveT>
void Transformer<ReturnT, ArgT>::serialize(ArchiveT & /*ar*/, unsigned int const /*version*/) {
}
// ----------------------------------------------------------------------
// |
// | Estimator
// |
// ----------------------------------------------------------------------
template <typename ReturnT, typename ArgT>
Estimator<ReturnT, ArgT> & Estimator<ReturnT, ArgT>::fit(apache_arrow const &data) {
if(_committed)
throw std::runtime_error("This instance has already been committed");
return fit_impl(data);
}
template <typename ReturnT, typename ArgT>
typename Estimator<ReturnT, ArgT>::TransformerUniquePtr Estimator<ReturnT, ArgT>::commit(void) {
if(_committed)
throw std::runtime_error("This instance has already been committed");
TransformerUniquePtr result(commit_impl());
if(!result)
throw std::runtime_error("Invalid result");
_committed = true;
return result;
}
template <typename ReturnT, typename ArgT>
template <typename ArchiveT>
void Estimator<ReturnT, ArgT>::serialize(ArchiveT & /*ar*/, unsigned int const /*version*/) {
}
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
template <typename EstimatorT, typename... EstimatorConstructorArgsT>
typename EstimatorT::TransformerUniquePtr fit_and_commit(typename EstimatorT::apache_arrow const &data, EstimatorConstructorArgsT &&...args) {
return EstimatorT(std::forward<EstimatorConstructorArgsT>(args)...).fit(data).commit();
}
} // namespace Featurizer
} // namespace Microsoft

View file

@ -0,0 +1,56 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#include "DateTimeFeaturizer.h"
#ifdef _MSC_VER
inline struct tm *gmtime_r(time_t const* const timer, struct tm* const result) {
return gmtime_s(result, timer) == 0 ? result : nullptr;
}
#endif
namespace Microsoft {
namespace Featurizer {
namespace DateTimeFeaturizer {
TimePoint::TimePoint(const std::chrono::system_clock::time_point& sysTime) {
// Get to a tm to get what we need.
// Eventually C++202x will have expanded chrono support that might
// have what we need, but not yet!
std::tm tmt;
time_t tt = std::chrono::system_clock::to_time_t(sysTime);
std::tm* res = gmtime_r(&tt, &tmt);
if (res) {
year = static_cast<std::int32_t>(tmt.tm_year) + 1900;
month = static_cast<std::uint8_t>(tmt.tm_mon) + 1;
day = static_cast<std::uint8_t>(tmt.tm_mday);
hour = static_cast<std::uint8_t>(tmt.tm_hour);
minute = static_cast<std::uint8_t>(tmt.tm_min);
second = static_cast<std::uint8_t>(tmt.tm_sec);
dayOfWeek = static_cast<std::uint8_t>(tmt.tm_wday);
dayOfYear = static_cast<std::uint16_t>(tmt.tm_yday);
quarterOfYear = (month + 2) / 3;
weekOfMonth = (day - 1) / 7;
}
else
{
if (tt < 0) {
throw std::invalid_argument("Dates prior to 1970 are not supported.");
}
else {
throw std::invalid_argument("Unknown error converting input date.");
}
}
}
Transformer::return_type Transformer::transform(arg_type const &arg) const /*override*/ {
return Microsoft::Featurizer::DateTimeFeaturizer::TimePoint(arg);
}
} // namespace DateTimeFeaturizer
} // namespace Featurizer
} // namespace Microsoft

View file

@ -0,0 +1,101 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#pragma once
#include "../Featurizer.h"
#include <chrono>
#include <ctime>
#include <cstdint>
#include <stdexcept>
namespace Microsoft {
namespace Featurizer {
/////////////////////////////////////////////////////////////////////////
/// \namespace DateTimeTransformer
/// \brief A Transformer that takes a chrono::system_clock::time_point and
/// returns a struct with all the data split out.
///
namespace DateTimeFeaturizer {
/////////////////////////////////////////////////////////////////////////
/// \struct TimePoint
/// \brief Struct to hold various components of DateTime information
///
struct TimePoint {
std::int32_t year = 0;
std::uint8_t month = 0; /* 1-12 */
std::uint8_t day = 0; /* 1-31 */
std::uint8_t hour = 0; /* 0-23 */
std::uint8_t minute = 0; /* 0-59 */
std::uint8_t second = 0; /* 0-59 */
std::uint8_t dayOfWeek = 0; /* 0-6 */
std::uint16_t dayOfYear = 0; /* 0-365 */
std::uint8_t quarterOfYear = 0; /* 1-4 */
std::uint8_t weekOfMonth = 0; /* 0-4 */
// Need default __ctor to satisfy ORT type system
TimePoint() = default;
TimePoint(const std::chrono::system_clock::time_point& sysTime);
TimePoint(TimePoint&&) = default;
TimePoint& operator=(TimePoint&&) = default;
TimePoint(const TimePoint&) = delete;
TimePoint& operator=(const TimePoint&) = delete;
bool operator==(const TimePoint& o) const {
return year == o.year &&
month == o.month &&
day == o.day &&
hour == o.hour &&
minute == o.minute &&
second == o.second &&
dayOfWeek == o.dayOfWeek &&
dayOfYear == o.dayOfYear &&
quarterOfYear == o.quarterOfYear &&
weekOfMonth == o.weekOfMonth;
}
enum {
JANUARY = 1, FEBRUARY, MARCH, APRIL, MAY, JUNE,
JULY, AUGUST, SEPTEMBER, OCTOBER, NOVEMBER, DECEMBER
};
enum {
SUNDAY = 0, MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY
};
};
inline TimePoint SystemToDPTimePoint(const std::chrono::system_clock::time_point& sysTime) {
return TimePoint (sysTime);
}
/////////////////////////////////////////////////////////////////////////
/// \class DateTimeTransformer
/// \brief Transformer
///
class Transformer : public Microsoft::Featurizer::Transformer<Microsoft::Featurizer::DateTimeFeaturizer::TimePoint, std::chrono::system_clock::time_point> {
public:
Transformer(void) = default;
~Transformer(void) override = default;
Transformer(Transformer const &) = delete;
Transformer & operator =(Transformer const &) = delete;
Transformer(Transformer &&) = default;
Transformer & operator =(Transformer &&) = delete;
return_type transform(arg_type const &arg) const override;
private:
// ----------------------------------------------------------------------
// | Private Methods
template <typename ArchiveT>
void serialize(ArchiveT &ar, unsigned int const version);
};
} // Namespace DateTimeFeaturizer
} // Namespace Featurizer
} // Namespace Microsoft

View file

@ -0,0 +1,40 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#include "SampleAdd.h"
namespace Microsoft {
namespace Featurizer {
namespace SampleAdd {
// ----------------------------------------------------------------------
// |
// | Transformer
// |
// ----------------------------------------------------------------------
Transformer::Transformer(std::uint16_t delta) :
_delta(delta) {
}
Transformer::return_type Transformer::transform(arg_type const &arg) const /*override*/ {
return _delta + arg;
}
// ----------------------------------------------------------------------
// |
// | Estimator
// |
// ----------------------------------------------------------------------
Estimator & Estimator::fit_impl(apache_arrow const &data) /*override*/ {
_accumulated_delta += static_cast<std::uint16_t>(data);
return *this;
}
Estimator::TransformerUniquePtr Estimator::commit_impl(void) /*override*/ {
return std::make_unique<SampleAdd::Transformer>(static_cast<std::uint16_t>(_accumulated_delta));
}
} // namespace SampleAdd
} // namespace Featurizer
} // namespace Microsoft

View file

@ -0,0 +1,118 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#pragma once
#include "../Featurizer.h"
namespace Microsoft {
namespace Featurizer {
/////////////////////////////////////////////////////////////////////////
/// \namespace SampleAdd
/// \brief A Transformer and Estimator that add values. This is a
/// sample intended to demonstrate patterns within the
/// implementation of these types.
///
namespace SampleAdd {
/////////////////////////////////////////////////////////////////////////
/// \class Transformer
/// \brief Transformer that adds an integer value to a saved delta
/// and returns the result.
///
class Transformer : public Microsoft::Featurizer::Transformer<std::uint32_t, std::uint16_t> {
public:
// ----------------------------------------------------------------------
// | Public Methods
Transformer(std::uint16_t delta=0);
~Transformer(void) override = default;
Transformer(Transformer const &) = delete;
Transformer & operator =(Transformer const &) = delete;
Transformer(Transformer &&) = default;
Transformer & operator =(Transformer &&) = delete;
return_type transform(arg_type const &arg) const override;
private:
// ----------------------------------------------------------------------
// | Private Data
std::uint32_t const _delta;
// ----------------------------------------------------------------------
// | Private Methods
template <typename ArchiveT>
void serialize(ArchiveT &ar, unsigned int const version);
};
/////////////////////////////////////////////////////////////////////////
/// \class Estimator
/// \brief Estimator that accumulates a delta value and then
/// creates a Transformer with than value when requested.
///
class Estimator : public Microsoft::Featurizer::Estimator<std::uint32_t, std::uint16_t> {
public:
// ----------------------------------------------------------------------
// | Public Methods
Estimator(void) = default;
~Estimator(void) override = default;
Estimator(Estimator const &) = delete;
Estimator & operator =(Estimator const &) = delete;
Estimator(Estimator &&) = default;
Estimator & operator =(Estimator &&) = delete;
private:
// ----------------------------------------------------------------------
// | Private Data
std::uint32_t _accumulated_delta = 0;
// ----------------------------------------------------------------------
// | Private Methods
template <typename ArchiveT>
void serialize(ArchiveT &ar, unsigned int const version);
Estimator & fit_impl(apache_arrow const &data) override;
TransformerUniquePtr commit_impl(void) override;
};
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// |
// | Implementation
// |
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// |
// | Transformer
// |
// ----------------------------------------------------------------------
template <typename ArchiveT>
void Transformer::serialize(ArchiveT &ar, unsigned int const version) {
ar & boost::serialization::base_object<Microsoft::Featurizer::Transformer>(*this);
ar & boost::serialization::make_nvp("delta", _delta);
}
// ----------------------------------------------------------------------
// |
// | Estimator
// |
// ----------------------------------------------------------------------
template <typename ArchiveT>
void Estimator::serialize(ArchiveT &ar, unsigned int const version) {
ar & boost::serialization::base_object<Microsoft::Featurizer::Estimator>(*this);
ar & boost::serialization::make_nvp("accumulated_delta", _accumulated_delta);
}
} // namespace SampleAdd
} // namespace Featurizer
} // namespace Microsoft

View file

@ -0,0 +1,48 @@
# ----------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License
# ----------------------------------------------------------------------
cmake_minimum_required(VERSION 3.5.0)
project(Featurizer_UnitTests LANGUAGES CXX)
set(CMAKE_MODULE_PATH "$ENV{DEVELOPMENT_ENVIRONMENT_CMAKE_MODULE_PATH}")
if(NOT WIN32)
string(REPLACE ":" ";" CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH}")
string(REPLACE ":" ";" _includes "$ENV{INCLUDE}")
string(REPLACE ":" ";" _libs "$ENV{LIB}")
endif()
set(CppCommon_STATIC_CRT ON CACHE BOOL "" FORCE)
set(BoostCommon_HEADER_ONLY ON CACHE BOOL "" FORCE)
include(CppCommon)
include(BoostCommon)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
add_library(libFeaturizers STATIC
../SampleAdd.h
../SampleAdd.cpp
../DateTimeFeaturizer.h
../DateTimeFeaturizer.cpp
)
enable_testing()
foreach(_test_name IN ITEMS
SampleAdd_UnitTest
DateTimeFeaturizer_UnitTests
)
add_executable(${_test_name} ${_test_name}.cpp)
target_include_directories(${_test_name} PRIVATE ${_includes})
target_link_directories(${_test_name} PRIVATE ${_libs})
target_link_libraries(${_test_name} PRIVATE ${Boost_LIBRARIES} libFeaturizers)
add_test(NAME ${_test_name} COMMAND ${_test_name} --success)
endforeach()

View file

@ -0,0 +1,125 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#define CATCH_CONFIG_MAIN
#include <cstdio>
#include "gtest/gtest.h"
#include "../DateTimeFeaturizer.h"
namespace Microsoft {
namespace Featurizer {
namespace DateTimeFeaturizer {
using SysClock = std::chrono::system_clock;
TEST(DateTimeFeaturizer_DateTime, Past_1976_Nov_17__12_27_04) {
const time_t date = 217081624;
SysClock::time_point stp = SysClock::from_time_t(date);
// Constructor
TimePoint tp(stp);
ASSERT_TRUE(tp.year == 1976);
ASSERT_TRUE(tp.month == TimePoint::NOVEMBER);
ASSERT_TRUE(tp.day == 17);
ASSERT_TRUE(tp.hour == 12);
ASSERT_TRUE(tp.minute == 27);
ASSERT_TRUE(tp.second == 4);
ASSERT_TRUE(tp.dayOfWeek == TimePoint::WEDNESDAY);
ASSERT_TRUE(tp.dayOfYear == 321);
ASSERT_TRUE(tp.quarterOfYear == 4);
ASSERT_TRUE(tp.weekOfMonth == 2);
// assignment
TimePoint tp1 = stp;
ASSERT_TRUE(tp1.year == 1976);
ASSERT_TRUE(tp1.month == TimePoint::NOVEMBER);
ASSERT_TRUE(tp1.day == 17);
// function
TimePoint tp2 = SystemToDPTimePoint(stp);
ASSERT_TRUE(tp2.year == 1976);
ASSERT_TRUE(tp2.month == TimePoint::NOVEMBER);
ASSERT_TRUE(tp2.day == 17);
}
TEST(DateTimeFeaturizer_Transformer , Past_1976_Nov_17__12_27_05) {
const time_t date = 217081625;
SysClock::time_point stp = SysClock::from_time_t(date);
Transformer dt;
TimePoint tp = dt.transform(stp);
ASSERT_TRUE(tp.year == 1976);
ASSERT_TRUE(tp.month == TimePoint::NOVEMBER);
ASSERT_TRUE(tp.day == 17);
ASSERT_TRUE(tp.hour == 12);
ASSERT_TRUE(tp.minute == 27);
ASSERT_TRUE(tp.second == 5);
ASSERT_TRUE(tp.dayOfWeek == TimePoint::WEDNESDAY);
ASSERT_TRUE(tp.dayOfYear == 321);
ASSERT_TRUE(tp.quarterOfYear == 4);
ASSERT_TRUE(tp.weekOfMonth == 2);
}
TEST(DateTimeFeaturizer_Transformer , Future_2025_June_30) {
const time_t date = 1751241600;
SysClock::time_point stp = SysClock::from_time_t(date);
Transformer dt;
TimePoint tp = dt.transform(stp);
ASSERT_TRUE(tp.year == 2025);
ASSERT_TRUE(tp.month == TimePoint::JUNE);
ASSERT_TRUE(tp.day == 30);
ASSERT_TRUE(tp.hour == 0);
ASSERT_TRUE(tp.minute == 0);
ASSERT_TRUE(tp.second == 0);
ASSERT_TRUE(tp.dayOfWeek == TimePoint::MONDAY);
ASSERT_TRUE(tp.dayOfYear == 180);
ASSERT_TRUE(tp.quarterOfYear == 2);
ASSERT_TRUE(tp.weekOfMonth == 4);
}
#ifdef _MSC_VER
// others define system_clock::time_point as nanoseconds (64-bit),
// which rolls over somewhere around 2260. Still a couple hundred years!
TEST(DateTimeFeaturizer_Transformer , Far_Future__2998_March_2__14_03_02) {
const time_t date = 32445842582;
SysClock::time_point stp = SysClock::from_time_t(date);
Transformer dt;
TimePoint tp = dt.transform(stp);
ASSERT_TRUE(tp.year == 2998);
ASSERT_TRUE(tp.month == TimePoint::MARCH);
ASSERT_TRUE(tp.day == 2);
ASSERT_TRUE(tp.hour == 14);
ASSERT_TRUE(tp.minute == 3);
ASSERT_TRUE(tp.second == 2);
ASSERT_TRUE(tp.dayOfWeek == TimePoint::FRIDAY);
ASSERT_TRUE(tp.dayOfYear == 60);
ASSERT_TRUE(tp.quarterOfYear == 1);
ASSERT_TRUE(tp.weekOfMonth == 0);
}
#else
// msvcrt doesn't support negative time_t, so nothing before 1970
TEST(DateTimeFeaturizer_Transformer, Pre_Epoch__1776_July_4) {
const time_t date = -6106060800;
SysClock::time_point stp = SysClock::from_time_t(date);
// Constructor
Transformer dt;
TimePoint tp = dt.transform(stp);
ASSERT_TRUE(tp.year == 1776);
ASSERT_TRUE(tp.month == TimePoint::JULY);
ASSERT_TRUE(tp.day == 4);
}
#endif /* _MSC_VER */
} // namespace DateTimeFeaturizer
} // namespace Featurizer
} // namespace Microsoft

View file

@ -0,0 +1,22 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#define CATCH_CONFIG_MAIN
#include "gtest/gtest.h"
#include "../SampleAdd.h"
TEST(SampleAddTests, Transformer) {
ASSERT_TRUE(Microsoft::Featurizer::SampleAdd::Transformer(10).transform(20) == 30);
ASSERT_TRUE(Microsoft::Featurizer::SampleAdd::Transformer(20).transform(1) == 21);
}
TEST(SampleAddTests, Estimator) {
ASSERT_TRUE(Microsoft::Featurizer::SampleAdd::Estimator().fit(10).commit()->transform(20) == 30);
ASSERT_TRUE(Microsoft::Featurizer::SampleAdd::Estimator().fit(20).commit()->transform(1) == 21);
ASSERT_TRUE(Microsoft::Featurizer::SampleAdd::Estimator().fit(10).fit(20).commit()->transform(20) == 50);
ASSERT_TRUE(Microsoft::Featurizer::SampleAdd::Estimator().fit(10).fit(20).fit(30).commit()->transform(20) == 80);
}

View file

@ -0,0 +1,5 @@
filter:
includes:
- Microsoft::Featurizer::*
excludes:
- std::*

View file

@ -0,0 +1,217 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#pragma once
#include <array>
#include <functional>
#include <map>
#include <vector>
namespace Microsoft {
namespace Featurizer {
namespace Traits {
// XXX: Define the type
template<class T>
struct Nullable {};
/////////////////////////////////////////////////////////////////////////
/// \namespace Traits
/// \brief We have a range of of types we are dealing with. Many types
/// have different ways to represent what a `NULL` value is
/// (float has NAN for example) as well as different ways to
/// convert the value to a string representation. By using
/// templates combined with partial template specialization
/// we can handle scenarios like these that vary based on the data type.
///
/// Example: This allows us to do things like `Traits<std::int8_t>::IsNull()`
/// and `Traits<float>::IsNull()` and let the trait itself deal with the
/// actual implementation and allows us as developers to not worry about that.
///
/// This benefit is magnified because we are also using templates for our
/// transformers. When we declare that a transformer has type T = std::int8_t,
/// we can then also use `Traits<T>::IsNull()` and the compiler will know that
/// `T` is a `std::int8_t` and call the appropate template specialization.
///
template <typename T>
struct Traits {};
/////////////////////////////////////////////////////////////////////////
/// \namespace Traits
/// \brief When using partial template specilization, if the compiler
/// cannot find a more specfic implementation of the template
/// it will fall back to the base template and use whatever is
/// defined there. If you have methods defined in that base template,
/// it makes it very difficult to debug what is going on. By
/// putting no implementation in the `Traits<>` template and
/// having the real base struct be `TraitsImpl<>`, if you try and
/// specify a trait that doesn't have a specilization, the compiler
/// can detect that and throw an error during compilation.
///
/// Example: There is no template `Traits<char>`. If you try and use it
/// the compiler will fall back to the `Traits<>` struct which has no methods
/// defined. Trying to then use `Traits<char>` will cause a compile time error
/// letting you know something isn't correct.
///
template <typename T>
struct TraitsImpl {
using nullable_type = Nullable<T>;
static bool IsNull(nullable_type const& value) {
return !value.is_initialized();
}
};
template <>
struct Traits<float> : public TraitsImpl<float> {
using nullable_type = float;
static bool IsNull(nullable_type const& value) {
return isnan(value);
}
// static std::string ToString(nullable_type const& value) {
// return std::to_string(value);
// }
};
template <>
struct Traits<double> : public TraitsImpl<double> {
using nullable_type = double;
static bool IsNull(nullable_type const& value) {
return isnan(value);
}
// static std::string ToString(nullable_type const& value) {
// return std::to_string(value);
// }
};
template <>
struct Traits<std::int8_t> : public TraitsImpl<std::int8_t> {
// static std::string ToString(std::int8_t const& value) {
// return std::to_string(value);
// }
};
template <>
struct Traits<std::int16_t> : public TraitsImpl<std::int16_t> {
// static std::string ToString(std::int16_t const& value) {
// return std::to_string(value);
// }
};
template <>
struct Traits<std::int32_t> : public TraitsImpl<std::int32_t> {
// static std::string ToString(std::int32_t const& value) {
// return std::to_string(value);
// }
};
template <>
struct Traits<std::int64_t> : public TraitsImpl<std::int64_t> {
// static std::string ToString(std::int64_t const& value) {
// return std::to_string(value);
// }
};
template <>
struct Traits<std::uint8_t> : public TraitsImpl<std::uint8_t> {
// static std::string ToString(std::uint8_t const& value) {
// return std::to_string(value);
// }
};
template <>
struct Traits<std::uint16_t> : public TraitsImpl<std::uint16_t> {
using nullable_type = Nullable<std::uint16_t>;
// static std::string ToString(std::uint16_t const& value) {
// return std::to_string(value);
// }
};
template <>
struct Traits<std::uint32_t> : public TraitsImpl<std::uint32_t> {
// static std::string ToString(std::uint32_t const& value) {
// return std::to_string(value);
// }
};
template <>
struct Traits<std::uint64_t> : public TraitsImpl<std::uint64_t> {
// static std::string ToString(std::uint64_t const& value) {
// return std::to_string(value);
// }
};
template <>
struct Traits<std::string> : public TraitsImpl<std::string> {
// static std::string ToString(std::string const& value) {
// value;
// }
};
template <typename T, size_t size>
struct Traits<std::array<T, size>> : public TraitsImpl<std::array<T, size>> {
// static std::string ToString(std::array<T, size> const& value) {
// // Decide what to return here
// throw std::logic_error("Function not yet implemented");
// }
};
template <>
struct Traits<bool> : public TraitsImpl<bool> {
// static std::string ToString(bool const& value) {
// // Decide what to return here
// throw std::logic_error("Function not yet implemented");
// }
};
template <typename KeyT, typename T, typename CompareT, typename AllocatorT>
struct Traits<std::map<KeyT, T, CompareT, AllocatorT>> : public TraitsImpl<std::map<KeyT, T, CompareT, AllocatorT>> {
// static std::string ToString(std::map<KeyT, T, CompareT, AllocatorT> const& value) {
// // Decide what to return here
// throw std::logic_error("Function not yet implemented");
// }
};
template <typename T, typename AllocatorT>
struct Traits<std::vector<T, AllocatorT>> : public TraitsImpl<std::vector<T, AllocatorT>> {
// static std::string ToString(std::vector<T, AllocatorT> const& value) {
// // Decide what to return here
// throw std::logic_error("Function not yet implemented");
// }
};
template <typename... Types>
struct Traits<std::function<Types...>> : public TraitsImpl<std::function<Types...>> {
// static std::string ToString(std::function<Types ...> const& value) {
// // Decide what to return here
// throw std::logic_error("Function not yet implemented");
// }
};
template <typename T>
struct Traits<Nullable<T>> : public TraitsImpl<Nullable<T>> {
using nullable_type = Nullable<T>;
// static std::string ToString(nullable_type const& value) {
// if (value) {
// return Traits<T>::ToString(value.get());
// }
// return "NULL";
// }
};
template <typename... Types>
struct Traits<std::tuple<Types...>> : public TraitsImpl<std::tuple<Types...>> {
// static std::string ToString(std::tuple<Types ...> const& value) {
// // Decide what to return here
// throw std::logic_error("Function not yet implemented");
// }
};
} // namespace Traits
} // namespace Featurizer
} // namespace Microsoft

View file

@ -0,0 +1,41 @@
# ----------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License
# ----------------------------------------------------------------------
cmake_minimum_required(VERSION 3.5.0)
project(Featurizer_UnitTests LANGUAGES CXX)
set(CMAKE_MODULE_PATH "$ENV{DEVELOPMENT_ENVIRONMENT_CMAKE_MODULE_PATH}")
if(NOT WIN32)
string(REPLACE ":" ";" CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH}")
string(REPLACE ":" ";" _includes "$ENV{INCLUDE}")
string(REPLACE ":" ";" _libs "$ENV{LIB}")
endif()
set(CppCommon_STATIC_CRT ON CACHE BOOL "" FORCE)
set(BoostCommon_HEADER_ONLY ON CACHE BOOL "" FORCE)
include(CppCommon)
include(BoostCommon)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
enable_testing()
foreach(_test_name IN ITEMS
Featurizer_UnitTest
Traits_UnitTests
)
add_executable(${_test_name} ${_test_name}.cpp)
target_include_directories(${_test_name} PRIVATE ${_includes})
target_link_directories(${_test_name} PRIVATE ${_libs})
target_link_libraries(${_test_name} PRIVATE ${Boost_LIBRARIES})
add_test(NAME ${_test_name} COMMAND ${_test_name} --success)
endforeach()

View file

@ -0,0 +1,119 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#define CATCH_CONFIG_MAIN
#include "gtest/gtest.h"
#include "../Featurizer.h"
class MyTransformer : public Microsoft::Featurizer::Transformer<bool, int> {
public:
// ----------------------------------------------------------------------
// | Public Methods
MyTransformer(bool true_on_odd=false) :
_true_on_odd(true_on_odd) {
}
~MyTransformer(void) override = default;
MyTransformer(MyTransformer const &) = delete;
MyTransformer & operator =(MyTransformer const &) = delete;
MyTransformer(MyTransformer &&) = default;
MyTransformer & operator =(MyTransformer &&) = delete;
return_type transform(arg_type const &arg) const override {
bool const is_odd(arg & 1);
return _true_on_odd ? is_odd : !is_odd;
}
private:
// ----------------------------------------------------------------------
// | Private Data
bool const _true_on_odd;
// ----------------------------------------------------------------------
// | Private Methods
template <typename ArchiveT>
void serialize(ArchiveT &ar, unsigned int const /*version*/) {
ar & boost::serialization::base_object<transformer_type>(*this);
ar & boost::serialization::make_nvp("true_on_odd", const_cast<bool &>(_true_on_odd));
}
};
class MyEstimator : public Microsoft::Featurizer::Estimator<bool, int> {
public:
// ----------------------------------------------------------------------
// | Public Methods
MyEstimator(bool return_invalid_transformer=false) :
_return_invalid_transformer(return_invalid_transformer) {
}
~MyEstimator(void) override = default;
MyEstimator(MyEstimator const &) = delete;
MyEstimator & operator =(MyEstimator const &) = delete;
MyEstimator(MyEstimator &&) = default;
MyEstimator & operator =(MyEstimator &&) = delete;
private:
// ----------------------------------------------------------------------
// | Private Data
bool const _return_invalid_transformer;
bool _true_on_odd_state;
// ----------------------------------------------------------------------
// | Private Methods
MyEstimator & fit_impl(apache_arrow const &data) override {
_true_on_odd_state = static_cast<bool>(data);
return *this;
}
TransformerUniquePtr commit_impl(void) override {
if(_return_invalid_transformer)
return TransformerUniquePtr();
return std::make_unique<MyTransformer>(_true_on_odd_state);
}
template <typename ArchiveT>
void serialize(ArchiveT &ar, unsigned int const /*version*/) {
ar & boost::serialization::base_object<estimator_type>(*this);
ar & boost::serialization::make_nvp("return_invalid_transformer", const_cast<bool &>(_return_invalid_transformer));
ar & boost::serialization::make_nvp("true_on_odd_state", const_cast<bool &>(_true_on_odd_state));
}
};
TEST(FeaturizerTests, TransformerFunctionality) {
ASSERT_TRUE(MyTransformer(true).transform(1) == true);
ASSERT_TRUE(MyTransformer(false).transform(1) == false);
ASSERT_TRUE(MyTransformer(true).transform(2) == false);
ASSERT_TRUE(MyTransformer(false).transform(2) == true);
}
TEST(FeaturizerTests, EstimatorFunctionality) {
ASSERT_TRUE(MyEstimator().fit(1).commit()->transform(1) == true);
ASSERT_TRUE(MyEstimator().fit(0).commit()->transform(1) == false);
ASSERT_TRUE(MyEstimator().fit(1).commit()->transform(2) == false);
ASSERT_TRUE(MyEstimator().fit(0).commit()->transform(2) == true);
}
TEST(FeaturizerTests, EstimatorErrors) {
MyEstimator e;
ASSERT_NE(e.commit(), nullptr);
//CHECK_THROWS_WITH(e.fit(1), Catch::Contains("has already been committed"));
//CHECK_THROWS_WITH(e.commit(), Catch::Contains("has already been committed"));
//CHECK_THROWS_WITH(MyEstimator(true).commit(), Catch::Matches("Invalid result"));
}
TEST(FeaturizerTests, EstimatorFitAndCommit) {
ASSERT_TRUE(Microsoft::Featurizer::fit_and_commit<MyEstimator>(1, false)->transform(1) == true);
ASSERT_TRUE(Microsoft::Featurizer::fit_and_commit<MyEstimator>(0, false)->transform(1) == false);
ASSERT_TRUE(Microsoft::Featurizer::fit_and_commit<MyEstimator>(1, false)->transform(2) == false);
ASSERT_TRUE(Microsoft::Featurizer::fit_and_commit<MyEstimator>(0, false)->transform(2) == true);
}

View file

@ -0,0 +1,40 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#define CATCH_CONFIG_MAIN
#include <type_traits>
#include "gtest/gtest.h"
#include "../Traits.h"
using namespace Microsoft::Featurizer::Traits;
// Floating point values
static_assert(std::is_same<Traits<float>::nullable_type, float>::value, "Incorrect nullable type for float");
static_assert(std::is_same<Traits<double>::nullable_type, double>::value, "Incorrect nullable type for double");
// Int values
static_assert(std::is_same<Traits<std::int8_t>::nullable_type, Nullable<std::int8_t>>::value, "Incorrect nullable type for std::int8_t");
static_assert(std::is_same<Traits<std::int16_t>::nullable_type, Nullable<std::int16_t>>::value, "Incorrect nullable type for std::int16_t");
static_assert(std::is_same<Traits<std::int32_t>::nullable_type, Nullable<std::int32_t>>::value, "Incorrect nullable type for std::int32_t");
static_assert(std::is_same<Traits<std::int64_t>::nullable_type, Nullable<std::int64_t>>::value, "Incorrect nullable type for std::int64_t");
static_assert(std::is_same<Traits<std::uint8_t>::nullable_type, Nullable<std::uint8_t>>::value, "Incorrect nullable type for std::uint8_t");
static_assert(std::is_same<Traits<std::uint16_t>::nullable_type, Nullable<std::uint16_t>>::value, "Incorrect nullable type for std::uint16_t");
static_assert(std::is_same<Traits<std::uint32_t>::nullable_type, Nullable<std::uint32_t>>::value, "Incorrect nullable type for std::uint32_t");
static_assert(std::is_same<Traits<std::uint64_t>::nullable_type, Nullable<std::uint64_t>>::value, "Incorrect nullable type for std::uint64_t");
// Others
static_assert(std::is_same<Traits<std::string>::nullable_type, Nullable<std::string>>::value, "Incorrect nullable type for std::string");
static_assert(std::is_same<Traits<std::array<char, 4>>::nullable_type, Nullable<std::array<char, 4>>>::value, "Incorrect nullable type for std::array");
static_assert(std::is_same<Traits<bool>::nullable_type, Nullable<bool>>::value, "Incorrect nullable type for std::string");
static_assert(std::is_same<Traits<std::map<int,int>>::nullable_type, Nullable<std::map<int,int>>>::value, "Incorrect nullable type for std::string");
static_assert(std::is_same<Traits<std::vector<int>>::nullable_type, Nullable<std::vector<int>>>::value, "Incorrect nullable type for std::string");
static_assert(std::is_same<Traits<std::function<int>>::nullable_type, Nullable<std::function<int>>>::value, "Incorrect nullable type for std::string");
static_assert(std::is_same<Traits<Nullable<int>>::nullable_type, Nullable<int>>::value, "Incorrect nullable type for std::string");
static_assert(std::is_same<Traits<std::tuple<int>>::nullable_type, Nullable<std::tuple<int>>>::value, "Incorrect nullable type for std::string");
// Dummy test so it will compile. Replace this with actual tests.
TEST(TraitsTests, Dummy) {
ASSERT_TRUE(true);
}

View file

@ -0,0 +1,18 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "gtest/gtest.h"
GTEST_API_ int main(int argc, char** argv) {
int status = 0;
testing::InitGoogleTest(&argc, argv);
try {
status = RUN_ALL_TESTS();
} catch (const std::exception& ex) {
std::cerr << ex.what();
status = -1;
}
return status;
}

View file

@ -6,6 +6,10 @@
#include "core/framework/sparse_tensor.h"
#include "core/graph/onnx_protobuf.h"
#ifdef MICROSOFT_AUTOML
#include "automl_ops/automl_types.h"
#endif
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wignored-qualifiers"
@ -285,6 +289,9 @@ class DataTypeRegistry {
DataTypeRegistry() {
RegisterAllProtos([this](MLDataType mltype) { RegisterDataType(mltype); });
#ifdef MICROSOFT_AUTOML
automl::RegisterAutoMLTypes([this](MLDataType mltype) { RegisterDataType(mltype); });
#endif
}
~DataTypeRegistry() = default;

View file

@ -0,0 +1,46 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "core/graph/constants.h"
#include "core/graph/automl_ops/automl_defs.h"
#include "core/graph/op.h"
#include "onnx/defs/schema.h"
#include "onnx/defs/shape_inference.h"
namespace onnxruntime {
namespace automl {
using ONNX_NAMESPACE::AttributeProto;
using ONNX_NAMESPACE::OpSchema;
using ONNX_NAMESPACE::OPTIONAL;
void RegisterAutoMLSchemas() {
static const char* DateTimeTransformer_ver1_doc = R"DOC(
DateTimeTransformer accepts a single scalar int64 tensor, constructs
an instance of std::chrono::system_clock::time_point and passes it as an argument
to Microsoft::DateTimeFeaturizer which is a part of a shared library.
It returns an instance of TimePoint class.
)DOC";
MS_AUTOML_OPERATOR_SCHEMA(DateTimeTransformer)
.SinceVersion(1)
.SetDomain(kMSAutoMLDomain)
.SetDoc(DateTimeTransformer_ver1_doc)
.Input(0, "X",
"The input represents a number of seconds passed since the epoch, suitable to properly construct"
"an instance of std::chrono::system_clock::time_point",
"T1")
.Output(0, "Y", "The output which is a Microsoft::DateTimeFeaturizer::TimePoint structure", "T2")
.TypeConstraint(
"T1",
{"tensor(int64)"},
"Constrain input type to int64 scalar tensor.")
.TypeConstraint(
"T2",
{"opaque(com.microsoft.automl,DateTimeFeaturizer_TimePoint)"},
"Constrain output type to an AutoML specific Microsoft::Featurizers::TimePoint type"
"currently not part of ONNX standard. When it becomes a part of the standard we will adjust this"
"kernel definition and move it to ONNX repo");
}
} // namespace automl
} // namespace onnxruntime

View file

@ -0,0 +1,30 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include "core/graph/onnx_protobuf.h"
namespace onnxruntime {
namespace automl {
#define MS_AUTOML_OPERATOR_SCHEMA(name) \
MS_AUTOML_OPERATOR_SCHEMA_UNIQ_HELPER(__COUNTER__, name)
#define MS_AUTOML_OPERATOR_SCHEMA_UNIQ_HELPER(Counter, name) \
MS_AUTOML_OPERATOR_SCHEMA_UNIQ(Counter, name)
#define MS_AUTOML_OPERATOR_SCHEMA_UNIQ(Counter, name) \
static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce( \
op_schema_register_once##name##Counter) ONNX_UNUSED = \
ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__)
#define MS_AUTOML_OPERATOR_SCHEMA_ELSEWHERE(name, schema_func) \
MS_AUTOML_OPERATOR_SCHEMA_UNIQ_HELPER_ELSEWHERE(__COUNTER__, name, schema_func)
#define MS_AUTOML_OPERATOR_SCHEMA_UNIQ_HELPER_ELSEWHERE(Counter, name, schema_func) \
MS_AUTOML_OPERATOR_SCHEMA_UNIQ_ELSEWHERE(Counter, name, schema_func)
#define MS_AUTOML_OPERATOR_SCHEMA_UNIQ_ELSEWHERE(Counter, name, schema_func) \
static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce( \
op_schema_register_once##name##Counter) ONNX_UNUSED = \
schema_func(ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__))
void RegisterAutoMLSchemas();
} // namespace automl
} // namespace onnxruntime

View file

@ -9,6 +9,10 @@
#include "contrib_ops/cpu_contrib_kernels.h"
#endif
#ifdef MICROSOFT_AUTOML
#include "automl_ops/cpu_automl_kernels.h"
#endif
#include "core/framework/compute_capability.h"
namespace onnxruntime {
@ -696,6 +700,9 @@ static void RegisterCPUKernels(KernelRegistry& kernel_registry) {
#ifndef DISABLE_CONTRIB_OPS
::onnxruntime::contrib::RegisterCpuContribKernels(kernel_registry);
#endif
#ifdef MICROSOFT_AUTOML
::onnxruntime::automl::RegisterCpuAutoMLKernels(kernel_registry);
#endif
}
std::shared_ptr<KernelRegistry> GetCpuKernelRegistry() {

View file

@ -10,6 +10,9 @@
#ifndef DISABLE_CONTRIB_OPS
#include "core/graph/contrib_ops/contrib_defs.h"
#endif
#ifdef MICROSOFT_AUTOML
#include "core/graph/automl_ops/automl_defs.h"
#endif
namespace onnxruntime {
using namespace ::onnxruntime::common;
@ -33,10 +36,14 @@ Status Environment::Initialize() {
std::call_once(schemaRegistrationOnceFlag, []() {
ONNX_NAMESPACE::OpSchemaRegistry::DomainToVersionRange::Instance().AddDomainToVersion(onnxruntime::kMSDomain, 1, 1);
ONNX_NAMESPACE::OpSchemaRegistry::DomainToVersionRange::Instance().AddDomainToVersion(onnxruntime::kMSNchwcDomain, 1, 1);
ONNX_NAMESPACE::OpSchemaRegistry::DomainToVersionRange::Instance().AddDomainToVersion(onnxruntime::kMSAutoMLDomain, 1, 1);
// Register contributed schemas.
// The corresponding kernels are registered inside the appropriate execution provider.
#ifndef DISABLE_CONTRIB_OPS
contrib::RegisterContribSchemas();
#endif
#ifdef MICROSOFT_AUTOML
automl::RegisterAutoMLSchemas();
#endif
RegisterOnnxOperatorSetSchema();
RegisterOnnxMLOperatorSetSchema();

View file

@ -0,0 +1,70 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "gtest/gtest.h"
#include "test/providers/provider_test_utils.h"
#include "core/automl/featurizers/src/FeaturizerPrep/Featurizers/DateTimeFeaturizer.h"
namespace dft = Microsoft::Featurizer::DateTimeFeaturizer;
using SysClock = std::chrono::system_clock;
namespace onnxruntime {
namespace test {
TEST(DateTimeFeaturizer_DateTime, Past_1976_Nov_17__12_27_04) {
const time_t date = 217081624;
OpTester test("DateTimeTransformer", 1, onnxruntime::kMSAutoMLDomain);
// We are adding a scalar Tensor in this instance
test.AddInput<int64_t>("X", {1}, {date});
SysClock::time_point stp = SysClock::from_time_t(date);
dft::TimePoint tp(stp);
ASSERT_TRUE(tp.year == 1976);
ASSERT_TRUE(tp.month == dft::TimePoint::NOVEMBER);
ASSERT_TRUE(tp.day == 17);
ASSERT_TRUE(tp.hour == 12);
ASSERT_TRUE(tp.minute == 27);
ASSERT_TRUE(tp.second == 4);
ASSERT_TRUE(tp.dayOfWeek == dft::TimePoint::WEDNESDAY);
ASSERT_TRUE(tp.dayOfYear == 321);
ASSERT_TRUE(tp.quarterOfYear == 4);
ASSERT_TRUE(tp.weekOfMonth == 2);
// Expected output.
test.AddOutput<dft::TimePoint>("Y", std::move(tp));
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
TEST(DateTimeFeaturizer_Transformer, Past_1976_Nov_17__12_27_05) {
const time_t date = 32445842582;
OpTester test("DateTimeTransformer", 1, onnxruntime::kMSAutoMLDomain);
// We are adding a scalar Tensor in this instance
test.AddInput<int64_t>("X", {1}, {date});
SysClock::time_point stp = SysClock::from_time_t(date);
dft::Transformer dt;
dft::TimePoint tp = dt.transform(stp);
ASSERT_TRUE(tp.year == 2998);
ASSERT_TRUE(tp.month == dft::TimePoint::MARCH);
ASSERT_TRUE(tp.day == 2);
ASSERT_TRUE(tp.hour == 14);
ASSERT_TRUE(tp.minute == 3);
ASSERT_TRUE(tp.second == 2);
ASSERT_TRUE(tp.dayOfWeek == dft::TimePoint::FRIDAY);
ASSERT_TRUE(tp.dayOfYear == 60);
ASSERT_TRUE(tp.quarterOfYear == 1);
ASSERT_TRUE(tp.weekOfMonth == 0);
// Expected output.
test.AddOutput<dft::TimePoint>("Y", std::move(tp));
test.Run(OpTester::ExpectResult::kExpectSuccess);
}
} // namespace test
} // namespace onnxruntime

View file

@ -14,6 +14,11 @@
#include "core/session/inference_session.h"
#include "test/util/include/default_providers.h"
#ifdef MICROSOFT_AUTOML
#include "automl_ops/automl_featurizers.h"
namespace dtf = Microsoft::Featurizer::DateTimeFeaturizer;
#endif
using namespace ::onnxruntime::logging;
namespace onnxruntime {
@ -133,6 +138,30 @@ void Check<MLFloat16>(const OpTester::Data& expected_data, const Tensor& output_
}
}
template <>
void Check<BFloat16>(const OpTester::Data& expected_data, const Tensor& output_tensor, const std::string& provider_type) {
auto& expected_tensor = expected_data.data_.Get<Tensor>();
auto* expected = expected_tensor.template Data<BFloat16>();
auto* output = output_tensor.template Data<BFloat16>();
auto size = output_tensor.Shape().Size();
std::vector<float> f_expected(size);
std::vector<float> f_output(size);
BFloat16ToFloat(expected, f_expected.data(), static_cast<size_t>(size));
BFloat16ToFloat(output, f_output.data(), static_cast<size_t>(size));
/// XXX: May need to adjust threshold as BFloat is coarse
float threshold = 0.001f;
for (int i = 0; i < size; ++i) {
if (std::isinf(f_expected[i])) // Test infinity for equality
EXPECT_EQ(f_expected[i], f_output[i]);
else {
// the default for existing tests
EXPECT_NEAR(f_expected[i], f_output[i], threshold) << "provider_type: " << provider_type;
}
}
}
template <typename Type>
void CheckDispatch(MLDataType type, const OpTester::Data& expected_data, const Tensor& output_tensor, const std::string& provider_type) {
if (type == DataTypeImpl::GetType<Type>())
@ -184,8 +213,13 @@ void CheckDispatch(MLDataType type, const OpTester::Data& expected_data, OrtValu
}
void Check(const OpTester::Data& expected_data, OrtValue& ort_value, const std::string& provider_type) {
#ifdef MICROSOFT_AUTOML
CheckDispatch<dtf::TimePoint,VectorMapStringToFloat, VectorMapInt64ToFloat>(expected_data.data_.Type(), expected_data, ort_value,
provider_type);
#else
CheckDispatch<VectorMapStringToFloat, VectorMapInt64ToFloat>(expected_data.data_.Type(), expected_data, ort_value,
provider_type);
#endif
}
void DebugTrap() {

View file

@ -176,6 +176,30 @@ class OpTester {
AddData(input_data_, name, dims, values.data(), values.size(), is_initializer);
}
// Add other registered types, possibly experimental
template <typename T>
void AddInput(const char* name, const T& val) {
auto mltype = DataTypeImpl::GetType<T>();
ORT_ENFORCE(mltype != nullptr, "T must be a registered cpp type");
auto ptr = std::make_unique<T>(val);
OrtValue value;
value.Init(ptr.get(), mltype, mltype->GetDeleteFunc());
ptr.release();
input_data_.push_back({{name, mltype->GetTypeProto()}, value, optional<float>(), optional<float>()});
}
template <typename T>
void AddInput(const char* name, T&& val) {
auto mltype = DataTypeImpl::GetType<T>();
ORT_ENFORCE(mltype != nullptr, "T must be a registered cpp type");
auto ptr = std::make_unique<T>(std::move(val));
OrtValue value;
value.Init(ptr.get(), mltype, mltype->GetDeleteFunc());
ptr.release();
input_data_.push_back({{name, mltype->GetTypeProto()}, value, optional<float>(), optional<float>()});
}
template <typename TKey, typename TVal>
void AddInput(const char* name, const std::map<TKey, TVal>& val) {
std::unique_ptr<std::map<TKey, TVal>> ptr = std::make_unique<std::map<TKey, TVal>>(val);
@ -208,6 +232,29 @@ class OpTester {
output_data_.push_back({{name, &s_type_proto<T>}, {}, optional<float>(), optional<float>()});
}
// Add other registered types, possibly experimental
template <typename T>
void AddOutput(const char* name, const T& val) {
auto mltype = DataTypeImpl::GetType<T>();
ORT_ENFORCE(mltype != nullptr, "T must be a registered cpp type");
auto ptr = std::make_unique<T>(val);
OrtValue value;
value.Init(ptr.get(), mltype, mltype->GetDeleteFunc());
ptr.release();
output_data_.push_back({{name, mltype->GetTypeProto()}, value, optional<float>(), optional<float>()});
}
template <typename T>
void AddOutput(const char* name, T&& val) {
auto mltype = DataTypeImpl::GetType<T>();
ORT_ENFORCE(mltype != nullptr, "T must be a registered cpp type");
auto ptr = std::make_unique<T>(std::move(val));
OrtValue value;
value.Init(ptr.get(), mltype, mltype->GetDeleteFunc());
ptr.release();
output_data_.push_back({{name, mltype->GetTypeProto()}, value, optional<float>(), optional<float>()});
}
// Add non tensor output
template <typename TKey, typename TVal>
void AddOutput(const char* name, const std::vector<std::map<TKey, TVal>>& val) {

View file

@ -127,6 +127,7 @@ Use the individual flags to only run the specified stages.
parser.add_argument("--use_openblas", action='store_true', help="Build with OpenBLAS.")
parser.add_argument("--use_mkldnn", action='store_true', help="Build with MKLDNN.")
parser.add_argument("--use_mklml", action='store_true', help="Build with MKLML.")
parser.add_argument("--use_automl", action='store_true', help="Build with AutoML support.")
parser.add_argument("--use_ngraph", action='store_true', help="Build with nGraph.")
parser.add_argument("--use_openvino", nargs="?", const="CPU_FP32",
choices=["CPU_FP32","GPU_FP32","GPU_FP16","VAD-M_FP16","MYRIAD_FP16"], help="Build with OpenVINO for specific hardware.")
@ -323,6 +324,7 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home
"-Donnxruntime_USE_CUDA=" + ("ON" if args.use_cuda else "OFF"),
"-Donnxruntime_USE_NSYNC=" + ("OFF" if is_windows() or not args.use_nsync else "ON"),
"-Donnxruntime_CUDNN_HOME=" + (cudnn_home if args.use_cuda else ""),
"-Donnxruntime_USE_AUTOML=" + ("ON" if args.use_automl else "OFF"),
"-Donnxruntime_CUDA_HOME=" + (cuda_home if args.use_cuda else ""),
"-Donnxruntime_USE_JEMALLOC=" + ("ON" if args.use_jemalloc else "OFF"),
"-Donnxruntime_ENABLE_PYTHON=" + ("ON" if args.enable_pybind else "OFF"),