Add optional dft_length input to the DFT and IDFT operators. (#11427)

* Add optional dft_length input.

* CR Feedback

Co-authored-by: Sheil Kumar <sheilk@microsoft.com>
This commit is contained in:
Sheil Kumar 2022-05-03 16:17:43 -07:00 committed by GitHub
parent ae043e3963
commit 85fa168dc1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 188 additions and 125 deletions

View file

@ -43,6 +43,26 @@ ONNX_OPERATOR_KERNEL_EX(
.TypeConstraint("T2", BuildKernelDefConstraints<int64_t>()),
STFT);
// TODO: deduplicate with the equivalent helper in window_functions.cc.
//
// Reads the single element stored in `tensor` and converts it to T.
//
// The tensor must be present and hold exactly one element. Its element type
// must be float, double, int32 or int64; the stored value is read with that
// type and then static_cast to T, so narrowing follows the usual C++ rules.
//
// Fails via ORT_ENFORCE when the tensor is missing or does not contain exactly
// one element, and via ORT_THROW when the element type is not supported.
template <typename T>
static T get_scalar_value_from_tensor(const Tensor* tensor) {
  // Callers forward ctx->Input<Tensor>(...) directly; report a missing input
  // explicitly instead of dereferencing a null pointer.
  ORT_ENFORCE(tensor != nullptr, "Expected a tensor holding a single value, but the input is missing.");
  // This helper serves several inputs (dft_length, frame_step, frame_length),
  // so the message must not name one specific input.
  ORT_ENFORCE(tensor->Shape().Size() == 1, "Input tensor should have a single value.");
  auto data_type = tensor->DataType()->AsPrimitiveDataType()->GetDataType();
  switch (data_type) {
    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
      return static_cast<T>(*reinterpret_cast<const float*>(tensor->DataRaw()));
    case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE:
      return static_cast<T>(*reinterpret_cast<const double*>(tensor->DataRaw()));
    case ONNX_NAMESPACE::TensorProto_DataType_INT32:
      return static_cast<T>(*reinterpret_cast<const int32_t*>(tensor->DataRaw()));
    case ONNX_NAMESPACE::TensorProto_DataType_INT64:
      return static_cast<T>(*reinterpret_cast<const int64_t*>(tensor->DataRaw()));
    default:
      ORT_THROW("Unsupported input data type of ", data_type);
  }
}
// Reports whether `shape` is treated as a real-valued signal: either the
// shape has exactly two dimensions, or its trailing dimension has size 1.
static bool is_real_valued_signal(const onnxruntime::TensorShape& shape) {
  const auto rank = shape.NumDimensions();
  if (rank == 2) {
    return true;
  }
  // Any other rank counts as real only when the last dimension is 1.
  return shape[rank - 1] == 1;
}
@ -141,7 +161,7 @@ static T compute_angular_velocity(size_t number_of_samples, bool inverse) {
template <typename T, typename U>
static Status fft_radix2(OpKernelContext* /*ctx*/,
const Tensor* X, Tensor* Y,
size_t X_offset, size_t X_stride, size_t Y_offset, size_t Y_stride, int64_t axis,
size_t X_offset, size_t X_stride, size_t Y_offset, size_t Y_stride, int64_t axis, size_t dft_length,
const Tensor* window, bool is_onesided, bool inverse,
std::vector<std::complex<T>>& V,
std::vector<std::complex<T>>& temp_output) {
@ -149,7 +169,7 @@ static Status fft_radix2(OpKernelContext* /*ctx*/,
// Get shape and significant bits
const auto& X_shape = X->Shape();
size_t number_of_samples = static_cast<size_t>(X_shape[axis]);
unsigned significant_bits = static_cast<unsigned>(log2(number_of_samples));
unsigned significant_bits = static_cast<unsigned>(log2(dft_length));
// Get data
auto* X_data = const_cast<U*>(reinterpret_cast<const U*>(X->DataRaw())) + X_offset;
@ -162,8 +182,8 @@ static Status fft_radix2(OpKernelContext* /*ctx*/,
size_t Y_data_stride = 1;
std::complex<T>* Y_data;
if (is_onesided) {
if (temp_output.size() != number_of_samples) {
temp_output = std::vector<std::complex<T>>(number_of_samples);
if (temp_output.size() != dft_length) {
temp_output = std::vector<std::complex<T>>(dft_length);
}
Y_data = temp_output.data();
} else {
@ -171,34 +191,34 @@ static Status fft_radix2(OpKernelContext* /*ctx*/,
Y_data_stride = Y_stride;
}
auto angular_velocity = compute_angular_velocity<T>(number_of_samples, inverse);
auto angular_velocity = compute_angular_velocity<T>(dft_length, inverse);
// Create vandermonde matrix V ordered with the bit-reversed permutation
if (V.size() != number_of_samples) {
V = std::vector<std::complex<T>>(number_of_samples); // e^(i *2*pi / N * k)
for (size_t i = 0; i < number_of_samples; i++) {
if (V.size() != dft_length) {
V = std::vector<std::complex<T>>(dft_length); // e^(i *2*pi / N * k)
for (size_t i = 0; i < dft_length; i++) {
size_t bit_reversed_index = bit_reverse(i, significant_bits);
V[bit_reversed_index] = std::complex<T>(cos(i * angular_velocity), sin(i * angular_velocity));
}
}
for (size_t i = 0; i < number_of_samples; i++) {
for (size_t i = 0; i < dft_length; i++) {
size_t bit_reversed_index = bit_reverse(i, significant_bits);
auto x = *(X_data + bit_reversed_index*X_stride);
auto window_element = window_data ? *(window_data + bit_reversed_index) : 1;
auto x = (bit_reversed_index < number_of_samples) ? * (X_data + bit_reversed_index * X_stride) : 0;
auto window_element = window_data ? *(window_data + bit_reversed_index) : 1;
*(Y_data + i*Y_data_stride) = std::complex<T>(1, 0) * x * window_element;
}
// Run fft_radix2
unsigned current_significant_bits = 0;
for (size_t i = 2; i <= number_of_samples; i <<= 1) {
for (size_t i = 2; i <= dft_length; i <<= 1) {
size_t midpoint = i >> 1;
current_significant_bits++;
for (size_t k = 0; k < midpoint; k++) {
auto first_idx = bit_reverse(k, current_significant_bits);
auto second_idx = bit_reverse(midpoint + k, current_significant_bits);
for (size_t j = 0; j < number_of_samples; j += i) {
for (size_t j = 0; j < dft_length; j += i) {
auto even_index = k + j;
auto odd_index = k + j + midpoint;
std::complex<T>* even = (Y_data + even_index * Y_data_stride);
@ -213,15 +233,15 @@ static Status fft_radix2(OpKernelContext* /*ctx*/,
// Scale the output if inverse
if (inverse) {
for (size_t i = 0; i < number_of_samples; i++) {
for (size_t i = 0; i < dft_length; i++) {
std::complex<T>& val = *(Y_data + i * Y_data_stride);
val /= static_cast<T>(number_of_samples);
val /= static_cast<T>(dft_length);
}
}
if (is_onesided) {
auto destination = reinterpret_cast<std::complex<T>*>(Y->MutableDataRaw()) + Y_offset;
for (size_t i = 0; i < number_of_samples; i++) {
for (size_t i = 0; i < dft_length; i++) {
*(destination + Y_stride * i) = *(Y_data + i * Y_data_stride);
}
}
@ -232,7 +252,7 @@ static Status fft_radix2(OpKernelContext* /*ctx*/,
template <typename T, typename U>
static Status dft_naive(const Tensor* X, Tensor* Y,
size_t X_offset, size_t X_stride, size_t Y_offset, size_t Y_stride, int64_t axis,
const Tensor* window, bool inverse) {
size_t dft_length, const Tensor* window, bool inverse) {
// Get shape and significant bits
const auto& X_shape = X->Shape();
size_t number_of_samples = static_cast<size_t>(X_shape[axis]);
@ -242,28 +262,29 @@ static Status dft_naive(const Tensor* X, Tensor* Y,
// Get data
auto* X_data = const_cast<U*>(reinterpret_cast<const U*>(X->DataRaw())) + X_offset;
auto* Y_data = reinterpret_cast<std::complex<T>*>(Y->MutableDataRaw()) + Y_offset;
U* window_data = nullptr;
if (window) {
window_data = const_cast<U*>(reinterpret_cast<const U*>(window->DataRaw()));
}
auto angular_velocity = compute_angular_velocity<T>(number_of_samples, inverse);
auto angular_velocity = compute_angular_velocity<T>(dft_length, inverse);
for (size_t i = 0; i < dft_output_size; i++) {
std::complex<T>& out = *(Y_data + i*Y_stride);
out.real(0);
out.imag(0);
for (size_t j = 0; j < number_of_samples; j++) { // vectorize over this loop
for (size_t j = 0; j < dft_length; j++) { // vectorize over this loop
auto exponential = std::complex<T>(cos(i * j * angular_velocity), sin(i * j * angular_velocity));
auto window_element = window_data ? * (window_data + j) : 1;
auto element = *(X_data + j*X_stride) * window_element;
auto x = (j < number_of_samples) ? *(X_data + j * X_stride) : 0;
auto element = x * window_element;
out += exponential * element;
}
if (inverse) {
out /= static_cast<T>(number_of_samples);
out /= static_cast<T>(dft_length);
}
}
@ -271,31 +292,28 @@ static Status dft_naive(const Tensor* X, Tensor* Y,
}
template <typename T, typename U>
static Status discrete_fourier_transform(OpKernelContext* ctx, const Tensor* X, Tensor* Y, int64_t axis, const Tensor* window, bool is_onesided, bool inverse,
static Status discrete_fourier_transform(OpKernelContext* ctx, const Tensor* X, Tensor* Y, int64_t axis, int64_t dft_length, const Tensor* window, bool is_onesided, bool inverse,
std::vector<std::complex<T>>& V, std::vector<std::complex<T>>& temp_output) {
// Get shape
const auto& X_shape = X->Shape();
const auto& Y_shape = Y->Shape();
size_t number_of_samples = static_cast<size_t>(X_shape[axis]);
auto batch_and_signal_rank = X->Shape().NumDimensions();
auto total_dfts = static_cast<size_t>(X->Shape().Size() / X->Shape()[axis]);
auto is_input_real = X->Shape().NumDimensions() == 2 || X->Shape()[X->Shape().NumDimensions() - 1] == 1;
auto compex_input_factor = is_input_real ? 1 : 2;
auto complex_input_factor = is_input_real ? 1 : 2;
if (X->Shape().NumDimensions() > 2)
{
total_dfts /= X->Shape()[X->Shape().NumDimensions() - 1];
batch_and_signal_rank -= 1;
}
// Calculate x/y offsets/strides
for (size_t i = 0; i < total_dfts; i++)
{
size_t X_offset = 0;
size_t X_stride = X_shape.SizeFromDimension(axis+1) / compex_input_factor;
size_t X_stride = X_shape.SizeFromDimension(axis+1) / complex_input_factor;
size_t cumulative_packed_stride = total_dfts;
size_t temp = i;
for (size_t r = 0; r < batch_and_signal_rank; r++) {
@ -306,7 +324,7 @@ static Status discrete_fourier_transform(OpKernelContext* ctx, const Tensor* X,
cumulative_packed_stride /= X_shape[r];
auto index = temp / cumulative_packed_stride;
temp -= (index * cumulative_packed_stride);
X_offset += index * X_shape.SizeFromDimension(r + 1) / compex_input_factor;
X_offset += index * X_shape.SizeFromDimension(r + 1) / complex_input_factor;
}
size_t Y_offset = 0;
@ -324,12 +342,12 @@ static Status discrete_fourier_transform(OpKernelContext* ctx, const Tensor* X,
Y_offset += index * Y_shape.SizeFromDimension(r + 1) / 2;
}
if (is_power_of_2(number_of_samples)) {
ORT_RETURN_IF_ERROR((fft_radix2<T, U>(ctx, X, Y, X_offset, X_stride, Y_offset, Y_stride, axis, window, is_onesided, inverse, V, temp_output)));
if (is_power_of_2(dft_length)) {
ORT_RETURN_IF_ERROR((fft_radix2<T, U>(ctx, X, Y, X_offset, X_stride, Y_offset, Y_stride, axis, dft_length, window, is_onesided, inverse, V, temp_output)));
} else {
ORT_RETURN_IF_ERROR((dft_naive<T, U>(X, Y, X_offset, X_stride, Y_offset, Y_stride, axis, window, inverse)));
ORT_RETURN_IF_ERROR((dft_naive<T, U>(X, Y, X_offset, X_stride, Y_offset, Y_stride, axis, dft_length, window, inverse)));
}
}
}
return Status::OK();
}
@ -337,13 +355,20 @@ static Status discrete_fourier_transform(OpKernelContext* ctx, const Tensor* X,
static Status discrete_fourier_transform(OpKernelContext* ctx, int64_t axis, bool is_onesided, bool inverse) {
// Get input shape
const auto* X = ctx->Input<Tensor>(0);
const auto* dft_length = ctx->Input<Tensor>(1);
const auto& X_shape = X->Shape();
const auto is_real_valued = is_real_valued_signal(X_shape);
const auto is_complex_valued = is_complex_valued_signal(X_shape);
int64_t number_of_samples = static_cast<int64_t>(X_shape[axis]);
if (dft_length) {
const auto& dft_length_shape = dft_length->Shape();
ORT_RETURN_IF(!dft_length_shape.IsScalar(), "dft_length must be a scalar value.");
number_of_samples = static_cast<int>(get_scalar_value_from_tensor<int64_t>(dft_length));
}
// Get the DFT output size. Onesided will return only the unique values!
// note: x >> 1 === std::floor(x / 2.f)
int64_t number_of_samples = static_cast<int64_t>(X_shape[axis]);
auto dft_output_size = is_onesided ?
((number_of_samples >> 1) + 1) :
number_of_samples;
@ -353,7 +378,7 @@ static Status discrete_fourier_transform(OpKernelContext* ctx, int64_t axis, boo
if (X_shape.NumDimensions() == 2)
{
Y_shape = onnxruntime::TensorShape({X_shape[0], dft_output_size, 2});
} else
} else
{
Y_shape[Y_shape.NumDimensions() - 1] = 2;
}
@ -368,9 +393,9 @@ static Status discrete_fourier_transform(OpKernelContext* ctx, int64_t axis, boo
std::vector<std::complex<float>> V;
std::vector<std::complex<float>> temp_output;
if (is_real_valued) {
ORT_RETURN_IF_ERROR((discrete_fourier_transform<float, float>(ctx, X, Y, axis, nullptr, is_onesided, inverse, V, temp_output)));
ORT_RETURN_IF_ERROR((discrete_fourier_transform<float, float>(ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output)));
} else if (is_complex_valued) {
ORT_RETURN_IF_ERROR((discrete_fourier_transform<float, std::complex<float>>(ctx, X, Y, axis, nullptr, is_onesided, inverse, V, temp_output)));
ORT_RETURN_IF_ERROR((discrete_fourier_transform<float, std::complex<float>>(ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output)));
} else {
ORT_THROW("Unsupported input signal shape. The signal's first dimenstion must be the batch dimension and its second dimension must be the signal length dimension. It may optionally include a 3rd dimension of size 2 for complex inputs.", data_type);
}
@ -378,9 +403,9 @@ static Status discrete_fourier_transform(OpKernelContext* ctx, int64_t axis, boo
std::vector<std::complex<double>> V;
std::vector<std::complex<double>> temp_output;
if (is_real_valued) {
ORT_RETURN_IF_ERROR((discrete_fourier_transform<double, double>(ctx, X, Y, axis, nullptr, is_onesided, inverse, V, temp_output)));
ORT_RETURN_IF_ERROR((discrete_fourier_transform<double, double>(ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output)));
} else if (is_complex_valued) {
ORT_RETURN_IF_ERROR((discrete_fourier_transform<double, std::complex<double>>(ctx, X, Y, axis, nullptr, is_onesided, inverse, V, temp_output)));
ORT_RETURN_IF_ERROR((discrete_fourier_transform<double, std::complex<double>>(ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output)));
} else {
ORT_THROW("Unsupported input signal shape. The signal's first dimenstion must be the batch dimension and its second dimension must be the signal length dimension. It may optionally include a 3rd dimension of size 2 for complex inputs.", data_type);
}
@ -401,26 +426,6 @@ Status IDFT::Compute(OpKernelContext* ctx) const {
return Status::OK();
}
// TODO: deduplicate with the other copy of this helper in window_functions.cc.
//
// Reads the single element stored in `tensor` and converts it to T.
// Supported element types are float, double, int32 and int64; any other
// element type is rejected with ORT_THROW.
template <typename T>
static T get_scalar_value_from_tensor(const Tensor* tensor) {
// The tensor must contain exactly one element.
// NOTE(review): the "ratio" wording looks inherited from another helper; this
// function is also called for frame_step / frame_length in this file - confirm.
ORT_ENFORCE(tensor->Shape().Size() == 1, "ratio input should have a single value.");
auto data_type = tensor->DataType()->AsPrimitiveDataType()->GetDataType();
// Read the raw buffer using the tensor's own element type, then cast to T.
switch (data_type) {
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
return static_cast<T>(*reinterpret_cast<const float*>(tensor->DataRaw()));
case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE:
return static_cast<T>(*reinterpret_cast<const double*>(tensor->DataRaw()));
case ONNX_NAMESPACE::TensorProto_DataType_INT32:
return static_cast<T>(*reinterpret_cast<const int32_t*>(tensor->DataRaw()));
case ONNX_NAMESPACE::TensorProto_DataType_INT64:
return static_cast<T>(*reinterpret_cast<const int64_t*>(tensor->DataRaw()));
default:
ORT_THROW("Unsupported input data type of ", data_type);
}
}
template <typename T, typename U>
static Status short_time_fourier_transform(OpKernelContext* ctx, bool is_onesided, bool /*inverse*/) {
// Attr("onesided"): default = 1
@ -429,7 +434,7 @@ static Status short_time_fourier_transform(OpKernelContext* ctx, bool is_oneside
// Input(2, "window") type = T1, optional
// Input(3, "frame_step") type = T2
// Output(0, "output") type = T1
// Get signal
const auto* signal = ctx->Input<Tensor>(0);
const auto frame_step = get_scalar_value_from_tensor<int64_t>(ctx->Input<Tensor>(1));
@ -445,8 +450,8 @@ static Status short_time_fourier_transform(OpKernelContext* ctx, bool is_oneside
ORT_ENFORCE(signal_components == 1 || signal_components == 2, "Ensure that the signal has either 1 or 2 components.");
// Get the frame length
int64_t frame_length = std::numeric_limits<int64_t>::min();
if (frame_length_tensor)
int64_t frame_length = std::numeric_limits<int64_t>::min();
if (frame_length_tensor)
{
frame_length = get_scalar_value_from_tensor<int64_t>(frame_length_tensor);
}
@ -524,7 +529,7 @@ static Status short_time_fourier_transform(OpKernelContext* ctx, bool is_oneside
0);
// Run individual dft
ORT_RETURN_IF_ERROR((discrete_fourier_transform<T, U>(ctx, &input, &output, 1, window, is_onesided, false, V, temp_output)));
ORT_RETURN_IF_ERROR((discrete_fourier_transform<T, U>(ctx, &input, &output, 1, window_size, window, is_onesided, false, V, temp_output)));
}
}
@ -575,4 +580,4 @@ Status STFT::Compute(OpKernelContext* ctx) const {
} // namespace contrib
} // namespace onnxruntime
#endif
#endif

View file

@ -78,49 +78,62 @@ void RegisterSignalSchemas() {
static_cast<int64_t>(0))
.Input(0,
"input",
"For real input, the following shape is expected: [batch_idx][n_fft]."
"For complex input, the following shape is expected: [batch_idx][n_fft][2]."
"The final dimension represents the real and imaginary parts of the value."
"For real multi-dimensional input, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][1]."
"For complex multi-dimensional input, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][2]."
"The first dimension is the batch dimension.",
"T")
"The first dimension is the batch dimension."
"The final dimension represents the real and imaginary parts of the value.",
"T1")
.Input(1,
"dft_length",
"The length of the signal."
"If greater than the axis dimension, the signal will be zero-padded up to dft_length. "
"If less than the axis dimension, only the first dft_length values will be used as the signal. "
"It's an optional value. ",
"T2",
OpSchema::Optional,
true,
1,
OpSchema::NonDifferentiable)
.Output(0,
"output",
"The Fourier Transform of the input vector."
"If signal_dimN = 1, and onesided is 0, [batch_idx][n_fft][2]"
"If signal_dimN = 1, and onesided is 1, [batch_idx][floor(n_fft/2)+1][2]"
"If signal_dimN = 2, and onesided is 0 and axis = 0, [batch_idx][signal_dim1][signal_dim2][2]"
"If signal_dimN = 2, and onesided is 0 and axis = 1, [batch_idx][signal_dim1][signal_dim2][2]"
"If signal_dimN = 2, and onesided is 1 and axis = 0, [batch_idx][floor(signal_dim1/2)+1][signal_dim2][2]"
"If signal_dimN = 2, and onesided is 1 and axis = 1, [batch_idx][signal_dim1][floor(signal_dim2/2)+1][2]",
"T")
"The signal_dim at the specified axis is equal to the dft_length."
"If onesided is 0, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][2]."
"If axis=0 and onesided is 1, the following shape is expected: [batch_idx][floor(signal_dim1/2)+1][signal_dim2]...[signal_dimN][2]."
"If axis=1 and onesided is 1, the following shape is expected: [batch_idx][signal_dim1][floor(signal_dim2/2)+1]...[signal_dimN][2]."
"If axis=N-1 and onesided is 1, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[floor(signal_dimN/2)+1][2].",
"T1")
.TypeConstraint(
"T",
"T1",
{"tensor(float16)", "tensor(float)", "tensor(double)", "tensor(bfloat16)"},
"Constrain input and output types to float tensors.")
.TypeConstraint(
"T2",
{"tensor(int64)"},
"Constrain scalar length types to int64_t.")
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
propagateElemTypeFromInputToOutput(ctx, 0, 0);
const int64_t batch_ndim = 1;
auto& input_shape = getInputShape(ctx, 0);
auto dim_size = static_cast<int64_t>(input_shape.dim_size());
auto has_component_dimension = dim_size > 2;
auto has_component_dimension = dim_size > 2;
ONNX_NAMESPACE::TensorShapeProto result_shape_proto = input_shape;
bool axis = static_cast<bool>(getAttribute(ctx, "axis", 0));
bool is_onesided = static_cast<bool>(getAttribute(ctx, "onesided", 0));
if (is_onesided) {
// Since signal_ndim = 1, and multidimensional DFT is not supported,
// only the single signal dim (1) needs to be updated
auto n_fft = input_shape.dim(1).dim_value();
result_shape_proto.mutable_dim(1)->set_dim_value((n_fft >> 1) + 1);
auto n_fft = input_shape.dim(1 + axis).dim_value();
result_shape_proto.mutable_dim(1 + axis)->set_dim_value((n_fft >> 1) + 1);
}
if (has_component_dimension) {
result_shape_proto.mutable_dim(static_cast<int>(dim_size - 1))->set_dim_value(2);
result_shape_proto.mutable_dim(static_cast<int>(dim_size - 1))->set_dim_value(2);
} else {
result_shape_proto.add_dim()->set_dim_value(2);
result_shape_proto.add_dim()->set_dim_value(2);
}
updateOutputShape(ctx, 0, result_shape_proto);
@ -137,42 +150,54 @@ void RegisterSignalSchemas() {
static_cast<int64_t>(0))
.Input(0,
"input",
"For real input, the following shape is expected: [batch_idx][n_fft]."
"For complex input, the following shape is expected: [batch_idx][n_fft][2]."
"The final dimension represents the real and imaginary parts of the value."
"For real multi-dimensional input, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][1]."
"For complex multi-dimensional input, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][2]."
"The first dimension is the batch dimension.",
"T")
"The first dimension is the batch dimension."
"The final dimension represents the real and imaginary parts of the value.",
"T1")
.Input(1,
"dft_length",
"The length of the signal."
"If greater than the axis dimension, the signal will be zero-padded up to dft_length. "
"If less than the axis dimension, only the first dft_length values will be used as the signal. "
"It's an optional value. ",
"T2",
OpSchema::Optional,
true,
1,
OpSchema::NonDifferentiable)
.Output(0,
"output",
"The inverse discrete Fourier transform of the input. "
"If signal_dimN = 1, [batch_idx][n_fft][2]"
"If signal_dimN = 2 and axis = 0, [batch_idx][signal_dim1][signal_dim2][2]"
"If signal_dimN = 2 and axis = 1, [batch_idx][signal_dim1][signal_dim2][2]"
"The signal_dim at the specified axis is equal to the dft_length."
"The expected shape is [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][2]"
"For all types of input, the last dimension of the output represents the components of a complex number.",
"T",
"T1",
OpSchema::Single,
true,
1,
OpSchema::NonDifferentiable)
.TypeConstraint(
"T",
"T1",
{"tensor(float16)", "tensor(float)", "tensor(double)", "tensor(bfloat16)"},
"Constrain input and output types to float tensors.")
.TypeConstraint(
"T2",
{"tensor(int64)"},
"Constrain scalar length types to int64_t.")
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
propagateElemTypeFromInputToOutput(ctx, 0, 0);
const int64_t batch_ndim = 1;
auto& input_shape = getInputShape(ctx, 0);
ONNX_NAMESPACE::TensorShapeProto result_shape = input_shape;
auto dim_size = static_cast<int64_t>(input_shape.dim_size());
auto has_component_dimension = dim_size > 2;
auto has_component_dimension = dim_size > 2;
if (has_component_dimension) {
result_shape.mutable_dim(static_cast<int>(dim_size - 1))->set_dim_value(2);
} else {
result_shape.add_dim()->set_dim_value(2);
result_shape.add_dim()->set_dim_value(2);
}
updateOutputShape(ctx, 0, result_shape);
@ -450,4 +475,4 @@ void RegisterSignalSchemas() {
} // namespace audio
} // namespace onnxruntime
#endif
#endif

View file

@ -309,7 +309,7 @@ static void EvaluateSessionAndCloseModel() {
WINML_EXPECT_NO_THROW(::EvaluateSessionAndCloseModelHelper(LearningModelDeviceKind::Cpu, false));
}
static void NamedDimensionOverride()
static void NamedDimensionOverride()
{
LearningModel model = nullptr;
WINML_EXPECT_NO_THROW(APITest::LoadModel(L"fns-candy.onnx", model));
@ -323,7 +323,7 @@ static void NamedDimensionOverride()
LearningModelSessionOptions options;
options.OverrideNamedDimension(L"None", n);
// Verifies that if a Dim name doesn't exist the named dimension override does not interfere with successful evaluation
// The override is still expected to be present in the internal onnxruntime override data
options.OverrideNamedDimension(L"DimNameThatDoesntExist", n);
@ -441,8 +441,8 @@ static void WindowFunction(const wchar_t* window_operator_name, TensorKind kind)
if (kind == TensorKind::Double) {
window_operator.SetAttribute(L"output_datatype", double_data_type);
}
auto model =
auto model =
LearningModelBuilder::Create(13)
.Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", TensorKind::Int64, scalar_shape))
.Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output", kind, output_shape))
@ -502,7 +502,7 @@ static void DiscreteFourierTransform_2D() {
IRandomAccessStream stream = imagefile.OpenAsync(FileAccessMode::Read).get();
SoftwareBitmap softwareBitmap = (BitmapDecoder::CreateAsync(stream).get()).GetSoftwareBitmapAsync().get();
VideoFrame frame = VideoFrame::CreateWithSoftwareBitmap(softwareBitmap);
auto corrected_image =
winrt::Windows::Media::VideoFrame(
winrt::Windows::Graphics::Imaging::BitmapPixelFormat::Bgra8,
@ -516,7 +516,7 @@ static void DiscreteFourierTransform_2D() {
std::vector<int64_t> shape = {1, 1, height, width};
std::vector<int64_t> output_shape = {1, 1, height, width};
auto builder =
LearningModelBuilder::Create(13)
.Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input.Signal", TensorKind::Float, shape))
@ -575,7 +575,7 @@ static void DiscreteFourierTransform_2D() {
.SetOutput(L"C", L"Output.Error"));
auto model = builder.CreateModel();
LearningModelSession session(model);
LearningModelBinding binding(session);
@ -610,8 +610,9 @@ static void DiscreteFourierTransform_2D() {
static void DiscreteFourierTransform(
const std::vector<std::complex<float>>& input,
const std::vector<int64_t>& shape,
const std::vector<std::complex<float>>& expected_output,
const std::vector<std::complex<float>>& expected_output,
size_t axis,
size_t dft_length,
bool is_onesided = false) {
auto axis_dim = axis + 1;
printf("\nDiscrete Fourier Transform [axis=%d, is_onesided=%s]\n", static_cast<int>(axis_dim), is_onesided ? "true" : "false");
@ -627,18 +628,20 @@ static void DiscreteFourierTransform(
output_shape[output_shape.size() - 1] = 2;
}
output_shape[axis_dim] = is_onesided ? (1 + (shape[axis_dim] >> 1)) : shape[axis_dim];
auto model =
LearningModelBuilder::Create(13)
.Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input.Signal", TensorKind::Float, shape))
.Inputs().AddConstant(L"Input.DFTLength", TensorInt64Bit::CreateFromArray({}, {INT64(dft_length)}))
.Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output.Spectra", TensorKind::Float, output_shape))
.Operators().Add(Operator(L"DFT", MS_EXPERIMENTAL_DOMAIN)
.SetInput(L"input", L"Input.Signal")
.SetInput(L"dft_length", L"Input.DFTLength")
.SetAttribute(L"axis", TensorInt64Bit::CreateFromArray({}, {INT64(axis)}))
.SetAttribute(L"onesided", TensorInt64Bit::CreateFromArray({}, {is_onesided}))
.SetOutput(L"output", L"Output.Spectra"))
.CreateModel();
LearningModelSession session(model);
LearningModelBinding binding(session);
@ -663,7 +666,7 @@ static void DiscreteFourierTransform(
constexpr float error_threshold = .001f;
WINML_EXPECT_TRUE(abs(y_ivv.GetAt(i) - expected_output[i / 2].real()) < error_threshold);
WINML_EXPECT_TRUE(abs(y_ivv.GetAt(i + 1) - expected_output[i / 2].imag()) < error_threshold);
}
}
printf("\n");
}
#endif
@ -726,7 +729,7 @@ static void STFT(size_t batch_size, size_t signal_size, size_t dft_size,
2
};
auto dft_length = TensorInt64Bit::CreateFromArray({}, {INT64(dft_size)});
auto model =
LearningModelBuilder::Create(13)
.Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input.TimeSignal", TensorKind::Float, input_shape))
@ -879,7 +882,7 @@ static void MelSpectrogramOnThreeToneSignal(
// Bind input
auto signal = MakeThreeTones<float>(signal_size, sampling_rate);
binding.Bind(L"Input.TimeSignal", TensorFloat::CreateFromArray(signal_shape, signal));
// Bind output
auto output_image =
winrt::Windows::Media::VideoFrame(
@ -918,8 +921,8 @@ static void ModelBuilding_StandardDeviationNormalization() {
int64_t height = 256;
int64_t width = 256;
int64_t channels = 3;
std::vector<int64_t> input_shape = {1, height, width, channels};
std::vector<int64_t> output_shape = {1, channels, height, width};
std::vector<int64_t> input_shape = {1, height, width, channels};
std::vector<int64_t> output_shape = {1, channels, height, width};
auto sub_model =
LearningModelBuilder::Create(13)
.Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", L"The NHWC image", TensorKind::Float, input_shape))
@ -930,7 +933,7 @@ static void ModelBuilding_StandardDeviationNormalization() {
.SetInput(L"B", L"Means")
.SetOutput(L"C", L"Output"))
.CreateModel();
auto div_model =
auto div_model =
LearningModelBuilder::Create(13)
.Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", L"The NHWC image", TensorKind::Float, input_shape))
.Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"StdDevs", TensorKind::Float, {channels}))
@ -1077,13 +1080,13 @@ static void ModelBuilding_DiscreteFourierTransform() {
{1.00f, 0.00f}, {2.00, 0.00f}, {3.00f, 0.00f}, {4.00f, 0.00f}, {5.00f, 0.00f}, {6.00f, 0.00f}, {7.00f, 0.00f}, {8.00f, 0.00f},
{1.00f, 0.00f}, {2.00, 0.00f}, {3.00f, 0.00f}, {4.00f, 0.00f}, {5.00f, 0.00f}, {6.00f, 0.00f}, {7.00f, 0.00f}, {8.00f, 0.00f},
{1.00f, 0.00f}, {2.00, 0.00f}, {3.00f, 0.00f}, {4.00f, 0.00f}, {5.00f, 0.00f}, {6.00f, 0.00f}, {7.00f, 0.00f}, {8.00f, 0.00f},
{1.00f, 0.00f}, {2.00, 0.00f}, {3.00f, 0.00f}, {4.00f, 0.00f}, {5.00f, 0.00f}, {6.00f, 0.00f}, {7.00f, 0.00f}, {8.00f, 0.00f},
{1.00f, 0.00f}, {2.00, 0.00f}, {3.00f, 0.00f}, {4.00f, 0.00f}, {5.00f, 0.00f}, {6.00f, 0.00f}, {7.00f, 0.00f}, {8.00f, 0.00f},
{2.00f, 1.00f}, {4.00, 2.00f}, {6.00f, 3.00f}, {8.00f, 4.00f}, {10.00f, 5.00f}, {12.00f, 6.00f}, {14.00f, 7.00f}, {16.00f, 8.00f},
{2.00f, 1.00f}, {4.00, 2.00f}, {6.00f, 3.00f}, {8.00f, 4.00f}, {10.00f, 5.00f}, {12.00f, 6.00f}, {14.00f, 7.00f}, {16.00f, 8.00f},
{2.00f, 1.00f}, {4.00, 2.00f}, {6.00f, 3.00f}, {8.00f, 4.00f}, {10.00f, 5.00f}, {12.00f, 6.00f}, {14.00f, 7.00f}, {16.00f, 8.00f},
{2.00f, 1.00f}, {4.00, 2.00f}, {6.00f, 3.00f}, {8.00f, 4.00f}, {10.00f, 5.00f}, {12.00f, 6.00f}, {14.00f, 7.00f}, {16.00f, 8.00f},
{2.00f, 1.00f}, {4.00, 2.00f}, {6.00f, 3.00f}, {8.00f, 4.00f}, {10.00f, 5.00f}, {12.00f, 6.00f}, {14.00f, 7.00f}, {16.00f, 8.00f},
{2.00f, 1.00f}, {4.00, 2.00f}, {6.00f, 3.00f}, {8.00f, 4.00f}, {10.00f, 5.00f}, {12.00f, 6.00f}, {14.00f, 7.00f}, {16.00f, 8.00f},
};
std::vector<std::complex<float>> expected_axis_0_two_sided = {
@ -1099,7 +1102,37 @@ static void ModelBuilding_DiscreteFourierTransform() {
{0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f},
{-0.000f, 0.000f}, {-0.000f, 0.000f}, {-0.000f, 0.000f}, {-0.000f, 0.000f}, {-0.000f, 0.000f}, {-0.000f, 0.000f}, {-0.000f, 0.000f}, {-0.000f, 0.000f}
};
DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_0_two_sided, 0, false /*onesided*/);
DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_0_two_sided, 0, 5, false /*onesided*/);
std::vector<std::complex<float>> expected_axis_0_two_sided_small_dft_length = {
{4.000f, 0.000f}, {8.000f, 0.000f}, {12.000f, 0.000f}, {16.000f, 0.000f}, {20.000f, 0.000f}, {24.000f, 0.000f}, {28.000f, 0.000f}, {32.000f, 0.000f},
{0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f},
{0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f},
{0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f},
{8.000f, 4.000f}, {16.000f, 8.000f}, {24.000f, 12.000f}, {32.000f, 16.000f}, {40.000f, 20.000f}, {48.000f, 24.000f}, {56.000f, 28.000f}, {64.000f, 32.000f},
{0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {-0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f},
{0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {-0.000f, 0.000f}, {0.000f, 0.000f}, {-0.000f, 0.000f}, {0.000f, 0.000f},
{0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f},
};
DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_0_two_sided_small_dft_length, 0, 4, false /*onesided*/);
std::vector<std::complex<float>> expected_axis_0_two_sided_bigger_dft_length = {
{5.000000f, 0.000000f}, {10.000000f, 0.000000f}, {15.000000f, 0.000000f}, {20.000000f, 0.000000f}, {25.000000f, 0.000000f}, {30.000000f, 0.000000f}, {35.000000f, 0.000000f}, {40.000000f, 0.000000f},
{-0.500000f, -0.866025f}, {-1.000000f, -1.732051f}, {-1.500000f, -2.598076f}, {-2.000000f, -3.464101f}, {-2.500000f, -4.330126f}, {-3.000000f, -5.196152f}, {-3.500000f, -6.062176f}, {-4.000000f, -6.928203f},
{0.500000f, -0.866025f}, {1.000000f, -1.732051f}, {1.500000f, -2.598076f}, {1.999999f, -3.464102f}, {2.499999f, -4.330127f}, {2.999999f, -5.196152f}, {3.499999f, -6.062178f}, {3.999999f, -6.928203f},
{1.000000f, -0.000000f}, {2.000000f, -0.000001f}, {3.000000f, -0.000001f}, {4.000000f, -0.000002f}, {5.000000f, -0.000002f}, {6.000000f, -0.000002f}, {7.000000f, -0.000003f}, {8.000000f, -0.000003f},
{0.500000f, 0.866025f}, {1.000001f, 1.732051f}, {1.500001f, 2.598076f}, {2.000001f, 3.464102f}, {2.500002f, 4.330127f}, {3.000002f, 5.196153f}, {3.500002f, 6.062179f}, {4.000003f, 6.928204f},
{-0.500000f, 0.866026f}, {-1.000000f, 1.732052f}, {-1.500000f, 2.598077f}, {-2.000000f, 3.464104f}, {-2.500000f, 4.330130f}, {-2.999999f, 5.196155f}, {-3.500000f, 6.062181f}, {-4.000000f, 6.928207f},
{10.000000f, 5.000000f}, {20.000000f, 10.000000f}, {30.000000f, 15.000000f}, {40.000000f, 20.000000f}, {50.000000f, 25.000000f}, {60.000000f, 30.000000f}, {70.000000f, 35.000000f}, {80.000000f, 40.000000f},
{-0.133975f, -2.232050f}, {-0.267949f, -4.464101f}, {-0.401925f, -6.696153f}, {-0.535898f, -8.928202f}, {-0.669872f, -11.160252f}, {-0.803849f, -13.392305f}, {-0.937822f, -15.624352f}, {-1.071796f, -17.856403f},
{1.866025f, -1.232051f}, {3.732050f, -2.464102f}, {5.598075f, -3.696153f}, {7.464101f, -4.928204f}, {9.330126f, -6.160254f}, {11.196151f, -7.392306f}, {13.062176f, -8.624355f}, {14.928202f, -9.856407f},
{2.000000f, 0.999999f}, {4.000001f, 1.999998f}, {6.000001f, 2.999998f}, {8.000002f, 3.999997f}, {10.000003f, 4.999996f}, {12.000002f, 5.999995f}, {14.000003f, 6.999995f}, {16.000004f, 7.999993f},
{0.133975f, 2.232051f}, {0.267951f, 4.464102f}, {0.401926f, 6.696153f}, {0.535901f, 8.928205f}, {0.669876f, 11.160257f}, {0.803851f, 13.392306f}, {0.937826f, 15.624360f}, {1.071802f, 17.856409f},
{-1.866026f, 1.232052f}, {-3.732052f, 2.464104f}, {-5.598077f, 3.696155f}, {-7.464104f, 4.928207f}, {-9.330130f, 6.160261f}, {-11.196154f, 7.392309f}, {-13.062180f, 8.624363f}, {-14.928207f, 9.856415f},
};
DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_0_two_sided_bigger_dft_length, 0, 6, false /*onesided*/);
std::vector<std::complex<float>> expected_axis_0_one_sided = {
{5.000f, 0.000f}, {10.000f, 0.000f}, {15.000f, 0.000f}, {20.000f, 0.000f}, {25.000f, 0.000f}, {30.000f, 0.000f}, {35.000f, 0.000f}, {40.000f, 0.000f},
@ -1110,7 +1143,7 @@ static void ModelBuilding_DiscreteFourierTransform() {
{0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {-0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f},
{0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {-0.000f, 0.000f}, {0.000f, 0.000f}, {-0.000f, 0.000f}, {0.000f, 0.000f},
};
DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_0_one_sided, 0, true /*onesided*/);
DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_0_one_sided, 0, 5, true /*onesided*/);
std::vector<std::complex<float>> expected_axis_1_two_sided = {
{36.000f, 0.000f}, {-4.000f, 9.657f}, {-4.000f, 4.000f}, {-4.000f, 1.657f}, {-4.000f, 0.000f}, {-4.000f, -1.657f}, {-4.000f, -4.000f}, {-4.000f, -9.657f},
@ -1125,7 +1158,7 @@ static void ModelBuilding_DiscreteFourierTransform() {
{72.000f, 36.000f}, {-17.657f, 15.314f}, {-12.000f, 4.000f}, {-9.657f, -0.686f}, {-8.000f, -4.000f}, {-6.343f, -7.314f}, {-4.000f, -12.000f}, {1.657f, -23.314f},
{72.000f, 36.000f}, {-17.657f, 15.314f}, {-12.000f, 4.000f}, {-9.657f, -0.686f}, {-8.000f, -4.000f}, {-6.343f, -7.314f}, {-4.000f, -12.000f}, {1.657f, -23.314f},
};
DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_1_two_sided, 1, false /*onesided*/);
DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_1_two_sided, 1, 8, false /*onesided*/);
std::vector<std::complex<float>> expected_axis_1_one_sided = {
{36.000f, 0.000f}, {-4.000f, 9.657f}, {-4.000f, 4.000f}, {-4.000f, 1.657f}, {-4.000f, 0.000f},
@ -1139,7 +1172,7 @@ static void ModelBuilding_DiscreteFourierTransform() {
{72.000f, 36.000f}, {-17.657f, 15.314f}, {-12.000f, 4.000f}, {-9.657f, -0.686f}, {-8.000f, -4.000f},
{72.000f, 36.000f}, {-17.657f, 15.314f}, {-12.000f, 4.000f}, {-9.657f, -0.686f}, {-8.000f, -4.000f},
};
DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_1_one_sided, 1, true /*onesided*/);
DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_1_one_sided, 1, 8, true /*onesided*/);
DiscreteFourierTransform_2D();
@ -1175,7 +1208,7 @@ static void DiscreteFourierTransformInverse(size_t axis) {
1, 2, 3, 4, 5, 6, 7, 8,
1, 2, 3, 4, 5, 6, 7, 8,
1, 2, 3, 4, 5, 6, 7, 8,
1, 2, 3, 4, 5, 6, 7, 8,
1, 2, 3, 4, 5, 6, 7, 8,
2, 4, 6, 8, 10, 12, 14, 16,
2, 4, 6, 8, 10, 12, 14, 16,
@ -1192,7 +1225,7 @@ static void DiscreteFourierTransformInverse(size_t axis) {
// Evaluate
auto result = session.Evaluate(binding, L"");
// Check results
auto y_tensor = result.Outputs().Lookup(L"Output.Inverse").as<TensorFloat>();
auto y_ivv = y_tensor.GetAsVectorView();
@ -1200,7 +1233,7 @@ static void DiscreteFourierTransformInverse(size_t axis) {
constexpr float error_threshold = .001f;
WINML_EXPECT_TRUE(abs(y_ivv.GetAt(i) - input_vector[i / 2]) < error_threshold);
WINML_EXPECT_TRUE(abs(y_ivv.GetAt(i + 1) - 0) < error_threshold);
}
}
}
#endif
@ -1299,7 +1332,7 @@ static void SetIntraOpThreadSpinning() {
auto device = LearningModelDevice(LearningModelDeviceKind::Cpu);
auto shape = std::vector<int64_t>{1, 1000};
auto model = ProtobufHelpers::CreateModel(TensorKind::Float, shape, 1000);
std::vector<float> input(1000);
std::iota(std::begin(input), std::end(input), 0.0f);
auto tensor_input = TensorFloat::CreateFromArray(shape, input);