From 85fa168dc1a9d4866472d3345f71a2fc2d99f979 Mon Sep 17 00:00:00 2001 From: Sheil Kumar Date: Tue, 3 May 2022 16:17:43 -0700 Subject: [PATCH] Add optional dft_length input to the DFT and IDFT operators. (#11427) * Add optional dft_length input. * CR Feedback Co-authored-by: Sheil Kumar --- onnxruntime/contrib_ops/cpu/signal/dft.cc | 135 +++++++++--------- .../core/graph/signal_ops/signal_defs.cc | 93 +++++++----- .../test/api/LearningModelSessionAPITest.cpp | 85 +++++++---- 3 files changed, 188 insertions(+), 125 deletions(-) diff --git a/onnxruntime/contrib_ops/cpu/signal/dft.cc b/onnxruntime/contrib_ops/cpu/signal/dft.cc index 19bfb58ddf..5fb6d6b9e1 100644 --- a/onnxruntime/contrib_ops/cpu/signal/dft.cc +++ b/onnxruntime/contrib_ops/cpu/signal/dft.cc @@ -43,6 +43,26 @@ ONNX_OPERATOR_KERNEL_EX( .TypeConstraint("T2", BuildKernelDefConstraints()), STFT); +// dedupe with the other one in window_functions.cc +template +static T get_scalar_value_from_tensor(const Tensor* tensor) { + ORT_ENFORCE(tensor->Shape().Size() == 1, "ratio input should have a single value."); + + auto data_type = tensor->DataType()->AsPrimitiveDataType()->GetDataType(); + switch (data_type) { + case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: + return static_cast(*reinterpret_cast(tensor->DataRaw())); + case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE: + return static_cast(*reinterpret_cast(tensor->DataRaw())); + case ONNX_NAMESPACE::TensorProto_DataType_INT32: + return static_cast(*reinterpret_cast(tensor->DataRaw())); + case ONNX_NAMESPACE::TensorProto_DataType_INT64: + return static_cast(*reinterpret_cast(tensor->DataRaw())); + default: + ORT_THROW("Unsupported input data type of ", data_type); + } +} + static bool is_real_valued_signal(const onnxruntime::TensorShape & shape) { return shape.NumDimensions() == 2 || shape[shape.NumDimensions() - 1] == 1; } @@ -141,7 +161,7 @@ static T compute_angular_velocity(size_t number_of_samples, bool inverse) { template static Status fft_radix2(OpKernelContext* /*ctx*/, const Tensor* X, Tensor* Y, - size_t X_offset, size_t X_stride, size_t Y_offset, size_t Y_stride, int64_t axis, + size_t X_offset, size_t X_stride, size_t Y_offset, size_t Y_stride, int64_t axis, size_t dft_length, const Tensor* window, bool is_onesided, bool inverse, std::vector>& V, std::vector>& temp_output) { @@ -149,7 +169,7 @@ static Status fft_radix2(OpKernelContext* /*ctx*/, // Get shape and significant bits const auto& X_shape = X->Shape(); size_t number_of_samples = static_cast(X_shape[axis]); - unsigned significant_bits = static_cast(log2(number_of_samples)); + unsigned significant_bits = static_cast(log2(dft_length)); // Get data auto* X_data = const_cast(reinterpret_cast(X->DataRaw())) + X_offset; @@ -162,8 +182,8 @@ static Status fft_radix2(OpKernelContext* /*ctx*/, size_t Y_data_stride = 1; std::complex* Y_data; if (is_onesided) { - if (temp_output.size() != number_of_samples) { - temp_output = std::vector>(number_of_samples); + if (temp_output.size() != dft_length) { + temp_output = std::vector>(dft_length); } Y_data = temp_output.data(); } else { @@ -171,34 +191,34 @@ static Status fft_radix2(OpKernelContext* /*ctx*/, Y_data_stride = Y_stride; } - auto angular_velocity = compute_angular_velocity(number_of_samples, inverse); + auto angular_velocity = compute_angular_velocity(dft_length, inverse); // Create vandermonde matrix V ordered with the bit-reversed permutation - if (V.size() != number_of_samples) { - V = std::vector>(number_of_samples); // e^(i *2*pi / N * k) - for (size_t i = 0; i < number_of_samples; i++) { + if (V.size() != dft_length) { + V = std::vector>(dft_length); // e^(i *2*pi / N * k) + for (size_t i = 0; i < dft_length; i++) { size_t bit_reversed_index = bit_reverse(i, significant_bits); V[bit_reversed_index] = std::complex(cos(i * angular_velocity), sin(i * angular_velocity)); } } - for (size_t i = 0; i < number_of_samples; i++) { + for (size_t i = 0; i < dft_length; i++) { size_t bit_reversed_index = bit_reverse(i, significant_bits); - auto x = *(X_data + bit_reversed_index*X_stride); - auto window_element = window_data ? *(window_data + bit_reversed_index) : 1; + auto x = (bit_reversed_index < number_of_samples) ? * (X_data + bit_reversed_index * X_stride) : 0; + auto window_element = window_data ? *(window_data + bit_reversed_index) : 1; *(Y_data + i*Y_data_stride) = std::complex(1, 0) * x * window_element; } // Run fft_radix2 unsigned current_significant_bits = 0; - for (size_t i = 2; i <= number_of_samples; i <<= 1) { + for (size_t i = 2; i <= dft_length; i <<= 1) { size_t midpoint = i >> 1; current_significant_bits++; for (size_t k = 0; k < midpoint; k++) { auto first_idx = bit_reverse(k, current_significant_bits); auto second_idx = bit_reverse(midpoint + k, current_significant_bits); - for (size_t j = 0; j < number_of_samples; j += i) { + for (size_t j = 0; j < dft_length; j += i) { auto even_index = k + j; auto odd_index = k + j + midpoint; std::complex* even = (Y_data + even_index * Y_data_stride); @@ -213,15 +233,15 @@ static Status fft_radix2(OpKernelContext* /*ctx*/, // Scale the output if inverse if (inverse) { - for (size_t i = 0; i < number_of_samples; i++) { + for (size_t i = 0; i < dft_length; i++) { std::complex& val = *(Y_data + i * Y_data_stride); - val /= static_cast(number_of_samples); + val /= static_cast(dft_length); } } if (is_onesided) { auto destination = reinterpret_cast*>(Y->MutableDataRaw()) + Y_offset; - for (size_t i = 0; i < number_of_samples; i++) { + for (size_t i = 0; i < dft_length; i++) { *(destination + Y_stride * i) = *(Y_data + i * Y_data_stride); } } @@ -232,7 +252,7 @@ static Status fft_radix2(OpKernelContext* /*ctx*/, template static Status dft_naive(const Tensor* X, Tensor* Y, size_t X_offset, size_t X_stride, size_t Y_offset, size_t Y_stride, int64_t axis, - const Tensor* window, bool inverse) { + size_t dft_length, const Tensor* window, bool inverse) { // Get shape and significant bits const auto& X_shape = X->Shape(); size_t number_of_samples = static_cast(X_shape[axis]); @@ -242,28 +262,29 @@ static Status dft_naive(const Tensor* X, Tensor* Y, // Get data auto* X_data = const_cast(reinterpret_cast(X->DataRaw())) + X_offset; auto* Y_data = reinterpret_cast*>(Y->MutableDataRaw()) + Y_offset; - + U* window_data = nullptr; if (window) { window_data = const_cast(reinterpret_cast(window->DataRaw())); } - auto angular_velocity = compute_angular_velocity(number_of_samples, inverse); + auto angular_velocity = compute_angular_velocity(dft_length, inverse); for (size_t i = 0; i < dft_output_size; i++) { std::complex& out = *(Y_data + i*Y_stride); out.real(0); out.imag(0); - for (size_t j = 0; j < number_of_samples; j++) { // vectorize over this loop + for (size_t j = 0; j < dft_length; j++) { // vectorize over this loop auto exponential = std::complex(cos(i * j * angular_velocity), sin(i * j * angular_velocity)); auto window_element = window_data ? * (window_data + j) : 1; - auto element = *(X_data + j*X_stride) * window_element; + auto x = (j < number_of_samples) ? *(X_data + j * X_stride) : 0; + auto element = x * window_element; out += exponential * element; } if (inverse) { - out /= static_cast(number_of_samples); + out /= static_cast(dft_length); } } @@ -271,31 +292,28 @@ static Status dft_naive(const Tensor* X, Tensor* Y, } template -static Status discrete_fourier_transform(OpKernelContext* ctx, const Tensor* X, Tensor* Y, int64_t axis, const Tensor* window, bool is_onesided, bool inverse, +static Status discrete_fourier_transform(OpKernelContext* ctx, const Tensor* X, Tensor* Y, int64_t axis, int64_t dft_length, const Tensor* window, bool is_onesided, bool inverse, std::vector>& V, std::vector>& temp_output) { // Get shape const auto& X_shape = X->Shape(); const auto& Y_shape = Y->Shape(); - size_t number_of_samples = static_cast(X_shape[axis]); - + auto batch_and_signal_rank = X->Shape().NumDimensions(); auto total_dfts = static_cast(X->Shape().Size() / X->Shape()[axis]); auto is_input_real = X->Shape().NumDimensions() == 2 || X->Shape()[X->Shape().NumDimensions() - 1] == 1; - auto compex_input_factor = is_input_real ? 1 : 2; + auto complex_input_factor = is_input_real ? 1 : 2; if (X->Shape().NumDimensions() > 2) { total_dfts /= X->Shape()[X->Shape().NumDimensions() - 1]; batch_and_signal_rank -= 1; } - - // Calculate x/y offsets/strides for (size_t i = 0; i < total_dfts; i++) { size_t X_offset = 0; - size_t X_stride = X_shape.SizeFromDimension(axis+1) / compex_input_factor; + size_t X_stride = X_shape.SizeFromDimension(axis+1) / complex_input_factor; size_t cumulative_packed_stride = total_dfts; size_t temp = i; for (size_t r = 0; r < batch_and_signal_rank; r++) { @@ -306,7 +324,7 @@ static Status discrete_fourier_transform(OpKernelContext* ctx, const Tensor* X, cumulative_packed_stride /= X_shape[r]; auto index = temp / cumulative_packed_stride; temp -= (index * cumulative_packed_stride); - X_offset += index * X_shape.SizeFromDimension(r + 1) / compex_input_factor; + X_offset += index * X_shape.SizeFromDimension(r + 1) / complex_input_factor; } size_t Y_offset = 0; @@ -324,12 +342,12 @@ static Status discrete_fourier_transform(OpKernelContext* ctx, const Tensor* X, Y_offset += index * Y_shape.SizeFromDimension(r + 1) / 2; } - if (is_power_of_2(number_of_samples)) { - ORT_RETURN_IF_ERROR((fft_radix2(ctx, X, Y, X_offset, X_stride, Y_offset, Y_stride, axis, window, is_onesided, inverse, V, temp_output))); + if (is_power_of_2(dft_length)) { + ORT_RETURN_IF_ERROR((fft_radix2(ctx, X, Y, X_offset, X_stride, Y_offset, Y_stride, axis, dft_length, window, is_onesided, inverse, V, temp_output))); } else { - ORT_RETURN_IF_ERROR((dft_naive(X, Y, X_offset, X_stride, Y_offset, Y_stride, axis, window, inverse))); + ORT_RETURN_IF_ERROR((dft_naive(X, Y, X_offset, X_stride, Y_offset, Y_stride, axis, dft_length, window, inverse))); } - } + } return Status::OK(); } @@ -337,13 +355,20 @@ static Status discrete_fourier_transform(OpKernelContext* ctx, const Tensor* X, static Status discrete_fourier_transform(OpKernelContext* ctx, int64_t axis, bool is_onesided, bool inverse) { // Get input shape const auto* X = ctx->Input(0); + const auto* dft_length = ctx->Input(1); const auto& X_shape = X->Shape(); const auto is_real_valued = is_real_valued_signal(X_shape); const auto is_complex_valued = is_complex_valued_signal(X_shape); + int64_t number_of_samples = static_cast(X_shape[axis]); + if (dft_length) { + const auto& dft_length_shape = dft_length->Shape(); + ORT_RETURN_IF(!dft_length_shape.IsScalar(), "dft_length must be a scalar value."); + number_of_samples = static_cast(get_scalar_value_from_tensor(dft_length)); + } + // Get the DFT output size. Onesided will return only the unique values! // note: x >> 1 === std::floor(x / 2.f) - int64_t number_of_samples = static_cast(X_shape[axis]); auto dft_output_size = is_onesided ? ((number_of_samples >> 1) + 1) : number_of_samples; @@ -353,7 +378,7 @@ static Status discrete_fourier_transform(OpKernelContext* ctx, int64_t axis, boo if (X_shape.NumDimensions() == 2) { Y_shape = onnxruntime::TensorShape({X_shape[0], dft_output_size, 2}); - } else + } else { Y_shape[Y_shape.NumDimensions() - 1] = 2; } @@ -368,9 +393,9 @@ static Status discrete_fourier_transform(OpKernelContext* ctx, int64_t axis, boo std::vector> V; std::vector> temp_output; if (is_real_valued) { - ORT_RETURN_IF_ERROR((discrete_fourier_transform(ctx, X, Y, axis, nullptr, is_onesided, inverse, V, temp_output))); + ORT_RETURN_IF_ERROR((discrete_fourier_transform(ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output))); } else if (is_complex_valued) { - ORT_RETURN_IF_ERROR((discrete_fourier_transform>(ctx, X, Y, axis, nullptr, is_onesided, inverse, V, temp_output))); + ORT_RETURN_IF_ERROR((discrete_fourier_transform>(ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output))); } else { ORT_THROW("Unsupported input signal shape. The signal's first dimenstion must be the batch dimension and its second dimension must be the signal length dimension. It may optionally include a 3rd dimension of size 2 for complex inputs.", data_type); } @@ -378,9 +403,9 @@ static Status discrete_fourier_transform(OpKernelContext* ctx, int64_t axis, boo std::vector> V; std::vector> temp_output; if (is_real_valued) { - ORT_RETURN_IF_ERROR((discrete_fourier_transform(ctx, X, Y, axis, nullptr, is_onesided, inverse, V, temp_output))); + ORT_RETURN_IF_ERROR((discrete_fourier_transform(ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output))); } else if (is_complex_valued) { - ORT_RETURN_IF_ERROR((discrete_fourier_transform>(ctx, X, Y, axis, nullptr, is_onesided, inverse, V, temp_output))); + ORT_RETURN_IF_ERROR((discrete_fourier_transform>(ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output))); } else { ORT_THROW("Unsupported input signal shape. The signal's first dimenstion must be the batch dimension and its second dimension must be the signal length dimension. It may optionally include a 3rd dimension of size 2 for complex inputs.", data_type); } @@ -401,26 +426,6 @@ Status IDFT::Compute(OpKernelContext* ctx) const { return Status::OK(); } -// dedupe with the other one in window_functions.cc -template -static T get_scalar_value_from_tensor(const Tensor* tensor) { - ORT_ENFORCE(tensor->Shape().Size() == 1, "ratio input should have a single value."); - - auto data_type = tensor->DataType()->AsPrimitiveDataType()->GetDataType(); - switch (data_type) { - case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: - return static_cast(*reinterpret_cast(tensor->DataRaw())); - case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE: - return static_cast(*reinterpret_cast(tensor->DataRaw())); - case ONNX_NAMESPACE::TensorProto_DataType_INT32: - return static_cast(*reinterpret_cast(tensor->DataRaw())); - case ONNX_NAMESPACE::TensorProto_DataType_INT64: - return static_cast(*reinterpret_cast(tensor->DataRaw())); - default: - ORT_THROW("Unsupported input data type of ", data_type); - } -} - template static Status short_time_fourier_transform(OpKernelContext* ctx, bool is_onesided, bool /*inverse*/) { // Attr("onesided"): default = 1 @@ -429,7 +434,7 @@ static Status short_time_fourier_transform(OpKernelContext* ctx, bool is_oneside // Input(2, "window") type = T1, optional // Input(3, "frame_step") type = T2 // Output(0, "output") type = T1 - + // Get signal const auto* signal = ctx->Input(0); const auto frame_step = get_scalar_value_from_tensor(ctx->Input(1)); @@ -445,8 +450,8 @@ static Status short_time_fourier_transform(OpKernelContext* ctx, bool is_oneside ORT_ENFORCE(signal_components == 1 || signal_components == 2, "Ensure that the signal has either 1 or 2 components."); // Get the frame length - int64_t frame_length = std::numeric_limits::min(); - if (frame_length_tensor) + int64_t frame_length = std::numeric_limits::min(); + if (frame_length_tensor) { frame_length = get_scalar_value_from_tensor(frame_length_tensor); } @@ -524,7 +529,7 @@ static Status short_time_fourier_transform(OpKernelContext* ctx, bool is_oneside 0); // Run individual dft - ORT_RETURN_IF_ERROR((discrete_fourier_transform(ctx, &input, &output, 1, window, is_onesided, false, V, temp_output))); + ORT_RETURN_IF_ERROR((discrete_fourier_transform(ctx, &input, &output, 1, window_size, window, is_onesided, false, V, temp_output))); } } @@ -575,4 +580,4 @@ Status STFT::Compute(OpKernelContext* ctx) const { } // namespace contrib } // namespace onnxruntime -#endif \ No newline at end of file +#endif diff --git a/onnxruntime/core/graph/signal_ops/signal_defs.cc b/onnxruntime/core/graph/signal_ops/signal_defs.cc index 28de58c57c..30056f0575 100644 --- a/onnxruntime/core/graph/signal_ops/signal_defs.cc +++ b/onnxruntime/core/graph/signal_ops/signal_defs.cc @@ -78,49 +78,62 @@ void RegisterSignalSchemas() { static_cast(0)) .Input(0, "input", - "For real input, the following shape is expected: [batch_idx][n_fft]." - "For complex input, the following shape is expected: [batch_idx][n_fft][2]." - "The final dimension represents the real and imaginary parts of the value." "For real multi-dimensional input, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][1]." "For complex multi-dimensional input, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][2]." - "The first dimension is the batch dimension.", - "T") + "The first dimension is the batch dimension." + "The final dimension represents the real and imaginary parts of the value.", + "T1") + .Input(1, + "dft_length", + "The length of the signal." + "If greater than the axis dimension, the signal will be zero-padded up to dft_length. " + "If less than the axis dimension, only the first dft_length values will be used as the signal. " + "It's an optional value. ", + "T2", + OpSchema::Optional, + true, + 1, + OpSchema::NonDifferentiable) .Output(0, "output", "The Fourier Transform of the input vector." - "If signal_dimN = 1, and onesided is 0, [batch_idx][n_fft][2]" - "If signal_dimN = 1, and onesided is 1, [batch_idx][floor(n_fft/2)+1][2]" - "If signal_dimN = 2, and onesided is 0 and axis = 0, [batch_idx][signal_dim1][signal_dim2][2]" - "If signal_dimN = 2, and onesided is 0 and axis = 1, [batch_idx][signal_dim1][signal_dim2][2]" - "If signal_dimN = 2, and onesided is 1 and axis = 0, [batch_idx][floor(signal_dim1/2)+1][signal_dim2][2]" - "If signal_dimN = 2, and onesided is 1 and axis = 1, [batch_idx][signal_dim1][floor(signal_dim2/2)+1][2]", - "T") + "The signal_dim at the specified axis is equal to the dft_length." + "If onesided is 0, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][2]." + "If axis=0 and onesided is 1, the following shape is expected: [batch_idx][floor(signal_dim1/2)+1][signal_dim2]...[signal_dimN][2]." + "If axis=1 and onesided is 1, the following shape is expected: [batch_idx][signal_dim1][floor(signal_dim2/2)+1]...[signal_dimN][2]." + "If axis=N-1 and onesided is 1, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[floor(signal_dimN/2)+1][2].", + "T1") .TypeConstraint( - "T", + "T1", {"tensor(float16)", "tensor(float)", "tensor(double)", "tensor(bfloat16)"}, "Constrain input and output types to float tensors.") + .TypeConstraint( + "T2", + {"tensor(int64)"}, + "Constrain scalar length types to int64_t.") .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) { propagateElemTypeFromInputToOutput(ctx, 0, 0); const int64_t batch_ndim = 1; auto& input_shape = getInputShape(ctx, 0); auto dim_size = static_cast(input_shape.dim_size()); - auto has_component_dimension = dim_size > 2; + auto has_component_dimension = dim_size > 2; ONNX_NAMESPACE::TensorShapeProto result_shape_proto = input_shape; - + + bool axis = static_cast(getAttribute(ctx, "axis", 0)); bool is_onesided = static_cast(getAttribute(ctx, "onesided", 0)); if (is_onesided) { // Since signal_ndim = 1, and multidimensional DFT is not supported, // only the single signal dim (1) needs to be updated - auto n_fft = input_shape.dim(1).dim_value(); - result_shape_proto.mutable_dim(1)->set_dim_value((n_fft >> 1) + 1); + auto n_fft = input_shape.dim(1 + axis).dim_value(); + result_shape_proto.mutable_dim(1 + axis)->set_dim_value((n_fft >> 1) + 1); } - + if (has_component_dimension) { - result_shape_proto.mutable_dim(static_cast(dim_size - 1))->set_dim_value(2); + result_shape_proto.mutable_dim(static_cast(dim_size - 1))->set_dim_value(2); } else { - result_shape_proto.add_dim()->set_dim_value(2); + result_shape_proto.add_dim()->set_dim_value(2); } updateOutputShape(ctx, 0, result_shape_proto); @@ -137,42 +150,54 @@ void RegisterSignalSchemas() { static_cast(0)) .Input(0, "input", - "For real input, the following shape is expected: [batch_idx][n_fft]." - "For complex input, the following shape is expected: [batch_idx][n_fft][2]." - "The final dimension represents the real and imaginary parts of the value." "For real multi-dimensional input, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][1]." "For complex multi-dimensional input, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][2]." - "The first dimension is the batch dimension.", - "T") + "The first dimension is the batch dimension." + "The final dimension represents the real and imaginary parts of the value.", + "T1") + .Input(1, + "dft_length", + "The length of the signal." + "If greater than the axis dimension, the signal will be zero-padded up to dft_length. " + "If less than the axis dimension, only the first dft_length values will be used as the signal. " + "It's an optional value. ", + "T2", + OpSchema::Optional, + true, + 1, + OpSchema::NonDifferentiable) .Output(0, "output", "The inverse discrete Fourier transform of the input. " - "If signal_dimN = 1, [batch_idx][n_fft][2]" - "If signal_dimN = 2 and axis = 0, [batch_idx][signal_dim1][signal_dim2][2]" - "If signal_dimN = 2 and axis = 1, [batch_idx][signal_dim1][signal_dim2][2]" + "The signal_dim at the specified axis is equal to the dft_length." + "The expected shape is [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][2]" "For all types of input, the last dimension of the output represents the components of a complex number.", - "T", + "T1", OpSchema::Single, true, 1, OpSchema::NonDifferentiable) .TypeConstraint( - "T", + "T1", {"tensor(float16)", "tensor(float)", "tensor(double)", "tensor(bfloat16)"}, "Constrain input and output types to float tensors.") + .TypeConstraint( + "T2", + {"tensor(int64)"}, + "Constrain scalar length types to int64_t.") .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) { propagateElemTypeFromInputToOutput(ctx, 0, 0); const int64_t batch_ndim = 1; - + auto& input_shape = getInputShape(ctx, 0); ONNX_NAMESPACE::TensorShapeProto result_shape = input_shape; auto dim_size = static_cast(input_shape.dim_size()); - auto has_component_dimension = dim_size > 2; + auto has_component_dimension = dim_size > 2; if (has_component_dimension) { result_shape.mutable_dim(static_cast(dim_size - 1))->set_dim_value(2); } else { - result_shape.add_dim()->set_dim_value(2); + result_shape.add_dim()->set_dim_value(2); } updateOutputShape(ctx, 0, result_shape); @@ -450,4 +475,4 @@ void RegisterSignalSchemas() { } // namespace audio } // namespace onnxruntime -#endif \ No newline at end of file +#endif diff --git a/winml/test/api/LearningModelSessionAPITest.cpp b/winml/test/api/LearningModelSessionAPITest.cpp index e0b4687969..0aec8a9343 100644 --- a/winml/test/api/LearningModelSessionAPITest.cpp +++ b/winml/test/api/LearningModelSessionAPITest.cpp @@ -309,7 +309,7 @@ static void EvaluateSessionAndCloseModel() { WINML_EXPECT_NO_THROW(::EvaluateSessionAndCloseModelHelper(LearningModelDeviceKind::Cpu, false)); } -static void NamedDimensionOverride() +static void NamedDimensionOverride() { LearningModel model = nullptr; WINML_EXPECT_NO_THROW(APITest::LoadModel(L"fns-candy.onnx", model)); @@ -323,7 +323,7 @@ static void NamedDimensionOverride() LearningModelSessionOptions options; options.OverrideNamedDimension(L"None", n); - + // Verifies that if a Dim name doesn't exist the named dimension override does not interfere with successful evaluation // The override is still expected to be present in the internal onnxruntime override data options.OverrideNamedDimension(L"DimNameThatDoesntExist", n); @@ -441,8 +441,8 @@ static void WindowFunction(const wchar_t* window_operator_name, TensorKind kind) if (kind == TensorKind::Double) { window_operator.SetAttribute(L"output_datatype", double_data_type); } - - auto model = + + auto model = LearningModelBuilder::Create(13) .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", TensorKind::Int64, scalar_shape)) .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output", kind, output_shape)) @@ -502,7 +502,7 @@ static void DiscreteFourierTransform_2D() { IRandomAccessStream stream = imagefile.OpenAsync(FileAccessMode::Read).get(); SoftwareBitmap softwareBitmap = (BitmapDecoder::CreateAsync(stream).get()).GetSoftwareBitmapAsync().get(); VideoFrame frame = VideoFrame::CreateWithSoftwareBitmap(softwareBitmap); - + auto corrected_image = winrt::Windows::Media::VideoFrame( winrt::Windows::Graphics::Imaging::BitmapPixelFormat::Bgra8, @@ -516,7 +516,7 @@ static void DiscreteFourierTransform_2D() { std::vector shape = {1, 1, height, width}; std::vector output_shape = {1, 1, height, width}; - + auto builder = LearningModelBuilder::Create(13) .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input.Signal", TensorKind::Float, shape)) @@ -575,7 +575,7 @@ static void DiscreteFourierTransform_2D() { .SetOutput(L"C", L"Output.Error")); auto model = builder.CreateModel(); - + LearningModelSession session(model); LearningModelBinding binding(session); @@ -610,8 +610,9 @@ static void DiscreteFourierTransform_2D() { static void DiscreteFourierTransform( const std::vector>& input, const std::vector& shape, - const std::vector>& expected_output, + const std::vector>& expected_output, size_t axis, + size_t dft_length, bool is_onesided = false) { auto axis_dim = axis + 1; printf("\nDiscrete Fourier Transform [axis=%d, is_onesided=%s]\n", static_cast(axis_dim), is_onesided ? "true" : "false"); @@ -627,18 +628,20 @@ static void DiscreteFourierTransform( output_shape[output_shape.size() - 1] = 2; } output_shape[axis_dim] = is_onesided ? (1 + (shape[axis_dim] >> 1)) : shape[axis_dim]; - + auto model = LearningModelBuilder::Create(13) .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input.Signal", TensorKind::Float, shape)) + .Inputs().AddConstant(L"Input.DFTLength", TensorInt64Bit::CreateFromArray({}, {INT64(dft_length)})) .Outputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Output.Spectra", TensorKind::Float, output_shape)) .Operators().Add(Operator(L"DFT", MS_EXPERIMENTAL_DOMAIN) .SetInput(L"input", L"Input.Signal") + .SetInput(L"dft_length", L"Input.DFTLength") .SetAttribute(L"axis", TensorInt64Bit::CreateFromArray({}, {INT64(axis)})) .SetAttribute(L"onesided", TensorInt64Bit::CreateFromArray({}, {is_onesided})) .SetOutput(L"output", L"Output.Spectra")) .CreateModel(); - + LearningModelSession session(model); LearningModelBinding binding(session); @@ -663,7 +666,7 @@ static void DiscreteFourierTransform( constexpr float error_threshold = .001f; WINML_EXPECT_TRUE(abs(y_ivv.GetAt(i) - expected_output[i / 2].real()) < error_threshold); WINML_EXPECT_TRUE(abs(y_ivv.GetAt(i + 1) - expected_output[i / 2].imag()) < error_threshold); - } + } printf("\n"); } #endif @@ -726,7 +729,7 @@ static void STFT(size_t batch_size, size_t signal_size, size_t dft_size, 2 }; auto dft_length = TensorInt64Bit::CreateFromArray({}, {INT64(dft_size)}); - + auto model = LearningModelBuilder::Create(13) .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input.TimeSignal", TensorKind::Float, input_shape)) @@ -879,7 +882,7 @@ static void MelSpectrogramOnThreeToneSignal( // Bind input auto signal = MakeThreeTones(signal_size, sampling_rate); binding.Bind(L"Input.TimeSignal", TensorFloat::CreateFromArray(signal_shape, signal)); - + // Bind output auto output_image = winrt::Windows::Media::VideoFrame( @@ -918,8 +921,8 @@ static void ModelBuilding_StandardDeviationNormalization() { int64_t height = 256; int64_t width = 256; int64_t channels = 3; - std::vector input_shape = {1, height, width, channels}; - std::vector output_shape = {1, channels, height, width}; + std::vector input_shape = {1, height, width, channels}; + std::vector output_shape = {1, channels, height, width}; auto sub_model = LearningModelBuilder::Create(13) .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", L"The NHWC image", TensorKind::Float, input_shape)) @@ -930,7 +933,7 @@ static void ModelBuilding_StandardDeviationNormalization() { .SetInput(L"B", L"Means") .SetOutput(L"C", L"Output")) .CreateModel(); - auto div_model = + auto div_model = LearningModelBuilder::Create(13) .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"Input", L"The NHWC image", TensorKind::Float, input_shape)) .Inputs().Add(LearningModelBuilder::CreateTensorFeatureDescriptor(L"StdDevs", TensorKind::Float, {channels})) @@ -1077,13 +1080,13 @@ static void ModelBuilding_DiscreteFourierTransform() { {1.00f, 0.00f}, {2.00, 0.00f}, {3.00f, 0.00f}, {4.00f, 0.00f}, {5.00f, 0.00f}, {6.00f, 0.00f}, {7.00f, 0.00f}, {8.00f, 0.00f}, {1.00f, 0.00f}, {2.00, 0.00f}, {3.00f, 0.00f}, {4.00f, 0.00f}, {5.00f, 0.00f}, {6.00f, 0.00f}, {7.00f, 0.00f}, {8.00f, 0.00f}, {1.00f, 0.00f}, {2.00, 0.00f}, {3.00f, 0.00f}, {4.00f, 0.00f}, {5.00f, 0.00f}, {6.00f, 0.00f}, {7.00f, 0.00f}, {8.00f, 0.00f}, - {1.00f, 0.00f}, {2.00, 0.00f}, {3.00f, 0.00f}, {4.00f, 0.00f}, {5.00f, 0.00f}, {6.00f, 0.00f}, {7.00f, 0.00f}, {8.00f, 0.00f}, + {1.00f, 0.00f}, {2.00, 0.00f}, {3.00f, 0.00f}, {4.00f, 0.00f}, {5.00f, 0.00f}, {6.00f, 0.00f}, {7.00f, 0.00f}, {8.00f, 0.00f}, {2.00f, 1.00f}, {4.00, 2.00f}, {6.00f, 3.00f}, {8.00f, 4.00f}, {10.00f, 5.00f}, {12.00f, 6.00f}, {14.00f, 7.00f}, {16.00f, 8.00f}, {2.00f, 1.00f}, {4.00, 2.00f}, {6.00f, 3.00f}, {8.00f, 4.00f}, {10.00f, 5.00f}, {12.00f, 6.00f}, {14.00f, 7.00f}, {16.00f, 8.00f}, {2.00f, 1.00f}, {4.00, 2.00f}, {6.00f, 3.00f}, {8.00f, 4.00f}, {10.00f, 5.00f}, {12.00f, 6.00f}, {14.00f, 7.00f}, {16.00f, 8.00f}, {2.00f, 1.00f}, {4.00, 2.00f}, {6.00f, 3.00f}, {8.00f, 4.00f}, {10.00f, 5.00f}, {12.00f, 6.00f}, {14.00f, 7.00f}, {16.00f, 8.00f}, - {2.00f, 1.00f}, {4.00, 2.00f}, {6.00f, 3.00f}, {8.00f, 4.00f}, {10.00f, 5.00f}, {12.00f, 6.00f}, {14.00f, 7.00f}, {16.00f, 8.00f}, + {2.00f, 1.00f}, {4.00, 2.00f}, {6.00f, 3.00f}, {8.00f, 4.00f}, {10.00f, 5.00f}, {12.00f, 6.00f}, {14.00f, 7.00f}, {16.00f, 8.00f}, }; std::vector> expected_axis_0_two_sided = { @@ -1099,7 +1102,37 @@ static void ModelBuilding_DiscreteFourierTransform() { {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {-0.000f, 0.000f}, {-0.000f, 0.000f}, {-0.000f, 0.000f}, {-0.000f, 0.000f}, {-0.000f, 0.000f}, {-0.000f, 0.000f}, {-0.000f, 0.000f}, {-0.000f, 0.000f} }; - DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_0_two_sided, 0, false /*onesided*/); + DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_0_two_sided, 0, 5, false /*onesided*/); + + std::vector> expected_axis_0_two_sided_small_dft_length = { + {4.000f, 0.000f}, {8.000f, 0.000f}, {12.000f, 0.000f}, {16.000f, 0.000f}, {20.000f, 0.000f}, {24.000f, 0.000f}, {28.000f, 0.000f}, {32.000f, 0.000f}, + {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, + {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, + {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, + + {8.000f, 4.000f}, {16.000f, 8.000f}, {24.000f, 12.000f}, {32.000f, 16.000f}, {40.000f, 20.000f}, {48.000f, 24.000f}, {56.000f, 28.000f}, {64.000f, 32.000f}, + {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {-0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, + {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {-0.000f, 0.000f}, {0.000f, 0.000f}, {-0.000f, 0.000f}, {0.000f, 0.000f}, + {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, + }; + DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_0_two_sided_small_dft_length, 0, 4, false /*onesided*/); + + std::vector> expected_axis_0_two_sided_bigger_dft_length = { + {5.000000f, 0.000000f}, {10.000000f, 0.000000f}, {15.000000f, 0.000000f}, {20.000000f, 0.000000f}, {25.000000f, 0.000000f}, {30.000000f, 0.000000f}, {35.000000f, 0.000000f}, {40.000000f, 0.000000f}, + {-0.500000f, -0.866025f}, {-1.000000f, -1.732051f}, {-1.500000f, -2.598076f}, {-2.000000f, -3.464101f}, {-2.500000f, -4.330126f}, {-3.000000f, -5.196152f}, {-3.500000f, -6.062176f}, {-4.000000f, -6.928203f}, + {0.500000f, -0.866025f}, {1.000000f, -1.732051f}, {1.500000f, -2.598076f}, {1.999999f, -3.464102f}, {2.499999f, -4.330127f}, {2.999999f, -5.196152f}, {3.499999f, -6.062178f}, {3.999999f, -6.928203f}, + {1.000000f, -0.000000f}, {2.000000f, -0.000001f}, {3.000000f, -0.000001f}, {4.000000f, -0.000002f}, {5.000000f, -0.000002f}, {6.000000f, -0.000002f}, {7.000000f, -0.000003f}, {8.000000f, -0.000003f}, + {0.500000f, 0.866025f}, {1.000001f, 1.732051f}, {1.500001f, 2.598076f}, {2.000001f, 3.464102f}, {2.500002f, 4.330127f}, {3.000002f, 5.196153f}, {3.500002f, 6.062179f}, {4.000003f, 6.928204f}, + {-0.500000f, 0.866026f}, {-1.000000f, 1.732052f}, {-1.500000f, 2.598077f}, {-2.000000f, 3.464104f}, {-2.500000f, 4.330130f}, {-2.999999f, 5.196155f}, {-3.500000f, 6.062181f}, {-4.000000f, 6.928207f}, + + {10.000000f, 5.000000f}, {20.000000f, 10.000000f}, {30.000000f, 15.000000f}, {40.000000f, 20.000000f}, {50.000000f, 25.000000f}, {60.000000f, 30.000000f}, {70.000000f, 35.000000f}, {80.000000f, 40.000000f}, + {-0.133975f, -2.232050f}, {-0.267949f, -4.464101f}, {-0.401925f, -6.696153f}, {-0.535898f, -8.928202f}, {-0.669872f, -11.160252f}, {-0.803849f, -13.392305f}, {-0.937822f, -15.624352f}, {-1.071796f, -17.856403f}, + {1.866025f, -1.232051f}, {3.732050f, -2.464102f}, {5.598075f, -3.696153f}, {7.464101f, -4.928204f}, {9.330126f, -6.160254f}, {11.196151f, -7.392306f}, {13.062176f, -8.624355f}, {14.928202f, -9.856407f}, + {2.000000f, 0.999999f}, {4.000001f, 1.999998f}, {6.000001f, 2.999998f}, {8.000002f, 3.999997f}, {10.000003f, 4.999996f}, {12.000002f, 5.999995f}, {14.000003f, 6.999995f}, {16.000004f, 7.999993f}, + {0.133975f, 2.232051f}, {0.267951f, 4.464102f}, {0.401926f, 6.696153f}, {0.535901f, 8.928205f}, {0.669876f, 11.160257f}, {0.803851f, 13.392306f}, {0.937826f, 15.624360f}, {1.071802f, 17.856409f}, + {-1.866026f, 1.232052f}, {-3.732052f, 2.464104f}, {-5.598077f, 3.696155f}, {-7.464104f, 4.928207f}, {-9.330130f, 6.160261f}, {-11.196154f, 7.392309f}, {-13.062180f, 8.624363f}, {-14.928207f, 9.856415f}, + }; + DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_0_two_sided_bigger_dft_length, 0, 6, false /*onesided*/); std::vector> expected_axis_0_one_sided = { {5.000f, 0.000f}, {10.000f, 0.000f}, {15.000f, 0.000f}, {20.000f, 0.000f}, {25.000f, 0.000f}, {30.000f, 0.000f}, {35.000f, 0.000f}, {40.000f, 0.000f}, @@ -1110,7 +1143,7 @@ static void ModelBuilding_DiscreteFourierTransform() { {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {-0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {0.000f, 0.000f}, {-0.000f, 0.000f}, {0.000f, 0.000f}, {-0.000f, 0.000f}, {0.000f, 0.000f}, }; - DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_0_one_sided, 0, true /*onesided*/); + DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_0_one_sided, 0, 5, true /*onesided*/); std::vector> expected_axis_1_two_sided = { {36.000f, 0.000f}, {-4.000f, 9.657f}, {-4.000f, 4.000f}, {-4.000f, 1.657f}, {-4.000f, 0.000f}, {-4.000f, -1.657f}, {-4.000f, -4.000f}, {-4.000f, -9.657f}, @@ -1125,7 +1158,7 @@ static void ModelBuilding_DiscreteFourierTransform() { {72.000f, 36.000f}, {-17.657f, 15.314f}, {-12.000f, 4.000f}, {-9.657f, -0.686f}, {-8.000f, -4.000f}, {-6.343f, -7.314f}, {-4.000f, -12.000f}, {1.657f, -23.314f}, {72.000f, 36.000f}, {-17.657f, 15.314f}, {-12.000f, 4.000f}, {-9.657f, -0.686f}, {-8.000f, -4.000f}, {-6.343f, -7.314f}, {-4.000f, -12.000f}, {1.657f, -23.314f}, }; - DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_1_two_sided, 1, false /*onesided*/); + DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_1_two_sided, 1, 8, false /*onesided*/); std::vector> expected_axis_1_one_sided = { {36.000f, 0.000f}, {-4.000f, 9.657f}, {-4.000f, 4.000f}, {-4.000f, 1.657f}, {-4.000f, 0.000f}, @@ -1139,7 +1172,7 @@ static void ModelBuilding_DiscreteFourierTransform() { {72.000f, 36.000f}, {-17.657f, 15.314f}, {-12.000f, 4.000f}, {-9.657f, -0.686f}, {-8.000f, -4.000f}, {72.000f, 36.000f}, {-17.657f, 15.314f}, {-12.000f, 4.000f}, {-9.657f, -0.686f}, {-8.000f, -4.000f}, }; - DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_1_one_sided, 1, true /*onesided*/); + DiscreteFourierTransform(input, {2, 5, 8, 2}, expected_axis_1_one_sided, 1, 8, true /*onesided*/); DiscreteFourierTransform_2D(); @@ -1175,7 +1208,7 @@ static void DiscreteFourierTransformInverse(size_t axis) { 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, - 1, 2, 3, 4, 5, 6, 7, 8, + 1, 2, 3, 4, 5, 6, 7, 8, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16, @@ -1192,7 +1225,7 @@ static void DiscreteFourierTransformInverse(size_t axis) { // Evaluate auto result = session.Evaluate(binding, L""); - + // Check results auto y_tensor = result.Outputs().Lookup(L"Output.Inverse").as(); auto y_ivv = y_tensor.GetAsVectorView(); @@ -1200,7 +1233,7 @@ static void DiscreteFourierTransformInverse(size_t axis) { constexpr float error_threshold = .001f; WINML_EXPECT_TRUE(abs(y_ivv.GetAt(i) - input_vector[i / 2]) < error_threshold); WINML_EXPECT_TRUE(abs(y_ivv.GetAt(i + 1) - 0) < error_threshold); - } + } } #endif @@ -1299,7 +1332,7 @@ static void SetIntraOpThreadSpinning() { auto device = LearningModelDevice(LearningModelDeviceKind::Cpu); auto shape = std::vector{1, 1000}; auto model = ProtobufHelpers::CreateModel(TensorKind::Float, shape, 1000); - + std::vector input(1000); std::iota(std::begin(input), std::end(input), 0.0f); auto tensor_input = TensorFloat::CreateFromArray(shape, input);