mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-17 21:10:43 +00:00
On Windows, clang-format has a bug when AlignTrailingComments.Kind is set to `Leave` (https://clang.llvm.org/docs/ClangFormatStyleOptions.html#aligntrailingcomments): it keeps adding indentation to comments after each formatting run. This PR changes the option to always align comments so that we do not hit the bug. As a consequence of the option change, some of the files need to be reformatted. Note that the new setting is consistent with the rest of the repository.
135 lines
5.6 KiB
C++
135 lines
5.6 KiB
C++
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
// Licensed under the MIT License.
|
|
|
|
#pragma once
|
|
|
|
#include <algorithm>
#include <cstdint>
#include <functional>
#include <memory>
#include <numeric>
#include <vector>

#include "core/providers/dml/DmlExecutionProvider/inc/MLOperatorAuthor.h"
#include "core/common//common.h"
|
|
|
|
// Shape inferrer for the custom Relu operator: output 0 is given exactly the
// shape reported for input 0.
struct ReluShapeInferrer : winrt::implements<ReluShapeInferrer, IMLOperatorShapeInferrer> {
  // Copies the shape of input tensor 0 onto output tensor 0.
  //
  // Returns S_OK on success; otherwise the failing HRESULT of the first
  // context call that did not succeed.
  //
  // The method is noexcept, so an allocation failure for the temporary
  // dimension buffer terminates the process (same as the original raw `new`).
  STDMETHOD(InferOutputShapes)(IMLOperatorShapeInferenceContext* context) noexcept {
    // Zero-initialized so a failed query can never leave an indeterminate size.
    uint32_t inputDimsSize = 0;
    HRESULT hr = context->GetInputTensorDimensionCount(0, &inputDimsSize);
    if (FAILED(hr)) {
      return hr;
    }

    // Owning buffer: released automatically on every return path. The array
    // form keeps a non-null pointer even when the tensor has zero dimensions.
    const auto inputDims = std::make_unique<uint32_t[]>(inputDimsSize);
    hr = context->GetInputTensorShape(0, inputDimsSize, inputDims.get());
    if (FAILED(hr)) {
      return hr;
    }

    hr = context->SetOutputTensorShape(0, inputDimsSize, inputDims.get());
    if (FAILED(hr)) {
      return hr;
    }

    return S_OK;
  }
};
|
|
|
|
// Custom CPU kernel implementing the Rectified Linear Unit (Relu) operator
// for float and double tensors.
struct ReluOperator : winrt::implements<ReluOperator, IMLOperatorKernel> {
  ReluOperator() {}

  // Computes the outputs of the kernel. In this case, the output will represent
  // the Rectified Linear Unit (Relu) output.
  //
  // Based on the operator's location in the model graph this operator may be called multiple times
  // or simultaneously within the same instance of the class during evaluation. Implementations
  // of this method must be thread-safe.
  //
  // Returns:
  //   - the failing HRESULT if a tensor or its shape cannot be retrieved,
  //   - E_UNEXPECTED if input and output differ in rank or element count,
  //   - S_OK otherwise.
  //
  // NOTE(review): S_OK is also returned, without writing any output, when the
  // tensors are not both float or both double, or when either is not CPU data.
  // That matches the previous behavior; confirm whether an error is preferable.
  STDMETHOD(Compute)(IMLOperatorKernelContext* context) {
    // Get the input tensor
    winrt::com_ptr<IMLOperatorTensor> inputTensor;
    HRESULT hr = context->GetInputTensor(0, inputTensor.put());
    if (FAILED(hr)) {
      return hr;
    }

    // Get the output tensor
    winrt::com_ptr<IMLOperatorTensor> outputTensor;
    hr = context->GetOutputTensor(0, outputTensor.put());
    if (FAILED(hr)) {
      return hr;
    }

    // Get the input and output shape sizes
    const uint32_t inputDimsSize = inputTensor->GetDimensionCount();
    const uint32_t outputDimsSize = outputTensor->GetDimensionCount();
    if (inputDimsSize != outputDimsSize) {
      return E_UNEXPECTED;
    }

    // Get the input shape. This must be read from the *input* tensor;
    // reading it from the output tensor would make the element-count check
    // below compare the output shape with itself.
    std::vector<uint32_t> inputDims(inputDimsSize);
    hr = inputTensor->GetShape(inputDimsSize, inputDims.data());
    if (FAILED(hr)) {
      return hr;
    }

    // Get the output shape
    std::vector<uint32_t> outputDims(outputDimsSize);
    hr = outputTensor->GetShape(outputDimsSize, outputDims.data());
    if (FAILED(hr)) {
      return hr;
    }

    // Total number of elements in the input and output shapes. The seed is a
    // uint32_t so the product is accumulated in the same unsigned type that
    // ComputeInternal takes as its element count.
    const uint32_t outputDataSize =
        std::accumulate(outputDims.begin(), outputDims.end(), uint32_t{1}, std::multiplies<uint32_t>());
    const uint32_t inputDataSize =
        std::accumulate(inputDims.begin(), inputDims.end(), uint32_t{1}, std::multiplies<uint32_t>());
    if (outputDataSize != inputDataSize) {
      return E_UNEXPECTED;
    }

    const MLOperatorTensorDataType outputType = outputTensor->GetTensorDataType();
    const MLOperatorTensorDataType inputType = inputTensor->GetTensorDataType();

    // If the tensor types are both float type
    if (outputType == MLOperatorTensorDataType::Float && inputType == MLOperatorTensorDataType::Float) {
      // For cpu data
      if (outputTensor->IsCpuData() && inputTensor->IsCpuData()) {
        ComputeInternal<float>(inputTensor.get(), outputTensor.get(), inputDataSize);
      }
    } else if (outputType == MLOperatorTensorDataType::Double && inputType == MLOperatorTensorDataType::Double) {
      // For cpu data
      if (outputTensor->IsCpuData() && inputTensor->IsCpuData()) {
        ComputeInternal<double>(inputTensor.get(), outputTensor.get(), inputDataSize);
      }
    }

    return S_OK;
  }

  // Applies Relu element-wise: outputData[i] = max(0, inputData[i]) for the
  // first `size` elements.
  //
  // T is the input element type and U the output element type (defaults to T).
  // The tensors' GetData() buffers are reinterpreted as T* / U*, so the caller
  // must have verified the tensor data types and that both buffers hold at
  // least `size` elements.
  template <typename T, typename U = T>
  void ComputeInternal(IMLOperatorTensor* pInputTensor, IMLOperatorTensor* pOutputTensor, uint32_t size) {
    auto inputData = static_cast<T*>(pInputTensor->GetData());
    auto outputData = static_cast<U*>(pOutputTensor->GetData());

    for (uint32_t i = 0; i < size; i++) {
      outputData[i] = static_cast<U>(std::max<T>(0, inputData[i]));
    }
  }
};
|
|
|
|
// Kernel factory for the custom Relu operator. Also hosts the static helper
// that registers the kernel, together with its shape inferrer, in a registry.
struct ReluOperatorFactory : winrt::implements<ReluOperatorFactory, IMLOperatorKernelFactory> {
  // Creates a new ReluOperator and hands a reference to it back through
  // `kernel`. The creation context is not consulted. Always returns S_OK.
  STDMETHOD(CreateKernel)(IMLOperatorKernelCreationContext* context, IMLOperatorKernel** kernel) {
    ORT_UNUSED_PARAMETER(context);
    winrt::make<ReluOperator>().copy_to(kernel);
    return S_OK;
  }

  // Builds an edge description for a tensor of the given data type.
  // `type` is ignored: the edge type is always MLOperatorEdgeType::Tensor.
  static MLOperatorEdgeDescription CreateEdgeDescriptor(MLOperatorEdgeType type, MLOperatorTensorDataType dataType) {
    ORT_UNUSED_PARAMETER(type);
    MLOperatorEdgeDescription edge;
    edge.tensorDataType = dataType;
    edge.edgeType = MLOperatorEdgeType::Tensor;
    return edge;
  }

  // Registers a CPU "Relu" kernel (default domain "", minimum opset 1) with
  // `registry`. The single type constraint "T" allows Double, Float and
  // Float16 tensors. The result of RegisterOperatorKernel is not inspected.
  static void RegisterReluKernel(winrt::com_ptr<IMLOperatorRegistry> registry) {
    // Tensor element types accepted for type label "T". This vector and the
    // constraint vector below must outlive the registration call, because the
    // description only stores raw pointers into them.
    std::vector<MLOperatorEdgeDescription> permittedEdges{
        CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Double),
        CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float),
        CreateEdgeDescriptor(MLOperatorEdgeType::Tensor, MLOperatorTensorDataType::Float16)};

    MLOperatorEdgeTypeConstrant constraintT;
    constraintT.typeLabel = "T";
    constraintT.allowedTypes = permittedEdges.data();
    constraintT.allowedTypeCount = static_cast<uint32_t>(permittedEdges.size());

    std::vector<MLOperatorEdgeTypeConstrant> constraints{constraintT};

    MLOperatorKernelDescription description;

    // Operator identity and execution target.
    description.domain = "";
    description.name = "Relu";
    description.minimumOperatorSetVersion = 1;
    description.executionType = MLOperatorExecutionType::Cpu;

    // Type constraints.
    description.typeConstraints = constraints.data();
    description.typeConstraintCount = static_cast<uint32_t>(constraints.size());

    // No default attributes and no special options.
    description.defaultAttributes = nullptr;
    description.defaultAttributeCount = 0;
    description.options = MLOperatorKernelOptions::None;
    description.executionOptions = 0;

    auto kernelFactory = winrt::make<ReluOperatorFactory>();
    auto shapeInferrer = winrt::make<ReluShapeInferrer>();

    registry->RegisterOperatorKernel(&description, kernelFactory.get(), shapeInferrer.get());
  }
};
|