From 835b511fa81119ff5cf350ab93e2a1f709ed1d8a Mon Sep 17 00:00:00 2001 From: Hector Li Date: Tue, 15 Jan 2019 14:05:47 -0800 Subject: [PATCH] cuda fix to unblock the tf model tests (#333) * Check the pads attribute on Conv, and auto fallback to CPU if it's not symmetric padding * Insert copy nodes after all graph transformer. It causes some issue if do the cast transformer before memory copy transformer. --- .../providers/cuda/cuda_execution_provider.cc | 26 +++++++++++++++++++ .../providers/cuda/cuda_execution_provider.h | 1 + onnxruntime/core/session/inference_session.cc | 10 +++---- 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc index cdf8f06b5b..7c394c262c 100644 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc +++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc @@ -830,6 +830,30 @@ bool CUDAExecutionProvider::RNNNeedFallbackToCPU(const onnxruntime::Node& node, return false; } +bool CUDAExecutionProvider::ConvNeedFallbackToCPU(const onnxruntime::Node& node) const { + auto node_attributes = node.GetAttributes(); + // Check attributes + for (auto& attr : node_attributes) { + auto attr_name = attr.first; + auto attr_value = attr.second; + + //cudnn only supports symmetric padding + if ("pads" == attr_name && ::onnx::AttributeProto_AttributeType::AttributeProto_AttributeType_INTS == attr_value.type()) { + auto pads = attr_value.ints(); + int pads_size = pads.size(); + ORT_ENFORCE(pads_size % 2 == 0); + int rank = pads_size / 2; + for (int i = 0; i < rank; i++) { + if(pads.Get(i) != pads.Get(i + rank)) { + return true; + } + } + } + } + + return false; +} + std::vector> CUDAExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph, const std::vector& kernel_registries) const { @@ -847,6 +871,8 @@ CUDAExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph, } else if ("GRU" == node.OpType()) { std::vector activations_supported{"sigmoid", "tanh", "sigmoid", "tanh"}; fallback_to_cpu_provider = RNNNeedFallbackToCPU(node, activations_supported, node.OpType()); + } else if ("Conv" == node.OpType()) { + fallback_to_cpu_provider = ConvNeedFallbackToCPU(node); } if (fallback_to_cpu_provider) { diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.h b/onnxruntime/core/providers/cuda/cuda_execution_provider.h index 9d2af6b416..e948512930 100644 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider.h +++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.h @@ -174,6 +174,7 @@ class CUDAExecutionProvider : public IExecutionProvider { void ReleasePerThreadStuffs() const; bool RNNNeedFallbackToCPU(const onnxruntime::Node& node, const std::vector activations_supported, const std::string& op_type) const; + bool ConvNeedFallbackToCPU(const onnxruntime::Node& node) const; }; } // namespace onnxruntime diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index bdea40748c..3894954217 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -278,7 +278,11 @@ class InferenceSession::Impl { GraphPartitioner partitioner(kernel_registry_manager, providers); ORT_RETURN_IF_ERROR(partitioner.Partition(graph, session_state.ExportDll(), const_cast(session_state.GetFuncMgr()))); - // Insert copy nodes. + // Insert cast node/s. + bool modified = false; + ORT_RETURN_IF_ERROR(insert_cast_transformer.Apply(graph, modified)); + + // Insert copy nodes after all graph transformer. for (auto& provider : providers) { if (provider->Type() != onnxruntime::kCpuExecutionProvider && provider->Type() != onnxruntime::kMklDnnExecutionProvider && @@ -288,10 +292,6 @@ class InferenceSession::Impl { } } - // Insert cast node/s. - bool modified = false; - ORT_RETURN_IF_ERROR(insert_cast_transformer.Apply(graph, modified)); - return common::Status::OK(); }