From 7bb3f243ff7de673e042ad3f4860539731a3ecb8 Mon Sep 17 00:00:00 2001 From: Hariharan Seshadri Date: Thu, 13 May 2021 14:11:17 -0700 Subject: [PATCH] Revert (#7663) --- .../core/framework/fallback_cpu_capability.cc | 141 ++------------- .../test/framework/session_state_test.cc | 94 ---------- .../test/testdata/cpu_fallback_pattern_0.onnx | Bin 450 -> 0 bytes .../test/testdata/cpu_fallback_pattern_1.onnx | Bin 187 -> 0 bytes .../test/testdata/cpu_fallback_pattern_2.onnx | Bin 183 -> 0 bytes .../test/testdata/cpu_fallback_pattern_3.onnx | Bin 388 -> 0 bytes .../test/testdata/cpu_fallback_pattern_4.onnx | Bin 300 -> 0 bytes .../test/testdata/cpu_fallback_pattern_5.onnx | Bin 324 -> 0 bytes .../test/testdata/cpu_fallback_test_gen.py | 170 ------------------ 9 files changed, 12 insertions(+), 393 deletions(-) delete mode 100644 onnxruntime/test/testdata/cpu_fallback_pattern_0.onnx delete mode 100644 onnxruntime/test/testdata/cpu_fallback_pattern_1.onnx delete mode 100644 onnxruntime/test/testdata/cpu_fallback_pattern_2.onnx delete mode 100644 onnxruntime/test/testdata/cpu_fallback_pattern_3.onnx delete mode 100644 onnxruntime/test/testdata/cpu_fallback_pattern_4.onnx delete mode 100644 onnxruntime/test/testdata/cpu_fallback_pattern_5.onnx delete mode 100644 onnxruntime/test/testdata/cpu_fallback_test_gen.py diff --git a/onnxruntime/core/framework/fallback_cpu_capability.cc b/onnxruntime/core/framework/fallback_cpu_capability.cc index c23ba1bcb3..011eaf9edb 100644 --- a/onnxruntime/core/framework/fallback_cpu_capability.cc +++ b/onnxruntime/core/framework/fallback_cpu_capability.cc @@ -7,10 +7,7 @@ #include "onnx/defs/data_type_utils.h" -#include "core/framework/execution_providers.h" -#include "core/framework/kernel_registry_manager.h" #include "core/framework/op_kernel.h" -#include "core/providers/cpu/cpu_execution_provider.h" using namespace ONNX_NAMESPACE::Utils; @@ -48,43 +45,18 @@ std::unordered_set GetCpuPreferredNodes(const onnxruntime::GraphViewe return node_id_to_order_map[n1] > node_id_to_order_map[n2]; }; - // If return false, n2 will be output first; If return true, n1 will be output first - auto lesser_order_comp = [&](const NodeIndex n1, const NodeIndex n2) { - return node_id_to_order_map[n1] < node_id_to_order_map[n2]; - }; - - std::priority_queue, decltype(greater_order_comp)> candidates_fw(greater_order_comp); - std::priority_queue, decltype(lesser_order_comp)> candidates_bw(lesser_order_comp); + std::priority_queue, decltype(greater_order_comp)> candidates(greater_order_comp); std::unordered_set visited; - std::unordered_set cpu_args; + std::unordered_set cpu_output_args; std::unordered_set provider_nodes; std::unordered_map node_to_kernel; - std::unordered_set cpu_kernel_available; - - // create a temp CPU kernel registry - KernelRegistryManager mgr; - ExecutionProviders cpu_ep; - CPUExecutionProviderInfo epi{false}; - ORT_ENFORCE(cpu_ep.Add(kCpuExecutionProvider, std::make_unique(epi)).IsOK()); - ORT_ENFORCE(mgr.RegisterKernels(cpu_ep).IsOK()); - std::vector cpu_kernel_registries = mgr.GetKernelRegistriesByProviderType(kCpuExecutionProvider); for (auto& node_id : tentative_nodes) { provider_nodes.insert(node_id); const Node* node = graph.GetNode(node_id); const KernelCreateInfo* kernel_info = nullptr; - - // Get the CPU kernel availability for this node - for (auto registry : cpu_kernel_registries) { - auto st = registry->TryFindKernel(*node, kCpuExecutionProvider, &kernel_info); - if (st.IsOK()) { - cpu_kernel_available.insert(node_id); - break; - } - } - for (auto registry : kernel_registries) { auto st = registry->TryFindKernel(*node, provider_type, &kernel_info); if (st.IsOK()) @@ -99,26 +71,11 @@ std::unordered_set GetCpuPreferredNodes(const onnxruntime::GraphViewe node->OutputDefs(), [&](const NodeArg& node_arg, size_t out_index) { if (kernel_info->kernel_def->IsOutputOnCpu(out_index)) { - cpu_args.insert(&node_arg); + cpu_output_args.insert(&node_arg); auto consumer_nodes = graph.GetConsumerNodes(node_arg.Name()); for (auto& consumer_node : consumer_nodes) { - candidates_fw.push(consumer_node->Index()); - LOGS_DEFAULT(INFO) << "Candidate for fallback CPU execution in forward trace: " << consumer_node->Name(); - } - } - return Status::OK(); - })); - - // then, find all the direct producers of cpu tensors. - ORT_THROW_IF_ERROR(node->ForEachWithIndex( - node->InputDefs(), - [&](const NodeArg& node_arg, size_t in_index) { - if (kernel_info->kernel_def->IsInputOnCpu(in_index)) { - cpu_args.insert(&node_arg); - auto producer_node = graph.GetProducerNode(node_arg.Name()); - if (producer_node != nullptr) { - candidates_bw.push(producer_node->Index()); - LOGS_DEFAULT(INFO) << "Candidate for fallback CPU execution in backward trace: " << producer_node->Name(); + candidates.push(consumer_node->Index()); + LOGS_DEFAULT(INFO) << "Candidate for fallback CPU execution: " << consumer_node->Name(); } } return Status::OK(); @@ -132,9 +89,9 @@ std::unordered_set GetCpuPreferredNodes(const onnxruntime::GraphViewe // The detail: // for each candidate, if one of its input is a cpu tensor and the Non-CPU kernel doesn't mark it as cpu input, // force the node to CPU to avoid memory cpu and add its output to the small cpu tensors. - while (!candidates_fw.empty()) { - NodeIndex cur = candidates_fw.top(); - candidates_fw.pop(); + while (!candidates.empty()) { + NodeIndex cur = candidates.top(); + candidates.pop(); if (visited.count(cur) != 0) continue; visited.insert(cur); @@ -161,7 +118,7 @@ std::unordered_set GetCpuPreferredNodes(const onnxruntime::GraphViewe } // the input is not a CPU tensor - if (cpu_args.find(input) == cpu_args.end()) { + if (cpu_output_args.find(input) == cpu_output_args.end()) { place_in_cpu = false; break; } @@ -173,90 +130,16 @@ std::unordered_set GetCpuPreferredNodes(const onnxruntime::GraphViewe } } - if (place_in_cpu && cpu_kernel_available.count(cur) != 0) { + if (place_in_cpu) { cpu_nodes.insert(cur); LOGS_DEFAULT(INFO) << "ORT optimization- Force fallback to CPU execution for node: " << node->Name() << " because the CPU execution path is deemed faster than overhead involved with execution on other EPs " << " capable of executing this node"; for (auto* output : node->OutputDefs()) { - cpu_args.insert(output); + cpu_output_args.insert(output); } for (auto it = node->OutputNodesBegin(); it != node->OutputNodesEnd(); ++it) { - candidates_fw.push((*it).Index()); - } - } - } - // clear the visited to prepare for backward trace - visited.clear(); - // Trace the graph backwards to find additional CPU nodes - // Starting from nodes that must produce an output on CPU, trace the producer nodes - // The trace stops when we find that - // 1) The node is already picked for CPU - // 2) Input/Output type is unsupported on CPU(float16/bfloat16) - // 3) The output is not a CPU tensor - // 4) The search hits a node that produces a CPU output - while (!candidates_bw.empty()) { - NodeIndex cur = candidates_bw.top(); - candidates_bw.pop(); - if (visited.count(cur) != 0) - continue; - visited.insert(cur); - - // node is already picked for CPU - if (cpu_nodes.count(cur) != 0) - continue; - - if (provider_nodes.find(cur) == provider_nodes.end()) - continue; - - auto* node = graph.GetNode(cur); - bool place_in_cpu = true; - for (size_t i = 0; i < node->OutputDefs().size(); ++i) { - auto* output = node->OutputDefs()[i]; - - // skip placing on CPU if the data typs is float16 or bfloat16 - if (output->Type() == DataTypeUtils::ToType("float16") || - output->Type() == DataTypeUtils::ToType("bfloat16")) { - place_in_cpu = false; - break; - } - - // the output is not a CPU tensor - if (cpu_args.find(output) == cpu_args.end()) { - place_in_cpu = false; - break; - } - - // output is a CPU tensor, but it's intended to be consumed as CPU output by the target EP - if (node_to_kernel[cur]->kernel_def->IsOutputOnCpu(i)) { - place_in_cpu = false; - break; - } - } - // Next, check if the node inputs are of supported type - if (place_in_cpu) { - for (size_t i = 0; i < node->InputDefs().size(); ++i) { - auto* input = node->InputDefs()[i]; - - // skip placing on CPU if the data typs is float16 or bfloat16 - if (input->Type() == DataTypeUtils::ToType("float16") || - input->Type() == DataTypeUtils::ToType("bfloat16")) { - place_in_cpu = false; - break; - } - } - } - - if (place_in_cpu && cpu_kernel_available.count(cur) != 0) { - cpu_nodes.insert(cur); - LOGS_DEFAULT(INFO) << "ORT optimization- Force fallback to CPU execution for node: " << node->Name() - << " because the CPU execution path is deemed faster than overhead involved with execution on other EPs " - << " capable of executing this node"; - for (auto* input : node->InputDefs()) { - cpu_args.insert(input); - } - for (auto it = node->InputNodesBegin(); it != node->InputNodesEnd(); ++it) { - candidates_bw.push((*it).Index()); + candidates.push((*it).Index()); } } } diff --git a/onnxruntime/test/framework/session_state_test.cc b/onnxruntime/test/framework/session_state_test.cc index 4109bb8e5b..cfe37b2af4 100644 --- a/onnxruntime/test/framework/session_state_test.cc +++ b/onnxruntime/test/framework/session_state_test.cc @@ -20,12 +20,6 @@ #include "gtest/gtest.h" #include "test/test_environment.h" -#ifdef USE_CUDA -#include "core/providers/cuda/cuda_execution_provider.h" -#elif USE_ROCM -#include "core/providers/rocm/rocm_execution_provider.h" -#endif - using namespace ONNX_NAMESPACE; using namespace std; namespace onnxruntime { @@ -179,94 +173,6 @@ TEST_P(SessionStateTestP, TestInitializerProcessing) { } } -#if defined(USE_CUDA) || defined(USE_ROCM) -static void TestCPUNodePlacement(const std::basic_string& model_uri, - const std::unordered_set& expected_cpu_nodes, - const std::unordered_set& expected_gpu_nodes) { - std::shared_ptr model; - ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, DefaultLoggingManager().DefaultLogger())); - Graph& graph = model->MainGraph(); - - ExecutionProviders execution_providers; -#if defined(USE_CUDA) - CUDAExecutionProviderInfo cuda_epi; - ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCudaExecutionProvider, std::make_unique(cuda_epi))); -#elif defined(USE_ROCM) - ROCMExecutionProviderInfo rocm_epi; - ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kRocmExecutionProvider, std::make_unique(rocm_epi))); -#endif - // add CPU EP - CPUExecutionProviderInfo epi; - ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCpuExecutionProvider, std::make_unique(epi))); - - KernelRegistryManager krm; - ASSERT_STATUS_OK(krm.RegisterKernels(execution_providers)); - - DataTransferManager dtm; - profiling::Profiler profiler; - - SessionState session_state(graph, execution_providers, false, nullptr, nullptr, dtm, - DefaultLoggingManager().DefaultLogger(), profiler); - - // Partition the graph. Here, the graph partitioner assigns EPs to the nodes - GraphPartitioner partitioner(krm, execution_providers); - ASSERT_STATUS_OK(partitioner.Partition(graph, session_state.ExportDll(), session_state.GetMutableFuncMgr())); - - // check which nodes are assigned to CPU and GPU - for (auto& node : graph.Nodes()) { - // assert that EP is assigned - ASSERT_TRUE(!node.GetExecutionProviderType().empty()); - auto& ep = node.GetExecutionProviderType(); - if (ep == onnxruntime::kCudaExecutionProvider || ep == onnxruntime::kRocmExecutionProvider) { - ASSERT_TRUE(expected_gpu_nodes.count(node.Name())) << "Node not found in expected gpu nodes: " << node.Name(); - } else if (ep == onnxruntime::kCpuExecutionProvider) { - ASSERT_TRUE(expected_cpu_nodes.count(node.Name())) << "Node not found in expected cpu nodes: " << node.Name(); - } else { - ASSERT_TRUE(false) << "Invalid execution provider assigned to node: " << node.Name() << " , value: " << ep; - } - } -} - -TEST(SessionStateTest, CPUPlacementTest0) { - std::unordered_set expected_cpu_nodes = {"reshape", "shape1", "const1", "mul", "equal", "where"}; - std::unordered_set expected_gpu_nodes = {"shape0", "expand"}; - TestCPUNodePlacement(ORT_TSTR("testdata/cpu_fallback_pattern_0.onnx"), expected_cpu_nodes, expected_gpu_nodes); -} -TEST(SessionStateTest, CPUPlacementTest1) { - std::unordered_set expected_cpu_nodes = {"const1"}; - std::unordered_set expected_gpu_nodes = {"shape0", "expand"}; - TestCPUNodePlacement(ORT_TSTR("testdata/cpu_fallback_pattern_1.onnx"), expected_cpu_nodes, expected_gpu_nodes); -} -TEST(SessionStateTest, CPUPlacementTest2) { - std::unordered_set expected_cpu_nodes = {"range"}; - std::unordered_set expected_gpu_nodes = {"size0", "reduce"}; - TestCPUNodePlacement(ORT_TSTR("testdata/cpu_fallback_pattern_2.onnx"), expected_cpu_nodes, expected_gpu_nodes); -} -TEST(SessionStateTest, CPUPlacementTest3) { - std::unordered_set expected_cpu_nodes = {"range0", "range1"}; - std::unordered_set expected_gpu_nodes = {"size0", "reduce0", "identity", "size1", "reduce1", "sum"}; - TestCPUNodePlacement(ORT_TSTR("testdata/cpu_fallback_pattern_3.onnx"), expected_cpu_nodes, expected_gpu_nodes); -} -TEST(SessionStateTest, CPUPlacementTest4) { - // Currently, the behaviour is different for RocM and CUDA EP as Rocm EP is missing a valid kernel - // for ReduceSum for int64 type. This causes the backward trace in GetCpuPreferredNodes to stop - // earlier. The expected values can be modified to match CUDA once the RocM EP kernel is updated -#if defined(USE_CUDA) - std::unordered_set expected_cpu_nodes = {"range", "reduce", "const1"}; - std::unordered_set expected_gpu_nodes = {"size0", "expand"}; -#elif defined(USE_ROCM) - std::unordered_set expected_cpu_nodes = {"const1", "reduce"}; - std::unordered_set expected_gpu_nodes = {"size0", "expand", "range"}; -#endif - TestCPUNodePlacement(ORT_TSTR("testdata/cpu_fallback_pattern_4.onnx"), expected_cpu_nodes, expected_gpu_nodes); -} -TEST(SessionStateTest, CPUPlacementTest5) { - std::unordered_set expected_cpu_nodes = {"gather0", "gather1", "concat"}; - std::unordered_set expected_gpu_nodes = {"shape0", "shape1", "reshape"}; - TestCPUNodePlacement(ORT_TSTR("testdata/cpu_fallback_pattern_5.onnx"), expected_cpu_nodes, expected_gpu_nodes); -} -#endif - // Test that we allocate memory for an initializer from non-arena memory even if we provide an arena-based allocator // if the relevant session option config flag is set // For this test we need to enable the arena-based allocator which is not supported on x86 builds, so diff --git a/onnxruntime/test/testdata/cpu_fallback_pattern_0.onnx b/onnxruntime/test/testdata/cpu_fallback_pattern_0.onnx deleted file mode 100644 index 4186edb736c7284c60d9ab8fa6fe066cf6b8c0f9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 450 zcmZWmK}*9h7&W$O>SN%@MHdkypofC8U3Y6sk227M;H6X>-Jxse+NwXtf3csZP125n zIpn?fz2xzIiO-gM2NxhAXJJ~ev#k=Ifw`oQTH+3a18N+OFx@FbbHREh*@G?N8Y7?k zed?l*T@0}a4i@)btFmsoBNpXptaVm4t2fg_a1Q#J6|D*;KAEy5A(7MRAL8N){Uxwc zy{A>FxU*>sp*QK=w*_v-{Xo|_RUd6u2n&)XT44^W4}s$`=+|29l&~P_lUBF{iC{~d z#KKeG+pNr`ml)(|RNdTes$3Ppnduw9!y=J=%V74;80L6B(ZAy2*e9AsmcDc}wls2a HG@bkb+_Qu0 diff --git a/onnxruntime/test/testdata/cpu_fallback_pattern_1.onnx b/onnxruntime/test/testdata/cpu_fallback_pattern_1.onnx deleted file mode 100644 index 81e7abb77c0c7cd004bfca1393bd86e3a027c6a3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 187 zcmd;Jx7xtSCC$a?D8%jdJYR^lC^0V` zs1Qsku?B&7Tnb!_PFxTnAx38@wxZOO(&SVn&LA)yT$(GyRgzjNY q$lwGvT#F0DVR8bQ9mNT93JVt#2P25-1Ysrtna&WV6AKrE051TqJt*b? diff --git a/onnxruntime/test/testdata/cpu_fallback_pattern_3.onnx b/onnxruntime/test/testdata/cpu_fallback_pattern_3.onnx deleted file mode 100644 index 5285a240d2711a53a28f7c6c38d631dde7692a4d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 388 zcmZvYF>k^!5QT9PNO%Ydc|poj)l(a(Qg3C$Osuf6R4uWpNDZkHM-}|@ZrIpH8JO<7 zr#F0eS-SdFyYSINRiL*ViI8M{sD>>9@Km)lV^7q$@U(w^tRgOC;^94paNr1X0E;N{3$t LHx6Zj(PDlDzlCK& diff --git a/onnxruntime/test/testdata/cpu_fallback_pattern_4.onnx b/onnxruntime/test/testdata/cpu_fallback_pattern_4.onnx deleted file mode 100644 index be93737c4f9e22c46fcd7ca0689243bbcfbeb5c0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 300 zcmXYsKWoD<5XEzjEIZeh22H_32SK423hkOLaxxdmNw$V9RaEr)j@wb+Q+| z_j{*1cxkfyi*SLkQgXHHJ};ET{61+>dcg-27it?Ehd6cLjfz`azx$6tbi8H4djx|+ zq-0A~f1qqsJ=+E@`x6#8hDsiqx^vIjsQR{GwzYeCD?jX+@&(3U_SjQ?ox(g>0+^}i zUl4eQ;O`9k3f7sVpR=tiEjK|GI_lixsj29gU-9b1gc>Jrwv&Tq*8)dHt(qL_f7+-0hz(%DJYfz>d$wglJN`cPGkoksOrtOe8Z$fHJTSK? IeDntWJ8rm60{{R3 diff --git a/onnxruntime/test/testdata/cpu_fallback_test_gen.py b/onnxruntime/test/testdata/cpu_fallback_test_gen.py deleted file mode 100644 index 8d8ec94639..0000000000 --- a/onnxruntime/test/testdata/cpu_fallback_test_gen.py +++ /dev/null @@ -1,170 +0,0 @@ -import onnx -from onnx import helper -from onnx import TensorProto -from onnx import shape_inference -import numpy as np - -graph_def_0 = helper.make_graph( - nodes=[ - helper.make_node(op_type="Shape", inputs=['A'], outputs=['A_shape'], name='shape0'), - helper.make_node(op_type="Reshape", inputs=['A_shape', 'shape'], outputs=['A_reshaped'], name='reshape'), - helper.make_node(op_type="Shape", inputs=['A_reshaped'], outputs=['A_shape1'], name='shape1'), - helper.make_node(op_type="ConstantOfShape", inputs=['A_shape1'], outputs=['const1'], name='const1', value=helper.make_tensor('val', TensorProto.INT64, - [1], [1])), - helper.make_node(op_type="Mul", inputs=['const1', 'neg_one'], outputs=['mul'], name='mul'), - helper.make_node(op_type="Equal", inputs=['A_reshaped', 'mul'], outputs=['equal'], name='equal'), - helper.make_node(op_type="Where", inputs=['equal', 'const1', 'A_reshaped'], outputs=['where'], name='where'), - helper.make_node(op_type="Expand", inputs=['B','where'], outputs=['C'], name='expand'), - - ], - name='test-model', - inputs=[ - # create inputs with symbolic dims - helper.make_tensor_value_info("A", TensorProto.FLOAT, None), - helper.make_tensor_value_info("B", TensorProto.FLOAT, None), - ], - outputs=[ - helper.make_tensor_value_info('C', TensorProto.FLOAT, None) - ], - initializer=[ - helper.make_tensor('shape', TensorProto.INT64, [1], [-1]), - helper.make_tensor('neg_one', TensorProto.INT64, [1], [-1]), - ]) - -model = helper.make_model(graph_def_0, opset_imports=[helper.make_operatorsetid("", 12)]) -onnx.save_model(model, "cpu_fallback_pattern_0.onnx") - -graph_def_1 = helper.make_graph( - nodes=[ - helper.make_node(op_type="Shape", inputs=['A'], outputs=['A_shape'], name='shape0'), - helper.make_node(op_type="ConstantOfShape", inputs=['A_shape'], outputs=['const1'], name='const1', value=helper.make_tensor('val', TensorProto.INT64, - [1], [1])), - helper.make_node(op_type="Expand", inputs=['B','const1'], outputs=['C'], name='expand'), - - ], - name='test-model', - inputs=[ - # create inputs with symbolic dims - helper.make_tensor_value_info("A", TensorProto.FLOAT, None), - helper.make_tensor_value_info("B", TensorProto.FLOAT, None), - ], - outputs=[ - helper.make_tensor_value_info('C', TensorProto.FLOAT, None) - ], - initializer=[]) - -model = helper.make_model(graph_def_1, opset_imports=[helper.make_operatorsetid("", 12)]) -onnx.save_model(model, "cpu_fallback_pattern_1.onnx") - - -graph_def_2 = helper.make_graph( - nodes=[ - helper.make_node(op_type="Size", inputs=['A'], outputs=['A_size'], name='size0'), - helper.make_node(op_type="Range", inputs=['zero', 'A_size', 'two'], outputs=['range'], name='range'), - helper.make_node(op_type="ReduceSum", inputs=['B', 'range'], outputs=['C'], name='reduce'), - ], - name='test-model', - inputs=[ - # create inputs with symbolic dims - helper.make_tensor_value_info("A", TensorProto.FLOAT, None), - helper.make_tensor_value_info("B", TensorProto.FLOAT, None), - ], - outputs=[ - helper.make_tensor_value_info('C', TensorProto.FLOAT, None) - ], - initializer=[ - helper.make_tensor('zero', TensorProto.INT64, [], [0]), - helper.make_tensor('two', TensorProto.INT64, [], [2]), - ]) - -model = helper.make_model(graph_def_2, opset_imports=[helper.make_operatorsetid("", 13)]) -onnx.save_model(model, "cpu_fallback_pattern_2.onnx") - - -graph_def_3 = helper.make_graph( - nodes=[ - helper.make_node(op_type="Size", inputs=['A'], outputs=['size0'], name='size0'), - helper.make_node(op_type="Range", inputs=['zero', 'size0', 'two'], outputs=['range0'], name='range0'), - helper.make_node(op_type="ReduceSum", inputs=['B', 'range0'], outputs=['reduce0'], name='reduce0'), - - helper.make_node(op_type="Identity", inputs=['reduce0'], outputs=['reduce0_cpy'], name='identity'), - - helper.make_node(op_type="Size", inputs=['reduce0_cpy'], outputs=['size1'], name='size1'), - helper.make_node(op_type="Range", inputs=['zero', 'size1', 'two'], outputs=['range1'], name='range1'), - helper.make_node(op_type="ReduceSum", inputs=['B', 'range1'], outputs=['reduce1'], name='reduce1'), - - helper.make_node(op_type="Sum", inputs=['reduce0', 'reduce1'], outputs=['C'], name='sum'), - - ], - name='test-model', - inputs=[ - # create inputs with symbolic dims - helper.make_tensor_value_info("A", TensorProto.FLOAT, None), - helper.make_tensor_value_info("B", TensorProto.FLOAT, None), - ], - outputs=[ - helper.make_tensor_value_info('C', TensorProto.FLOAT, None) - ], - initializer=[ - helper.make_tensor('zero', TensorProto.INT64, [], [0]), - helper.make_tensor('two', TensorProto.INT64, [], [2]), - ]) - -model = helper.make_model(graph_def_3, opset_imports=[helper.make_operatorsetid("", 13)]) -onnx.save_model(model, "cpu_fallback_pattern_3.onnx") - -graph_def_4 = helper.make_graph( - nodes=[ - helper.make_node(op_type="Size", inputs=['A'], outputs=['A_size'], name='size0'), - helper.make_node(op_type="Range", inputs=['zero', 'A_size', 'two'], outputs=['range'], name='range'), - helper.make_node(op_type="ReduceSum", inputs=['B', 'range'], outputs=['reduce'], name='reduce'), - helper.make_node(op_type="ConstantOfShape", inputs=['reduce'], outputs=['const1'], name='const1', value=helper.make_tensor('val', TensorProto.INT64, - [1], [1])), - helper.make_node(op_type="Expand", inputs=['C','const1'], outputs=['D'], name='expand'), - - ], - name='test-model', - inputs=[ - # create inputs with symbolic dims - helper.make_tensor_value_info("A", TensorProto.FLOAT, None), - helper.make_tensor_value_info("B", TensorProto.INT64, None), - helper.make_tensor_value_info("C", TensorProto.FLOAT, None), - ], - outputs=[ - helper.make_tensor_value_info('D', TensorProto.FLOAT, None) - ], - initializer=[ - helper.make_tensor('zero', TensorProto.INT64, [], [0]), - helper.make_tensor('two', TensorProto.INT64, [], [2]), - ]) - -model = helper.make_model(graph_def_4, opset_imports=[helper.make_operatorsetid("", 13)]) -onnx.save_model(model, "cpu_fallback_pattern_4.onnx") - -graph_def_5 = helper.make_graph( - nodes=[ - helper.make_node(op_type="Shape", inputs=['A'], outputs=['A_shape'], name='shape0'), - helper.make_node(op_type="Gather", inputs=['A_shape', 'zero'], outputs=['batch'], name='gather0'), - helper.make_node(op_type="Concat", inputs=['batch', 'seq_len'], outputs=['shape'], name='concat', axis=0), - helper.make_node(op_type="Shape", inputs=['B'], outputs=['B_shape'], name='shape1'), - helper.make_node(op_type="Gather", inputs=['B_shape', 'one'], outputs=['seq_len'], name='gather1'), - helper.make_node(op_type="Reshape", inputs=['C','shape'], outputs=['D'], name='reshape'), - - ], - name='test-model', - inputs=[ - # create inputs with symbolic dims - helper.make_tensor_value_info("A", TensorProto.FLOAT, None), - helper.make_tensor_value_info("B", TensorProto.INT64, None), - helper.make_tensor_value_info("C", TensorProto.FLOAT, None), - ], - outputs=[ - helper.make_tensor_value_info('D', TensorProto.FLOAT, None) - ], - initializer=[ - helper.make_tensor('zero', TensorProto.INT64, [1], [0]), - helper.make_tensor('one', TensorProto.INT64, [1], [1]), - ]) - -model = helper.make_model(graph_def_5, opset_imports=[helper.make_operatorsetid("", 13)]) -onnx.save_model(model, "cpu_fallback_pattern_5.onnx")