[Nuphar EP] fixes for some object detection models (#2581)

Update notebook tutorial with multi-threaded int8 GEMM from #2517
2026-07-13 18:08:13 +00:00 · 2019-12-07 13:37:00 -08:00 · 2019-12-07 13:37:00 -08:00 · 0f12346d76
commit 0f12346d76
parent cbc398bb75
3 changed files with 64 additions and 37 deletions
--- a/docs/python/notebooks/onnxruntime-nuphar-tutorial.ipynb
+++ b/docs/python/notebooks/onnxruntime-nuphar-tutorial.ipynb
@@ -216,8 +216,8 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Fusion speed-up 437.43%\n",
-      "    Baseline: 0.733 s, Current: 0.136 s\n"
+      "Fusion speed-up 434.50%\n",
+      "    Baseline: 0.716 s, Current: 0.134 s\n"
     ]
    }
   ],
@@ -339,8 +339,8 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Nuphar Scan speed-up 1.97%\n",
-      "    Baseline: 3.062 s, Current: 3.003 s\n"
+      "Nuphar Scan speed-up 7.68%\n",
+      "    Baseline: 3.037 s, Current: 2.821 s\n"
     ]
    }
   ],
@@ -444,8 +444,8 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Quantization speed-up 196.18%\n",
-      "    Baseline: 3.003 s, Current: 1.014 s\n"
+      "Quantization speed-up 278.52%\n",
+      "    Baseline: 2.821 s, Current: 0.745 s\n"
     ]
    }
   ],
@@ -575,8 +575,8 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Nuphar BERT squad speed-up 67.20%\n",
-      "    Baseline: 5.089 s, Current: 3.044 s\n"
+      "Nuphar BERT squad speed-up 65.18%\n",
+      "    Baseline: 5.023 s, Current: 3.041 s\n"
     ]
    }
   ],
@@ -765,8 +765,8 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Nuphar quantized BiDAF speed-up 44.03%\n",
-      "    Baseline: 0.304 s, Current: 0.211 s\n"
+      "Nuphar quantized BiDAF speed-up 45.63%\n",
+      "    Baseline: 0.305 s, Current: 0.209 s\n"
     ]
    }
   ],
@@ -807,7 +807,7 @@
    {
     "data": {
      "text/plain": [
-       "'JIT took 4.612 seconds'"
+       "'JIT took 4.655 seconds'"
      ]
     },
     "execution_count": 28,
@@ -887,8 +887,8 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "AOT speed-up 952.77%\n",
-      "    Baseline: 4.612 s, Current: 0.438 s\n"
+      "AOT speed-up 967.73%\n",
+      "    Baseline: 4.655 s, Current: 0.436 s\n"
     ]
    }
   ],
@@ -919,8 +919,8 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Single thread perf w/o parallel schedule speed-up 3.80%\n",
-      "    Baseline: 0.318 s, Current: 0.306 s\n"
+      "Single thread perf w/o parallel schedule speed-up 2.83%\n",
+      "    Baseline: 0.315 s, Current: 0.307 s\n"
     ]
    }
   ],
@@ -947,13 +947,6 @@
    "end = timer()\n",
    "print_speedup('Single thread perf w/o parallel schedule', end_baseline - start_baseline, end - start)"
   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
  }
 ],
 "metadata": {
--- a/onnxruntime/core/providers/nuphar/nuphar_execution_provider.cc
+++ b/onnxruntime/core/providers/nuphar/nuphar_execution_provider.cc
@@ -225,6 +225,17 @@ NupharExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_vie
          }
        }
      }
+      // reject when pooling on symbolic dims, since shape computation does not support it yet
+      it = attrs.find("kernel_shape");
+      ORT_ENFORCE(it != attrs.end());
+      int kernel_rank = it->second.ints_size();
+      const auto output_shape = node.OutputDefs()[0]->Shape();
+      int output_rank = output_shape->dim_size();
+      for (int d = output_rank - kernel_rank; d < output_rank; ++d) {
+        if (output_shape->dim(d).has_dim_param()) {
+          return false;
+        }
+      }
    }

    if (node.OpType() == "Slice") {
--- a/onnxruntime/core/providers/nuphar/scripts/symbolic_shape_infer.py
+++ b/onnxruntime/core/providers/nuphar/scripts/symbolic_shape_infer.py
@@ -120,6 +120,7 @@ class SymbolicShapeInference:
        self.run_ = True
        self.suggested_merge_ = {}
        self.symbolic_dims_ = {}
+        self.input_symbols_ = {}
        self.auto_merge_ = auto_merge
        self.guess_output_rank_ = guess_output_rank
        self.verbose_ = verbose
@@ -138,7 +139,12 @@ class SymbolicShapeInference:
            if is_literal(s):
                map_to = s
                break
-        # when no literals, map to existing symbolic dims
+        # when no literals, map to input symbolic dims, then existing symbolic dims
+        if map_to is None:
+            for s in symbols:
+                if s in self.input_symbols_:
+                    map_to = s
+                    break
        if map_to is None:
            for s in symbols:
                if type(self.symbolic_dims_[s]) == sympy.Symbol:
@@ -228,6 +234,7 @@ class SymbolicShapeInference:
                int_dim = is_int.index(1)
                if self.verbose_ > 0:
                    print('dim {} has been merged with value {}'.format(dims[1 - int_dim], dims[int_dim]))
+                self._check_merged_dims(dims, allow_broadcast=False)
                return dims[int_dim]
            else:
                return None
@@ -621,14 +628,13 @@ class SymbolicShapeInference:
            self._update_computed_dims(sympy_shape)
            if type(sympy_shape) != list:
                sympy_shape = [sympy_shape]
-            vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
-                                                      vi.type.tensor_type.elem_type,
-                                                      get_shape_from_sympy_shape(sympy_shape)))
        else:
            # create new dynamic shape
-            vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
-                                                      vi.type.tensor_type.elem_type,
-                                                      self._new_symbolic_shape(self._get_shape_rank(node,0), node)))
+            sympy_shape = self._new_symbolic_shape(self._get_shape_rank(node,0), node)
+
+        vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
+                                                  vi.type.tensor_type.elem_type,
+                                                  get_shape_from_sympy_shape(sympy_shape)))

    def _infer_Expand(self, node):
        expand_to_shape = self._try_get_value(node, 1)
@@ -813,7 +819,7 @@ class SymbolicShapeInference:
            assert is_literal(shape_rank)
            vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
                                                      vi.type.tensor_type.elem_type,
-                                                      self._new_symbolic_shape(shape_rank, node)))
+                                                      get_shape_from_sympy_shape(self._new_symbolic_shape(shape_rank, node))))
        else:
            input_shape = self._get_shape(node, 0)
            input_sympy_shape = self._get_sympy_shape(node, 0)
@@ -850,19 +856,36 @@ class SymbolicShapeInference:

    def _infer_Resize(self, node):
        vi = self.known_vi_[node.output[0]]
-        if get_opset(self.out_mp_) <= 10: # only support opset 10 Resize for now
+        input_sympy_shape = self._get_sympy_shape(node, 0)
+        if get_opset(self.out_mp_) <= 10:
            scales = self._try_get_value(node, 1)
            if scales is not None:
-                input_sympy_shape = self._get_sympy_shape(node, 0)
                new_sympy_shape = [sympy.simplify(sympy.floor(d*s)) for d,s in zip(input_sympy_shape, scales)]
                self._update_computed_dims(new_sympy_shape)
                vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
                                                          self.known_vi_[node.input[0]].type.tensor_type.elem_type,
                                                          get_shape_from_sympy_shape(new_sympy_shape)))
        else:
+            roi = self._try_get_value(node, 1)
+            scales = self._try_get_value(node, 2)
+            sizes = self._try_get_value(node, 3)
+            if sizes is not None:
+                new_sympy_shape = [sympy.simplify(sympy.floor(s)) for s in sizes]
+                self._update_computed_dims(new_sympy_shape)
+            elif roi is not None and scales is not None:
+                rank = len(scales)
+                assert len(roi) == 2*rank
+                roi_start = list(roi)[:rank]
+                roi_end = list(roi)[rank:]
+                scales = list(scales)
+                new_sympy_shape = [sympy.simplify(sympy.floor(d * (end - start) * scale)) for d, start, end, scale in zip(input_sympy_shape, roi_start, roi_end, scales)]
+                self._update_computed_dims(new_sympy_shape)
+            else:
+                new_sympy_shape = self._new_symbolic_shape(self._get_shape_rank(node, 0), node)
+
            vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
                                                      self.known_vi_[node.input[0]].type.tensor_type.elem_type,
-                                                      self._new_symbolic_shape(self._get_shape_rank(node, 0), node)))
+                                                      get_shape_from_sympy_shape(new_sympy_shape)))

    def _infer_Scan(self, node):
        subgraph = get_attribute(node, 'body')
@@ -1064,16 +1087,16 @@ class SymbolicShapeInference:
        self.sympy_data_ = start_sympy_data
        self.out_mp_.graph.ClearField('value_info')
        self._apply_suggested_merge(graph_input_only=True)
-        input_symbols = set()
+        self.input_symbols_ = set()
        for i in self.out_mp_.graph.input:
            input_dims = i.type.tensor_type.shape.dim
            for i_dim in range(len(input_dims)):
                if get_dim_from_type_proto(input_dims[i_dim]) is None:
                    # some models use None for symbolic dim in input, replace it with a string
                    input_dims[i_dim].dim_param = self._new_symbolic_dim(i.name, i_dim)
-            input_symbols.update([d for d in get_shape_from_type_proto(i.type) if type(d) == str])
+            self.input_symbols_.update([d for d in get_shape_from_type_proto(i.type) if type(d) == str])

-        for s in input_symbols:
+        for s in self.input_symbols_:
            if s in self.suggested_merge_:
                s_merge = self.suggested_merge_[s]
                assert s_merge in self.symbolic_dims_
@@ -1166,7 +1189,7 @@ class SymbolicShapeInference:
                            new_shape = self._new_symbolic_shape(out_rank, node, i_o)
                            vi.CopyFrom(helper.make_tensor_value_info(vi.name,
                                                                      self.known_vi_[node.input[0]].type.tensor_type.elem_type,
-                                                                      new_shape))
+                                                                      get_shape_from_sympy_shape(new_shape)))

                            if self.verbose_ > 0:
                                if is_unknown_op: