mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-03 03:58:54 +00:00
[Nuphar EP] fixes for some object detection models (#2581)
Update notebook tutorial with multi-threaded int8 GEMM from #2517
This commit is contained in:
parent
cbc398bb75
commit
0f12346d76
3 changed files with 64 additions and 37 deletions
|
|
@ -216,8 +216,8 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fusion speed-up 437.43%\n",
|
||||
" Baseline: 0.733 s, Current: 0.136 s\n"
|
||||
"Fusion speed-up 434.50%\n",
|
||||
" Baseline: 0.716 s, Current: 0.134 s\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -339,8 +339,8 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Nuphar Scan speed-up 1.97%\n",
|
||||
" Baseline: 3.062 s, Current: 3.003 s\n"
|
||||
"Nuphar Scan speed-up 7.68%\n",
|
||||
" Baseline: 3.037 s, Current: 2.821 s\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -444,8 +444,8 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Quantization speed-up 196.18%\n",
|
||||
" Baseline: 3.003 s, Current: 1.014 s\n"
|
||||
"Quantization speed-up 278.52%\n",
|
||||
" Baseline: 2.821 s, Current: 0.745 s\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -575,8 +575,8 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Nuphar BERT squad speed-up 67.20%\n",
|
||||
" Baseline: 5.089 s, Current: 3.044 s\n"
|
||||
"Nuphar BERT squad speed-up 65.18%\n",
|
||||
" Baseline: 5.023 s, Current: 3.041 s\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -765,8 +765,8 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Nuphar quantized BiDAF speed-up 44.03%\n",
|
||||
" Baseline: 0.304 s, Current: 0.211 s\n"
|
||||
"Nuphar quantized BiDAF speed-up 45.63%\n",
|
||||
" Baseline: 0.305 s, Current: 0.209 s\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -807,7 +807,7 @@
|
|||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'JIT took 4.612 seconds'"
|
||||
"'JIT took 4.655 seconds'"
|
||||
]
|
||||
},
|
||||
"execution_count": 28,
|
||||
|
|
@ -887,8 +887,8 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"AOT speed-up 952.77%\n",
|
||||
" Baseline: 4.612 s, Current: 0.438 s\n"
|
||||
"AOT speed-up 967.73%\n",
|
||||
" Baseline: 4.655 s, Current: 0.436 s\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -919,8 +919,8 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Single thread perf w/o parallel schedule speed-up 3.80%\n",
|
||||
" Baseline: 0.318 s, Current: 0.306 s\n"
|
||||
"Single thread perf w/o parallel schedule speed-up 2.83%\n",
|
||||
" Baseline: 0.315 s, Current: 0.307 s\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -947,13 +947,6 @@
|
|||
"end = timer()\n",
|
||||
"print_speedup('Single thread perf w/o parallel schedule', end_baseline - start_baseline, end - start)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
|
|||
|
|
@ -225,6 +225,17 @@ NupharExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_vie
|
|||
}
|
||||
}
|
||||
}
|
||||
// reject when pooling on symbolic dims, since shape computation does not support it yet
|
||||
it = attrs.find("kernel_shape");
|
||||
ORT_ENFORCE(it != attrs.end());
|
||||
int kernel_rank = it->second.ints_size();
|
||||
const auto output_shape = node.OutputDefs()[0]->Shape();
|
||||
int output_rank = output_shape->dim_size();
|
||||
for (int d = output_rank - kernel_rank; d < output_rank; ++d) {
|
||||
if (output_shape->dim(d).has_dim_param()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (node.OpType() == "Slice") {
|
||||
|
|
|
|||
|
|
@ -120,6 +120,7 @@ class SymbolicShapeInference:
|
|||
self.run_ = True
|
||||
self.suggested_merge_ = {}
|
||||
self.symbolic_dims_ = {}
|
||||
self.input_symbols_ = {}
|
||||
self.auto_merge_ = auto_merge
|
||||
self.guess_output_rank_ = guess_output_rank
|
||||
self.verbose_ = verbose
|
||||
|
|
@ -138,7 +139,12 @@ class SymbolicShapeInference:
|
|||
if is_literal(s):
|
||||
map_to = s
|
||||
break
|
||||
# when no literals, map to existing symbolic dims
|
||||
# when no literals, map to input symbolic dims, then existing symbolic dims
|
||||
if map_to is None:
|
||||
for s in symbols:
|
||||
if s in self.input_symbols_:
|
||||
map_to = s
|
||||
break
|
||||
if map_to is None:
|
||||
for s in symbols:
|
||||
if type(self.symbolic_dims_[s]) == sympy.Symbol:
|
||||
|
|
@ -228,6 +234,7 @@ class SymbolicShapeInference:
|
|||
int_dim = is_int.index(1)
|
||||
if self.verbose_ > 0:
|
||||
print('dim {} has been merged with value {}'.format(dims[1 - int_dim], dims[int_dim]))
|
||||
self._check_merged_dims(dims, allow_broadcast=False)
|
||||
return dims[int_dim]
|
||||
else:
|
||||
return None
|
||||
|
|
@ -621,14 +628,13 @@ class SymbolicShapeInference:
|
|||
self._update_computed_dims(sympy_shape)
|
||||
if type(sympy_shape) != list:
|
||||
sympy_shape = [sympy_shape]
|
||||
vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
|
||||
vi.type.tensor_type.elem_type,
|
||||
get_shape_from_sympy_shape(sympy_shape)))
|
||||
else:
|
||||
# create new dynamic shape
|
||||
vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
|
||||
vi.type.tensor_type.elem_type,
|
||||
self._new_symbolic_shape(self._get_shape_rank(node,0), node)))
|
||||
sympy_shape = self._new_symbolic_shape(self._get_shape_rank(node,0), node)
|
||||
|
||||
vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
|
||||
vi.type.tensor_type.elem_type,
|
||||
get_shape_from_sympy_shape(sympy_shape)))
|
||||
|
||||
def _infer_Expand(self, node):
|
||||
expand_to_shape = self._try_get_value(node, 1)
|
||||
|
|
@ -813,7 +819,7 @@ class SymbolicShapeInference:
|
|||
assert is_literal(shape_rank)
|
||||
vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
|
||||
vi.type.tensor_type.elem_type,
|
||||
self._new_symbolic_shape(shape_rank, node)))
|
||||
get_shape_from_sympy_shape(self._new_symbolic_shape(shape_rank, node))))
|
||||
else:
|
||||
input_shape = self._get_shape(node, 0)
|
||||
input_sympy_shape = self._get_sympy_shape(node, 0)
|
||||
|
|
@ -850,19 +856,36 @@ class SymbolicShapeInference:
|
|||
|
||||
def _infer_Resize(self, node):
|
||||
vi = self.known_vi_[node.output[0]]
|
||||
if get_opset(self.out_mp_) <= 10: # only support opset 10 Resize for now
|
||||
input_sympy_shape = self._get_sympy_shape(node, 0)
|
||||
if get_opset(self.out_mp_) <= 10:
|
||||
scales = self._try_get_value(node, 1)
|
||||
if scales is not None:
|
||||
input_sympy_shape = self._get_sympy_shape(node, 0)
|
||||
new_sympy_shape = [sympy.simplify(sympy.floor(d*s)) for d,s in zip(input_sympy_shape, scales)]
|
||||
self._update_computed_dims(new_sympy_shape)
|
||||
vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
|
||||
self.known_vi_[node.input[0]].type.tensor_type.elem_type,
|
||||
get_shape_from_sympy_shape(new_sympy_shape)))
|
||||
else:
|
||||
roi = self._try_get_value(node, 1)
|
||||
scales = self._try_get_value(node, 2)
|
||||
sizes = self._try_get_value(node, 3)
|
||||
if sizes is not None:
|
||||
new_sympy_shape = [sympy.simplify(sympy.floor(s)) for s in sizes]
|
||||
self._update_computed_dims(new_sympy_shape)
|
||||
elif roi is not None and scales is not None:
|
||||
rank = len(scales)
|
||||
assert len(roi) == 2*rank
|
||||
roi_start = list(roi)[:rank]
|
||||
roi_end = list(roi)[rank:]
|
||||
scales = list(scales)
|
||||
new_sympy_shape = [sympy.simplify(sympy.floor(d * (end - start) * scale)) for d, start, end, scale in zip(input_sympy_shape, roi_start, roi_end, scales)]
|
||||
self._update_computed_dims(new_sympy_shape)
|
||||
else:
|
||||
new_sympy_shape = self._new_symbolic_shape(self._get_shape_rank(node, 0), node)
|
||||
|
||||
vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
|
||||
self.known_vi_[node.input[0]].type.tensor_type.elem_type,
|
||||
self._new_symbolic_shape(self._get_shape_rank(node, 0), node)))
|
||||
get_shape_from_sympy_shape(new_sympy_shape)))
|
||||
|
||||
def _infer_Scan(self, node):
|
||||
subgraph = get_attribute(node, 'body')
|
||||
|
|
@ -1064,16 +1087,16 @@ class SymbolicShapeInference:
|
|||
self.sympy_data_ = start_sympy_data
|
||||
self.out_mp_.graph.ClearField('value_info')
|
||||
self._apply_suggested_merge(graph_input_only=True)
|
||||
input_symbols = set()
|
||||
self.input_symbols_ = set()
|
||||
for i in self.out_mp_.graph.input:
|
||||
input_dims = i.type.tensor_type.shape.dim
|
||||
for i_dim in range(len(input_dims)):
|
||||
if get_dim_from_type_proto(input_dims[i_dim]) is None:
|
||||
# some models use None for symbolic dim in input, replace it with a string
|
||||
input_dims[i_dim].dim_param = self._new_symbolic_dim(i.name, i_dim)
|
||||
input_symbols.update([d for d in get_shape_from_type_proto(i.type) if type(d) == str])
|
||||
self.input_symbols_.update([d for d in get_shape_from_type_proto(i.type) if type(d) == str])
|
||||
|
||||
for s in input_symbols:
|
||||
for s in self.input_symbols_:
|
||||
if s in self.suggested_merge_:
|
||||
s_merge = self.suggested_merge_[s]
|
||||
assert s_merge in self.symbolic_dims_
|
||||
|
|
@ -1166,7 +1189,7 @@ class SymbolicShapeInference:
|
|||
new_shape = self._new_symbolic_shape(out_rank, node, i_o)
|
||||
vi.CopyFrom(helper.make_tensor_value_info(vi.name,
|
||||
self.known_vi_[node.input[0]].type.tensor_type.elem_type,
|
||||
new_shape))
|
||||
get_shape_from_sympy_shape(new_shape)))
|
||||
|
||||
if self.verbose_ > 0:
|
||||
if is_unknown_op:
|
||||
|
|
|
|||
Loading…
Reference in a new issue