[Nuphar EP] fixes for some object detection models (#2581)

Update notebook tutorial with multi-threaded int8 GEMM from #2517
This commit is contained in:
KeDengMS 2019-12-07 13:37:00 -08:00 committed by GitHub
parent cbc398bb75
commit 0f12346d76
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 64 additions and 37 deletions

View file

@ -216,8 +216,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Fusion speed-up 437.43%\n",
" Baseline: 0.733 s, Current: 0.136 s\n"
"Fusion speed-up 434.50%\n",
" Baseline: 0.716 s, Current: 0.134 s\n"
]
}
],
@ -339,8 +339,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Nuphar Scan speed-up 1.97%\n",
" Baseline: 3.062 s, Current: 3.003 s\n"
"Nuphar Scan speed-up 7.68%\n",
" Baseline: 3.037 s, Current: 2.821 s\n"
]
}
],
@ -444,8 +444,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Quantization speed-up 196.18%\n",
" Baseline: 3.003 s, Current: 1.014 s\n"
"Quantization speed-up 278.52%\n",
" Baseline: 2.821 s, Current: 0.745 s\n"
]
}
],
@ -575,8 +575,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Nuphar BERT squad speed-up 67.20%\n",
" Baseline: 5.089 s, Current: 3.044 s\n"
"Nuphar BERT squad speed-up 65.18%\n",
" Baseline: 5.023 s, Current: 3.041 s\n"
]
}
],
@ -765,8 +765,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Nuphar quantized BiDAF speed-up 44.03%\n",
" Baseline: 0.304 s, Current: 0.211 s\n"
"Nuphar quantized BiDAF speed-up 45.63%\n",
" Baseline: 0.305 s, Current: 0.209 s\n"
]
}
],
@ -807,7 +807,7 @@
{
"data": {
"text/plain": [
"'JIT took 4.612 seconds'"
"'JIT took 4.655 seconds'"
]
},
"execution_count": 28,
@ -887,8 +887,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"AOT speed-up 952.77%\n",
" Baseline: 4.612 s, Current: 0.438 s\n"
"AOT speed-up 967.73%\n",
" Baseline: 4.655 s, Current: 0.436 s\n"
]
}
],
@ -919,8 +919,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Single thread perf w/o parallel schedule speed-up 3.80%\n",
" Baseline: 0.318 s, Current: 0.306 s\n"
"Single thread perf w/o parallel schedule speed-up 2.83%\n",
" Baseline: 0.315 s, Current: 0.307 s\n"
]
}
],
@ -947,13 +947,6 @@
"end = timer()\n",
"print_speedup('Single thread perf w/o parallel schedule', end_baseline - start_baseline, end - start)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {

View file

@ -225,6 +225,17 @@ NupharExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_vie
}
}
}
// reject when pooling on symbolic dims, since shape computation does not support it yet
it = attrs.find("kernel_shape");
ORT_ENFORCE(it != attrs.end());
int kernel_rank = it->second.ints_size();
const auto output_shape = node.OutputDefs()[0]->Shape();
int output_rank = output_shape->dim_size();
for (int d = output_rank - kernel_rank; d < output_rank; ++d) {
if (output_shape->dim(d).has_dim_param()) {
return false;
}
}
}
if (node.OpType() == "Slice") {

View file

@ -120,6 +120,7 @@ class SymbolicShapeInference:
self.run_ = True
self.suggested_merge_ = {}
self.symbolic_dims_ = {}
self.input_symbols_ = {}
self.auto_merge_ = auto_merge
self.guess_output_rank_ = guess_output_rank
self.verbose_ = verbose
@ -138,7 +139,12 @@ class SymbolicShapeInference:
if is_literal(s):
map_to = s
break
# when no literals, map to existing symbolic dims
# when no literals, map to input symbolic dims, then existing symbolic dims
if map_to is None:
for s in symbols:
if s in self.input_symbols_:
map_to = s
break
if map_to is None:
for s in symbols:
if type(self.symbolic_dims_[s]) == sympy.Symbol:
@ -228,6 +234,7 @@ class SymbolicShapeInference:
int_dim = is_int.index(1)
if self.verbose_ > 0:
print('dim {} has been merged with value {}'.format(dims[1 - int_dim], dims[int_dim]))
self._check_merged_dims(dims, allow_broadcast=False)
return dims[int_dim]
else:
return None
@ -621,14 +628,13 @@ class SymbolicShapeInference:
self._update_computed_dims(sympy_shape)
if type(sympy_shape) != list:
sympy_shape = [sympy_shape]
vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
vi.type.tensor_type.elem_type,
get_shape_from_sympy_shape(sympy_shape)))
else:
# create new dynamic shape
vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
vi.type.tensor_type.elem_type,
self._new_symbolic_shape(self._get_shape_rank(node,0), node)))
sympy_shape = self._new_symbolic_shape(self._get_shape_rank(node,0), node)
vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
vi.type.tensor_type.elem_type,
get_shape_from_sympy_shape(sympy_shape)))
def _infer_Expand(self, node):
expand_to_shape = self._try_get_value(node, 1)
@ -813,7 +819,7 @@ class SymbolicShapeInference:
assert is_literal(shape_rank)
vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
vi.type.tensor_type.elem_type,
self._new_symbolic_shape(shape_rank, node)))
get_shape_from_sympy_shape(self._new_symbolic_shape(shape_rank, node))))
else:
input_shape = self._get_shape(node, 0)
input_sympy_shape = self._get_sympy_shape(node, 0)
@ -850,19 +856,36 @@ class SymbolicShapeInference:
def _infer_Resize(self, node):
vi = self.known_vi_[node.output[0]]
if get_opset(self.out_mp_) <= 10: # only support opset 10 Resize for now
input_sympy_shape = self._get_sympy_shape(node, 0)
if get_opset(self.out_mp_) <= 10:
scales = self._try_get_value(node, 1)
if scales is not None:
input_sympy_shape = self._get_sympy_shape(node, 0)
new_sympy_shape = [sympy.simplify(sympy.floor(d*s)) for d,s in zip(input_sympy_shape, scales)]
self._update_computed_dims(new_sympy_shape)
vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
self.known_vi_[node.input[0]].type.tensor_type.elem_type,
get_shape_from_sympy_shape(new_sympy_shape)))
else:
roi = self._try_get_value(node, 1)
scales = self._try_get_value(node, 2)
sizes = self._try_get_value(node, 3)
if sizes is not None:
new_sympy_shape = [sympy.simplify(sympy.floor(s)) for s in sizes]
self._update_computed_dims(new_sympy_shape)
elif roi is not None and scales is not None:
rank = len(scales)
assert len(roi) == 2*rank
roi_start = list(roi)[:rank]
roi_end = list(roi)[rank:]
scales = list(scales)
new_sympy_shape = [sympy.simplify(sympy.floor(d * (end - start) * scale)) for d, start, end, scale in zip(input_sympy_shape, roi_start, roi_end, scales)]
self._update_computed_dims(new_sympy_shape)
else:
new_sympy_shape = self._new_symbolic_shape(self._get_shape_rank(node, 0), node)
vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
self.known_vi_[node.input[0]].type.tensor_type.elem_type,
self._new_symbolic_shape(self._get_shape_rank(node, 0), node)))
get_shape_from_sympy_shape(new_sympy_shape)))
def _infer_Scan(self, node):
subgraph = get_attribute(node, 'body')
@ -1064,16 +1087,16 @@ class SymbolicShapeInference:
self.sympy_data_ = start_sympy_data
self.out_mp_.graph.ClearField('value_info')
self._apply_suggested_merge(graph_input_only=True)
input_symbols = set()
self.input_symbols_ = set()
for i in self.out_mp_.graph.input:
input_dims = i.type.tensor_type.shape.dim
for i_dim in range(len(input_dims)):
if get_dim_from_type_proto(input_dims[i_dim]) is None:
# some models use None for symbolic dim in input, replace it with a string
input_dims[i_dim].dim_param = self._new_symbolic_dim(i.name, i_dim)
input_symbols.update([d for d in get_shape_from_type_proto(i.type) if type(d) == str])
self.input_symbols_.update([d for d in get_shape_from_type_proto(i.type) if type(d) == str])
for s in input_symbols:
for s in self.input_symbols_:
if s in self.suggested_merge_:
s_merge = self.suggested_merge_[s]
assert s_merge in self.symbolic_dims_
@ -1166,7 +1189,7 @@ class SymbolicShapeInference:
new_shape = self._new_symbolic_shape(out_rank, node, i_o)
vi.CopyFrom(helper.make_tensor_value_info(vi.name,
self.known_vi_[node.input[0]].type.tensor_type.elem_type,
new_shape))
get_shape_from_sympy_shape(new_shape)))
if self.verbose_ > 0:
if is_unknown_op: