mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-15 21:00:47 +00:00
* [C2] Don't crash kernel in case of invalid shapes for ConcatOp Enforce correctness of the shapes for input tensors so we won't access invalid index. * [Caffe2] Add analytical performance counters to Dynolog Initial diff for counting analytical flops and memory writes for C2 operators. * BBoxTransform op: Handle RoIs from multiple images per batch BBoxTransform op used during typical Faster-RCNN inference operates only on RoIs from a single image (no batching). Adding support to handle that with an optional output blob containing the batch splits (i.e., the number of RoIs belonging to each item in the batch). The code is perfectly backward compatible and shouldn't break any existing models. * [mkl] Make MKL-DNN cooperate with memongered nets C2's MKL-DNN implementation caches input dims and reuses intermediate and output buffers across net runs, which prevents memonger from being used. This may not always be useful since input dims may vary widely in many cases and we'll end up reallocating anyway. Added an option to force reallocation when memonger is used. * [oncall] fix batch gather ops for empty input still need to bisect for the breaking change, but this shall fix the case for empty input. the error logging is like: https://interncache-ftw.fbcdn.net/t49.3276-7/23938497_293562711176943_6500112636590424064_n.txt?_nc_log=1 @[557759185:raychen] can you help to subscribe oncall from ads side. this may affect the Sigrid online trainer. * optimize BatchOneHotOp We want to iterate in row-major as opposed to column-major for better locality. * Supported exporting model with int blobs. Supported exporting model with int blobs. Needed by condensenet. * BoxWithNMSLimit op: Handle boxes from multiple images per batch Similar to D7135360. Added support for multiple images per batch in the op. Takes an optional additional input "batch_splits" as output by BBoxTransform op, and returns new batch_splits after applying NMS and filtering. 
Otherwise, backward compatibility is maintained.
109 lines
3.9 KiB
Python
109 lines
3.9 KiB
Python
# Copyright (c) 2016-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

## @package mobile_exporter
# Module caffe2.python.mobile_exporter

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.python import core, utils
from caffe2.proto import caffe2_pb2
import numpy as np

def add_tensor(net, name, blob):
    ''' Create an operator to store the tensor 'blob',
        run the operator to put the blob to workspace.
        uint8 is stored as an array of string with one element.

        Args:
            net: a caffe2_pb2.NetDef proto; the fill operator is appended
                to its op list.
            name: output blob name for the created fill operator.
            blob: a numpy ndarray holding the tensor data; its dtype must
                be one of float32/int32/int64/uint8.
    '''
    kTypeNameMapper = {
        np.dtype('float32'): "GivenTensorFill",
        np.dtype('int32'): "GivenTensorIntFill",
        np.dtype('int64'): "GivenTensorInt64Fill",
        np.dtype('uint8'): "GivenTensorStringFill",
    }

    shape = blob.shape
    values = blob
    # pass array of uint8 as a string to save storage
    # storing uint8_t has a large overhead for now
    if blob.dtype == np.dtype('uint8'):
        shape = [1]
        # BUGFIX: str(blob.data) stringifies the buffer/memoryview object
        # itself under Python 3 (e.g. '<memory at 0x...>') rather than its
        # contents; tobytes() yields the raw bytes of the array on both
        # Python 2 and Python 3.
        values = [blob.tobytes()]

    op = core.CreateOperator(
        kTypeNameMapper[blob.dtype],
        [], [name],
        arg=[
            utils.MakeArgument("shape", shape),
            utils.MakeArgument("values", values),
        ]
    )
    net.op.extend([op])
|
|
|
|
|
|
def Export(workspace, net, params):
    """Returns init_net and predict_net suitable for writing to disk
    and loading into a Predictor"""
    # `net` may be either a raw NetDef proto or a core.Net wrapper.
    proto = net if isinstance(net, caffe2_pb2.NetDef) else net.Proto()
    predict_net = caffe2_pb2.NetDef()
    predict_net.CopyFrom(proto)
    init_net = caffe2_pb2.NetDef()

    # Populate the init_net.
    ssa, blob_versions = core.get_ssa(net)

    # Every blob name that appears as an input to some operator.
    consumed = []
    for versioned_inputs, _ in ssa:
        for blob_name, _ in versioned_inputs:
            consumed.append(blob_name)

    # Version-0 blobs that are not weights are the net's true inputs.
    input_blobs = []
    for blob_name, version in blob_versions.items():
        if version == 0 and blob_name not in params:
            input_blobs.append(blob_name)

    # Blobs that are never used as an input to another layer,
    # i.e. strictly output blobs.
    output_blobs = []
    for blob_name, version in blob_versions.items():
        if version != 0 and blob_name not in consumed:
            output_blobs.append(blob_name)

    # Serialize every weight blob into the init_net.
    for blob_ref in params:
        blob_name = str(blob_ref)
        add_tensor(init_net, blob_name, workspace.FetchBlob(blob_name))

    # We have to make sure the blob exists in the namespace
    # and we can do so with fake data. (Which is immediately overwritten
    # by any typical usage)
    for blob_name in input_blobs:
        placeholder = core.CreateOperator(
            "GivenTensorFill", [], [blob_name],
            arg=[
                utils.MakeArgument("shape", [1, 1]),
                utils.MakeArgument("values", [0.0])
            ]
        )
        init_net.op.extend([placeholder])

    # Now we make input/output_blobs line up with what Predictor expects.
    del predict_net.external_input[:]
    predict_net.external_input.extend(input_blobs)
    # For populating weights
    predict_net.external_input.extend(proto.external_input)
    # Ensure the output is also consistent with what we want
    del predict_net.external_output[:]
    predict_net.external_output.extend(output_blobs)
    return init_net, predict_net
|