2020-09-24 00:55:24 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2017-02-28 07:22:06 +00:00
|
|
|
|
2017-05-05 00:19:40 +00:00
|
|
|
import hypothesis.strategies as st
|
2017-03-29 21:33:52 +00:00
|
|
|
import numpy as np
|
2017-05-03 00:22:16 +00:00
|
|
|
import numpy.testing as npt
|
2020-08-08 19:10:52 +00:00
|
|
|
from hypothesis import given, settings
|
2017-05-05 00:19:40 +00:00
|
|
|
|
|
|
|
|
import caffe2.python.hypothesis_test_util as hu
|
|
|
|
|
|
2017-02-28 07:22:06 +00:00
|
|
|
from caffe2.python import (
|
|
|
|
|
layer_model_instantiator,
|
2017-07-14 23:02:42 +00:00
|
|
|
core,
|
2017-02-28 07:22:06 +00:00
|
|
|
schema,
|
2017-03-29 21:33:52 +00:00
|
|
|
workspace,
|
2017-03-29 10:51:24 +00:00
|
|
|
)
|
2017-05-03 00:22:16 +00:00
|
|
|
from caffe2.python.layers.layers import (
|
2019-07-22 21:56:16 +00:00
|
|
|
AccessedFeatures,
|
2019-08-28 06:22:23 +00:00
|
|
|
almost_equal_schemas,
|
2019-07-22 21:56:16 +00:00
|
|
|
get_key,
|
|
|
|
|
IdList,
|
2019-08-28 06:22:23 +00:00
|
|
|
IdScoreList,
|
2017-05-03 00:22:16 +00:00
|
|
|
InstantiationContext,
|
2019-07-22 21:56:16 +00:00
|
|
|
is_request_only_scalar,
|
|
|
|
|
set_request_only,
|
2017-05-03 00:22:16 +00:00
|
|
|
)
|
|
|
|
|
from caffe2.python.layers.tags import Tags
|
2017-03-29 10:51:24 +00:00
|
|
|
from caffe2.python.layer_test_util import (
|
|
|
|
|
LayersTestCase,
|
|
|
|
|
OpSpec,
|
2017-02-28 07:22:06 +00:00
|
|
|
)
|
2018-01-26 19:28:32 +00:00
|
|
|
import logging
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
2017-02-28 07:22:06 +00:00
|
|
|
|
2017-03-29 10:51:24 +00:00
|
|
|
class TestLayers(LayersTestCase):
|
2019-07-23 21:24:21 +00:00
|
|
|
def testSparseDropoutWithReplacement(self):
    '''
    Run SparseDropoutWithReplacement on a ten-row IdList and check what
    the train and predict nets produce.

    The assertions expect the train net to pass values and lengths
    through unchanged, and the predict net to emit -1 for every value
    while keeping the lengths.
    '''
    id_list = schema.NewRecord(self.model.net, IdList)
    self.model.output_schema = schema.Struct()

    # Feed ten single-element rows holding the ids 1..10.
    lengths_blob, values_blob = id_list.field_blobs()[:2]
    lengths = np.ones(10, dtype=np.int32)
    values = np.arange(1, 11, dtype=np.int64)
    workspace.FeedBlob(lengths_blob, lengths)
    workspace.FeedBlob(values_blob, values)

    # NOTE(review): the three floats look like per-net dropout settings
    # (train, eval, predict) and -1 the replacement id -- confirm against
    # the layer signature.
    out = self.model.SparseDropoutWithReplacement(
        id_list, 0.0, 0.5, 1.0, -1, output_names_or_num=1)
    self.assertEqual(schema.List(schema.Scalar(np.int64)), out)

    train_init_net, train_net = self.get_training_nets()
    eval_net = self.get_eval_net()
    predict_net = self.get_predict_net()

    # Training: output must equal the input.
    workspace.RunNetOnce(train_init_net)
    workspace.RunNetOnce(train_net)
    self.assertBlobsEqual(workspace.FetchBlob(out.items()), values)
    self.assertBlobsEqual(workspace.FetchBlob(out.lengths()), lengths)

    # Evaluation: only checked for running without error.
    workspace.RunNetOnce(eval_net)

    # Prediction: every value is expected to be replaced by -1.
    workspace.RunNetOnce(predict_net)
    all_replaced = np.full(10, -1, dtype=np.int64)
    self.assertBlobsEqual(workspace.FetchBlob("values_auto_0"), all_replaced)
    self.assertBlobsEqual(workspace.FetchBlob("lengths_auto_0"), lengths)
|
|
|
|
|
|
2017-07-14 23:02:42 +00:00
|
|
|
def testAddLoss(self):
    '''
    Exercise model.add_loss with a scalar loss, a named loss, a name
    collision, a Struct loss and a NamedTuple loss.

    After each call the test checks the field name under which the loss
    was registered in self.model.loss and the schema stored there.
    Membership checks use assertIn rather than bare assert so they are
    not stripped under `python -O` and report the container on failure.
    '''
    scalar_loss_type = schema.Scalar((np.float32, tuple()))

    input_record_LR = self.new_record(
        schema.Struct(
            ('label', schema.Scalar((np.float64, (1, )))),
            ('logit', schema.Scalar((np.float32, (2, )))),
            ('weight', schema.Scalar((np.float64, (1, ))))
        )
    )
    loss_LR = self.model.BatchLRLoss(input_record_LR)

    # No name given: the loss is expected under 'unnamed'.
    self.model.add_loss(loss_LR)
    self.assertIn('unnamed', self.model.loss)
    self.assertEqual(scalar_loss_type, self.model.loss.unnamed)
    self.assertEqual(loss_LR, self.model.loss.unnamed)

    # Explicit name.
    self.model.add_loss(loss_LR, 'addLoss')
    self.assertIn('addLoss', self.model.loss)
    self.assertEqual(scalar_loss_type, self.model.loss.addLoss)
    self.assertEqual(loss_LR, self.model.loss.addLoss)

    # Same name again: expected to be stored with an '_auto_0' suffix.
    self.model.add_loss(
        schema.Scalar(
            dtype=np.float32, blob=core.BlobReference('loss_blob_1')
        ),
        'addLoss'
    )
    self.assertIn('addLoss_auto_0', self.model.loss)
    self.assertEqual(scalar_loss_type, self.model.loss.addLoss_auto_0)
    self.assertIn(
        core.BlobReference('loss_blob_1'), self.model.loss.field_blobs()
    )

    # A Struct-valued loss under the already-used name.
    self.model.add_loss(
        schema.Struct(
            (
                'structName',
                schema.Scalar(
                    dtype=np.float32,
                    blob=core.BlobReference('loss_blob_2')
                )
            )
        ),
        'addLoss'
    )
    self.assertIn('addLoss_auto_1', self.model.loss)
    self.assertEqual(
        schema.Struct(('structName', schema.Scalar((np.float32, tuple())))),
        self.model.loss.addLoss_auto_1
    )
    self.assertIn(
        core.BlobReference('loss_blob_2'), self.model.loss.field_blobs()
    )

    # A NamedTuple-valued loss under the already-used name.
    loss_in_tuple_0 = schema.Scalar(
        dtype=np.float32, blob=core.BlobReference('loss_blob_in_tuple_0')
    )
    loss_in_tuple_1 = schema.Scalar(
        dtype=np.float32, blob=core.BlobReference('loss_blob_in_tuple_1')
    )
    loss_tuple = schema.NamedTuple(
        'loss_in_tuple', loss_in_tuple_0, loss_in_tuple_1
    )
    self.model.add_loss(loss_tuple, 'addLoss')
    self.assertIn('addLoss_auto_2', self.model.loss)
    self.assertEqual(
        schema.Struct(
            ('loss_in_tuple_0', schema.Scalar((np.float32, tuple()))),
            ('loss_in_tuple_1', schema.Scalar((np.float32, tuple())))
        ),
        self.model.loss.addLoss_auto_2
    )
    self.assertIn(
        core.BlobReference('loss_blob_in_tuple_0'),
        self.model.loss.field_blobs()
    )
    self.assertIn(
        core.BlobReference('loss_blob_in_tuple_1'),
        self.model.loss.field_blobs()
    )
|
2017-03-15 17:56:36 +00:00
|
|
|
|
2019-05-22 19:13:04 +00:00
|
|
|
def testFilterMetricSchema(self):
    '''
    Register the metric fields "a:b", "a:c" and "d", then check the
    metrics schema before and after filtering it down to "a:b" and "d".
    '''
    for metric_name in ("a:b", "a:c", "d"):
        self.model.add_metric_field(metric_name, schema.Scalar())

    # The assertion expects "a:b" / "a:c" to be nested under a struct "a".
    unfiltered = schema.Struct(
        ("a", schema.Struct(
            ("b", schema.Scalar()),
            ("c", schema.Scalar()),
        )),
        ("d", schema.Scalar()),
    )
    self.assertEqual(self.model.metrics_schema, unfiltered)

    self.model.filter_metrics_schema({"a:b", "d"})

    # Only the requested fields are expected to remain.
    filtered = schema.Struct(
        ("a", schema.Struct(
            ("b", schema.Scalar()),
        )),
        ("d", schema.Scalar()),
    )
    self.assertEqual(self.model.metrics_schema, filtered)
|
|
|
|
|
|
2018-03-12 19:22:59 +00:00
|
|
|
def testAddOutputSchema(self):
    '''
    Add two fields to the model output schema one at a time, checking
    the accumulated schema after each, then check that re-adding an
    existing field name raises AssertionError.
    '''
    # add the first field
    self.model.add_output_schema('struct', schema.Struct())
    self.assertEqual(
        self.model.output_schema,
        schema.Struct(('struct', schema.Struct())),
    )

    # add the second field
    self.model.add_output_schema('scalar', schema.Scalar(np.float64))
    both_fields = schema.Struct(
        ('struct', schema.Struct()),
        ('scalar', schema.Scalar(np.float64)),
    )
    self.assertEqual(self.model.output_schema, both_fields)

    # overwrite a field should raise
    with self.assertRaises(AssertionError):
        self.model.add_output_schema('scalar', schema.Struct())
|
|
|
|
|
|
2017-07-10 16:55:12 +00:00
|
|
|
def _test_net(self, net, ops_list):
    '''
    Check that `net` contains the given operators, then run it once.

    Inputs:
      net -- the network to test and run
      ops_list -- operator specifications passed to assertNetContainOps
    Returns:
      the value returned by assertNetContainOps
    '''
    matched_ops = self.assertNetContainOps(net, ops_list)
    workspace.RunNetOnce(net)
    return matched_ops
|
|
|
|
|
|
2017-03-15 17:56:36 +00:00
|
|
|
def testFCWithoutBias(self):
    '''
    Build an FCWithoutBias layer on the float features and check its
    output schema and the operators in the init, train and predict nets.
    '''
    output_dims = 2
    float_features = self.model.input_feature_schema.float_features
    fc_without_bias = self.model.FCWithoutBias(float_features, output_dims)
    self.model.output_schema = fc_without_bias

    expected_type = schema.Scalar((np.float32, (output_dims, )))
    self.assertEqual(expected_type, fc_without_bias)

    train_init_net, train_net = self.get_training_nets()

    # The init net is expected to hold one UniformFill; its first output
    # is used as the second MatMul input below.
    init_ops = self.assertNetContainOps(
        train_init_net,
        [OpSpec("UniformFill", None, None)]
    )
    weight_blob = init_ops[0].output[0]

    mat_mul_spec = OpSpec(
        "MatMul",
        [
            self.model.input_feature_schema.float_features(),
            weight_blob,
        ],
        fc_without_bias.field_blobs()
    )

    # Train and predict nets are expected to contain the same MatMul.
    self.assertNetContainOps(train_net, [mat_mul_spec])

    predict_net = self.get_predict_net()
    self.assertNetContainOps(predict_net, [mat_mul_spec])
|
|
|
|
|
|
2019-11-05 05:16:46 +00:00
|
|
|
def testFCWithBootstrap(self):
    '''
    Build an FCWithBootstrap layer with two bootstrap iterations and
    check its output blobs and the operator sequences of the train and
    predict nets.

    Uses unittest assertions rather than bare assert so the checks are
    not stripped under `python -O` and failures show the differing
    values. Operator types are compared as whole lists, which covers
    both the length check and the per-position check.
    '''
    output_dims = 1
    fc_with_bootstrap = self.model.FCWithBootstrap(
        self.model.input_feature_schema.float_features,
        output_dims=output_dims,
        num_bootstrap=2,
        max_fc_size=-1
    )
    self.model.output_schema = fc_with_bootstrap

    self.assertEqual(len(fc_with_bootstrap), 4)

    # must be in this order
    expected_blob_names = [
        "fc_with_bootstrap/bootstrap_iteration_0/indices",
        "fc_with_bootstrap/bootstrap_iteration_0/preds",
        "fc_with_bootstrap/bootstrap_iteration_1/indices",
        "fc_with_bootstrap/bootstrap_iteration_1/preds",
    ]
    for position, blob_name in enumerate(expected_blob_names):
        self.assertEqual(
            core.BlobReference(blob_name),
            fc_with_bootstrap[position].field_blobs()[0]
        )

    # The init net is not inspected by this test.
    _, train_net = self.get_training_nets()
    predict_net = layer_model_instantiator.generate_predict_net(self.model)

    train_proto = train_net.Proto()
    predict_proto = predict_net.Proto()

    master_train_ops = [
        "Shape",
        "GivenTensorInt64Fill",
        "Gather",
        "GivenTensorIntFill",
        "GivenTensorIntFill",
        "Cast",
        "Sub",
        "UniformIntFill",
        "Gather",
        "FC",
        "UniformIntFill",
        "Gather",
        "FC",
    ]

    # Same sequence as the train list minus the per-iteration Gather ops.
    master_eval_ops = [
        "Shape",
        "GivenTensorInt64Fill",
        "Gather",
        "GivenTensorIntFill",
        "GivenTensorIntFill",
        "Cast",
        "Sub",
        "UniformIntFill",
        "FC",
        "UniformIntFill",
        "FC",
    ]

    self.assertEqual(
        list(train_proto.external_input),
        list(predict_proto.external_input)
    )
    self.assertEqual(list(train_proto.external_output), [])

    # make sure all the ops are present and unchanged for train_net and
    # the predict net
    self.assertEqual([op.type for op in train_proto.op], master_train_ops)
    self.assertEqual([op.type for op in predict_proto.op], master_eval_ops)
|
|
|
|
|
|
|
|
|
|
|
2018-11-11 21:43:08 +00:00
|
|
|
def testFCwithAxis2(self):
    '''
    Build an FC layer with axis=2 on a (max_length, input_dim) float
    input and check the output schema is (max_length, output_dim).
    '''
    input_dim, output_dim, max_length = 10, 30, 20

    sequence_type = schema.Scalar((np.float32, (max_length, input_dim)))
    input_record = self.new_record(
        schema.Struct(
            ('history_sequence', sequence_type),
        )
    )

    fc_out = self.model.FC(
        input_record.history_sequence, output_dim, axis=2)
    self.model.output_schema = fc_out

    expected_type = schema.Scalar((np.float32, (max_length, output_dim)))
    self.assertEqual(expected_type, fc_out)

    # The nets are generated but not inspected further.
    _init_net, _train_net = self.get_training_nets()
|
|
|
|
|
|
2019-12-10 18:13:20 +00:00
|
|
|
def testFCTransposed(self):
    '''
    Build an FC layer with axis=2 and transposed=True on a
    (max_length, input_dim) float input and check the output schema is
    (max_length, output_dim).
    '''
    input_dim, output_dim, max_length = 10, 30, 20

    sequence_type = schema.Scalar((np.float32, (max_length, input_dim)))
    input_record = self.new_record(
        schema.Struct(
            ('history_sequence', sequence_type),
        )
    )

    fc_transposed_out = self.model.FC(
        input_record.history_sequence, output_dim,
        axis=2, transposed=True)
    self.model.output_schema = fc_transposed_out

    expected_type = schema.Scalar((np.float32, (max_length, output_dim)))
    self.assertEqual(expected_type, fc_transposed_out)

    # The nets are generated but not inspected further.
    _init_net, _train_net = self.get_training_nets()
|
|
|
|
|
|
|
|
|
|
def testFCTransposedWithMaxFCSize(self):
    '''
    Same as the transposed FC test, but with max_fc_size set to half of
    input_dim * output_dim; the output schema is still expected to be
    (max_length, output_dim).
    '''
    input_dim, output_dim, max_length = 10, 30, 20

    sequence_type = schema.Scalar((np.float32, (max_length, input_dim)))
    input_record = self.new_record(
        schema.Struct(
            ('history_sequence', sequence_type),
        )
    )

    half_weight_size = input_dim * output_dim // 2
    fc_transposed_out = self.model.FC(
        input_record.history_sequence, output_dim,
        max_fc_size=half_weight_size,
        axis=2, transposed=True)
    self.model.output_schema = fc_transposed_out

    expected_type = schema.Scalar((np.float32, (max_length, output_dim)))
    self.assertEqual(expected_type, fc_transposed_out)

    # The nets are generated but not inspected further.
    _init_net, _train_net = self.get_training_nets()
|
|
|
|
|
|
2019-07-03 17:30:30 +00:00
|
|
|
def testSparseLookupSumPoolingWithEviction(self):
    '''
    Check that a row listed in the evicted values is reset when the
    train net runs.

    A one-row embedding table is initialised with ConstantFill(1.0),
    overwritten with 2.0, and then the train net is run with row 0
    listed as evicted. The table fetched afterwards is compared
    element-wise with the table fetched right after initialisation.
    '''
    # Create test embedding table of 1 row
    record = schema.NewRecord(self.model.net, schema.Struct(
        ('sparse', schema.Struct(
            ('sparse_feature_0', schema.ListWithEvicted(
                schema.Scalar(
                    np.int64,
                    metadata=schema.Metadata(categorical_limit=1)),
            )),
        )),
    ))
    embedding_dim = 8
    lengths_blob = record.sparse.sparse_feature_0.lengths.get()
    values_blob = record.sparse.sparse_feature_0.items.get()
    evicted_values_blob = record.sparse.sparse_feature_0._evicted_values.get()
    lengths = np.array([1]).astype(np.int32)
    values = np.array([0]).astype(np.int64)
    # Need to reset row 0
    evicted_values = np.array([0]).astype(np.int64)
    workspace.FeedBlob(lengths_blob, lengths)
    workspace.FeedBlob(values_blob, values)
    workspace.FeedBlob(evicted_values_blob, evicted_values)

    embedding_after_pooling = self.model.SparseLookup(
        record.sparse.sparse_feature_0,
        [embedding_dim],
        'Sum',
        weight_init=("ConstantFill", {"value": 1.0}))

    self.model.output_schema = schema.Struct()
    self.assertEqual(
        schema.Scalar((np.float32, (embedding_dim, ))),
        embedding_after_pooling
    )
    train_init_net, train_net = self.get_training_nets()
    workspace.RunNetOnce(train_init_net)
    # Copy the snapshot so it cannot alias workspace memory that is
    # overwritten below (defensive; FetchBlob may already return a copy).
    embedding_after_init = workspace.FetchBlob("sparse_lookup/w").copy()
    # Change row 0's value before reset
    new_values = np.array([[2, 2, 2, 2, 2, 2, 2, 2]]).astype(np.float32)
    workspace.FeedBlob("sparse_lookup/w", new_values)
    workspace.RunNetOnce(train_net.Proto())
    embedding_after_training = workspace.FetchBlob("sparse_lookup/w")
    # Verify row 0 is back at its initial value after the reset. Compare
    # the arrays element-wise: comparing `.all()` results only compares
    # two booleans and passes for any pair of non-zero tables.
    npt.assert_array_equal(embedding_after_training, embedding_after_init)
|
|
|
|
|
|
|
|
|
|
|
2018-01-23 03:17:11 +00:00
|
|
|
def testSparseLookupSumPooling(self):
    '''
    Build a SparseLookup layer with the 'Sum' reducer on an id list and
    check its output schema and the operators in the init, train and
    predict nets.
    '''
    id_list_type = schema.List(
        schema.Scalar(
            np.int64,
            metadata=schema.Metadata(categorical_limit=1000)))
    record = schema.NewRecord(self.model.net, schema.Struct(
        ('sparse', schema.Struct(
            ('sparse_feature_0', id_list_type),
        )),
    ))
    feature = record.sparse.sparse_feature_0

    embedding_dim = 64
    embedding_after_pooling = self.model.SparseLookup(
        feature, [embedding_dim], 'Sum')
    self.model.output_schema = schema.Struct()

    expected_type = schema.Scalar((np.float32, (embedding_dim, )))
    self.assertEqual(expected_type, embedding_after_pooling)

    train_init_net, train_net = self.get_training_nets()

    # The first matched init op's output is used as the lookup's first
    # input below.
    init_ops = self.assertNetContainOps(
        train_init_net,
        [
            OpSpec("UniformFill", None, None),
            OpSpec("ConstantFill", None, None),
        ]
    )
    table_blob = init_ops[0].output[0]

    sparse_lookup_op_spec = OpSpec(
        'SparseLengthsSum',
        [
            table_blob,
            record.sparse.sparse_feature_0.items(),
            record.sparse.sparse_feature_0.lengths(),
        ],
        [embedding_after_pooling()]
    )

    # Train and predict nets are expected to contain the same lookup op.
    self.assertNetContainOps(train_net, [sparse_lookup_op_spec])

    predict_net = self.get_predict_net()
    self.assertNetContainOps(predict_net, [sparse_lookup_op_spec])
|
|
|
|
|
|
2018-02-25 14:59:14 +00:00
|
|
|
@given(
    use_hashing=st.booleans(),
    modulo=st.integers(min_value=100, max_value=200),
    use_divide_mod=st.booleans(),
    divisor=st.integers(min_value=10, max_value=20),
)
def testSparseFeatureHashIdList(self, use_hashing, modulo, use_divide_mod, divisor):
    '''
    Build a SparseFeatureHash layer on an id list for generated
    combinations of hashing / divide-mod settings.

    Checks that exactly one layer is added, that the output categorical
    limit equals `modulo`, and that the train net holds three operators
    when divide-mod is in effect and two otherwise.
    '''
    id_list_type = schema.List(schema.Scalar(
        np.int64,
        metadata=schema.Metadata(categorical_limit=60000)
    ))
    record = schema.NewRecord(self.model.net, id_list_type)

    # Divide-mod is only passed through when hashing is switched off.
    divide_mod_enabled = False
    if use_hashing is False:
        divide_mod_enabled = use_divide_mod

    output_schema = self.model.SparseFeatureHash(
        record,
        modulo=modulo,
        use_hashing=use_hashing,
        use_divide_mod=divide_mod_enabled,
        divisor=divisor,
    )

    self.model.output_schema = output_schema

    self.assertEqual(len(self.model.layers), 1)
    self.assertEqual(
        output_schema._items.metadata.categorical_limit, modulo)

    _init_net, train_net = self.get_training_nets()
    expected_op_count = 3 if divide_mod_enabled else 2
    self.assertEqual(len(train_net.Proto().op), expected_op_count)
|
2018-02-25 14:59:14 +00:00
|
|
|
|
|
|
|
|
@given(
    use_hashing=st.booleans(),
    modulo=st.integers(min_value=100, max_value=200),
)
def testSparseFeatureHashIdScoreList(self, use_hashing, modulo):
    '''
    Build a SparseFeatureHash layer on an id-score list (int64 keys
    mapped to float32) and check that one layer is added and that the
    output keys' categorical limit equals `modulo`.
    '''
    key_type = schema.Scalar(
        np.int64,
        metadata=schema.Metadata(categorical_limit=60000))
    record = schema.NewRecord(
        self.model.net,
        schema.Map(key_type, np.float32))

    output_schema = self.model.SparseFeatureHash(
        record,
        modulo=modulo,
        use_hashing=use_hashing)

    self.model.output_schema = output_schema

    self.assertEqual(len(self.model.layers), 1)
    self.assertEqual(
        output_schema._items.keys.metadata.categorical_limit, modulo)

    # The nets are generated but not inspected further.
    _init_net, _train_net = self.get_training_nets()
|
|
|
|
|
|
2018-01-23 03:17:11 +00:00
|
|
|
def testSparseLookupIncorrectPositionWeightedOnIdList(self):
    '''
    SparseLookup currently assumes an id_score_list input when the
    PositionWeighted reducer is used, so passing a plain id list is
    expected to raise AssertionError.
    '''
    id_list_type = schema.List(
        schema.Scalar(
            np.int64,
            metadata=schema.Metadata(categorical_limit=1000)))
    record = schema.NewRecord(self.model.net, schema.Struct(
        ('sparse', schema.Struct(
            ('sparse_feature_0', id_list_type),
        )),
    ))

    embedding_dim = 64
    feature = record.sparse.sparse_feature_0
    with self.assertRaises(AssertionError):
        self.model.SparseLookup(
            feature, [embedding_dim], 'PositionWeighted')
|
|
|
|
|
|
|
|
|
|
def testSparseLookupPositionWeightedOnIdList(self):
    '''
    Convert an id list into an id-score list with a PositionWeighted
    layer, feed it to SparseLookup with the 'PositionWeighted' reducer,
    and check the output schema and the operators in the init, train
    and predict nets.
    '''
    id_list_type = schema.List(
        schema.Scalar(
            np.int64,
            metadata=schema.Metadata(categorical_limit=1000)))
    record = schema.NewRecord(self.model.net, schema.Struct(
        ('sparse', schema.Struct(
            ('sparse_feature_0', id_list_type),
        )),
    ))

    # convert id_list to id_score_list with PositionWeighted layer
    id_list = record.sparse.sparse_feature_0
    pos_w_layer = self.model.PositionWeighted(id_list)

    id_score_list = schema.Map(
        keys=get_key(id_list),
        values=pos_w_layer.position_weights,
        lengths_blob=id_list.lengths
    )

    embedding_dim = 64
    embedding_after_pooling = self.model.SparseLookup(
        id_score_list, [embedding_dim], 'PositionWeighted')
    self.model.output_schema = schema.Struct()

    expected_type = schema.Scalar((np.float32, (embedding_dim, )))
    self.assertEqual(expected_type, embedding_after_pooling)

    train_init_net, train_net = self.get_training_nets()

    self.assertNetContainOps(
        train_init_net,
        [
            OpSpec("ConstantFill", None, None),  # position_weights/pos_w
            OpSpec("UniformFill", None, None),
            OpSpec("ConstantFill", None, None),
        ]
    )

    # Train and predict nets are expected to hold the same three ops.
    weighted_lookup_ops = [
        OpSpec("LengthsRangeFill", None, None),
        OpSpec("Gather", None, None),
        OpSpec("SparseLengthsWeightedSum", None, None),
    ]
    self.assertNetContainOps(train_net, weighted_lookup_ops)

    predict_net = self.get_predict_net()
    self.assertNetContainOps(predict_net, weighted_lookup_ops)
|
|
|
|
|
|
|
|
|
|
def testSparseLookupPositionWeightedOnIdScoreList(self):
    '''
    Build a SparseLookup layer with the 'PositionWeighted' reducer on
    an id-score list and check its output schema and the operators in
    the init, train and predict nets.
    '''
    key_type = schema.Scalar(
        np.int64,
        metadata=schema.Metadata(
            categorical_limit=1000
        ),
    )
    record = schema.NewRecord(self.model.net, schema.Struct(
        ('sparse', schema.Struct(
            ('id_score_list_0', schema.Map(key_type, np.float32)),
        )),
    ))
    id_score_list = record.sparse.id_score_list_0

    embedding_dim = 64
    embedding_after_pooling = self.model.SparseLookup(
        id_score_list, [embedding_dim], 'PositionWeighted')
    self.model.output_schema = schema.Struct()

    expected_type = schema.Scalar((np.float32, (embedding_dim, )))
    self.assertEqual(expected_type, embedding_after_pooling)

    train_init_net, train_net = self.get_training_nets()

    # The first matched init op's output is used as the lookup's first
    # input below.
    init_ops = self.assertNetContainOps(
        train_init_net,
        [
            OpSpec("UniformFill", None, None),
            OpSpec("ConstantFill", None, None),
        ]
    )
    table_blob = init_ops[0].output[0]

    sparse_lookup_op_spec = OpSpec(
        'SparseLengthsWeightedSum',
        [
            table_blob,
            record.sparse.id_score_list_0.values(),
            record.sparse.id_score_list_0.keys(),
            record.sparse.id_score_list_0.lengths(),
        ],
        [embedding_after_pooling()]
    )

    # Train and predict nets are expected to contain the same lookup op.
    self.assertNetContainOps(train_net, [sparse_lookup_op_spec])

    predict_net = self.get_predict_net()
    self.assertNetContainOps(predict_net, [sparse_lookup_op_spec])
|
|
|
|
|
|
|
|
|
|
def testSparseLookupIncorrectRecencyWeightedOnIdList(self):
    '''
    SparseLookup currently assumes an id_score_list input when the
    RecencyWeighted reducer is used, so passing a plain id list is
    expected to raise AssertionError.
    '''
    id_list_type = schema.List(
        schema.Scalar(
            np.int64,
            metadata=schema.Metadata(categorical_limit=1000)))
    record = schema.NewRecord(self.model.net, schema.Struct(
        ('sparse', schema.Struct(
            ('sparse_feature_0', id_list_type),
        )),
    ))

    embedding_dim = 64
    feature = record.sparse.sparse_feature_0
    with self.assertRaises(AssertionError):
        self.model.SparseLookup(
            feature, [embedding_dim], 'RecencyWeighted')
|
|
|
|
|
|
|
|
|
|
def testSparseLookupRecencyWeightedOnIdScoreList(self):
    '''
    Build a SparseLookup layer with the 'RecencyWeighted' reducer on an
    id-score list and check its output schema and the operators in the
    init, train and predict nets.
    '''
    key_type = schema.Scalar(
        np.int64,
        metadata=schema.Metadata(
            categorical_limit=1000
        ),
    )
    record = schema.NewRecord(self.model.net, schema.Struct(
        ('sparse', schema.Struct(
            ('id_score_list_0', schema.Map(key_type, np.float32)),
        )),
    ))
    id_score_list = record.sparse.id_score_list_0

    embedding_dim = 64
    embedding_after_pooling = self.model.SparseLookup(
        id_score_list, [embedding_dim], 'RecencyWeighted')
    self.model.output_schema = schema.Struct()

    expected_type = schema.Scalar((np.float32, (embedding_dim, )))
    self.assertEqual(expected_type, embedding_after_pooling)

    train_init_net, train_net = self.get_training_nets()

    # The first matched init op's output is used as the lookup's first
    # input below.
    init_ops = self.assertNetContainOps(
        train_init_net,
        [
            OpSpec("UniformFill", None, None),
            OpSpec("ConstantFill", None, None),
        ]
    )
    table_blob = init_ops[0].output[0]

    sparse_lookup_op_spec = OpSpec(
        'SparseLengthsWeightedSum',
        [
            table_blob,
            record.sparse.id_score_list_0.values(),
            record.sparse.id_score_list_0.keys(),
            record.sparse.id_score_list_0.lengths(),
        ],
        [embedding_after_pooling()]
    )

    # Train and predict nets are expected to contain the same lookup op.
    self.assertNetContainOps(train_net, [sparse_lookup_op_spec])

    predict_net = self.get_predict_net()
    self.assertNetContainOps(predict_net, [sparse_lookup_op_spec])
|
|
|
|
|
|
Update from Facebook (#8887)
* add opencl + fpga context
adds an opencl context inside caffe2/fb which can be used for fpga access
* [Caffe2] Force tensor inference checks to be triggered during testing
We've started to rely on TensorInference functions more for different analysis. This diff ensures that the TensorInference function's result matches what is expected from the definition of the operator.
* Enable building //caffe2:torch with @mode/opt
In @mode/opt, python runs out of a PAR, which breaks a lot of
assumptions in the code about where templates/ folders live relative
to __file__. Rather than introduce hacks with parutil, I simply turn
template_path into a parameter for all the relevant functions and
thread it through from the top level.
* [Caffe2] Fix cost models for DotProduct and Div. Update Tensor Inference for dot product
As title. DotProduct states that output is a 1-D tensor (https://caffe2.ai/docs/operators-catalogue.html#dotproduct) though code suggests it is either 0- or 1-D depending on inputs. TensorInference defined to support implementation.
* [SG-MoE] Add an option to make the experts NOT as components
* [nomnigraph] Rename and fixup convertToNeuralNetOperator API
This will make things a bit cleaner
* no longer symlink THNN.h and THCUNN.h
* forced decoder network (onnx export)
Closes https://github.com/pytorch/translate/pull/95
Add networks in ensemble_export.py to create a forced decoding network from PyTorch NMT checkpoints. This network takes an arbitrary numberized (source, target) pair and returns the model score for the translation, including penalties.
Vocabulary reduction networks are also supported, but note that target indices which are not in the possible_translation_tokens generated for the source input will be trea
* Revert schema change to fix production models
Revert schema change to fix production models
* MockLogDeviceReader - rebase on FIX
# Goal
1), Build a make_mock_log_device_reader using make_mock_reader
2), Replace the real log_device_reader here: https://fburl.com/raihwf1p
# Log by D8151734
Real log_device_reader:
```
I0529 20:29:05.373108 954994 tensor.h:839] Tensor print_net/log of type std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >. Dims: (): read_net/ParseOpenTrainingRow:0
I0529 20:29:05.373244 954994 tensor.h:839] Tensor read_net/ParseOpenTrainin
* [C2/D2][1/n]: Nonnegative-Constrained Optimization -- log barrier
implement log barrier as a regularization method
* Add teacher weight screening.
Add teacher weight screening according to teacher labels. If the teacher label is zero, we do not use the distill loss in the objective function.
* Add NormalizerContext
See task for more detail. This implementation is a copy of what exists for RegularizerContext except for how the parameters are defined in the model_definition thrift file.
I'll try an alternative implementation which overrides the default arguments of functions instead like for argscopes in tensorflow.
https://github.com/pytorch/pytorch/compare/master...MaximeBoucher:update-from-facebook-0939578c068c?expand=1
* Adding cosine similarity option in dot processor
Add pairwise cosine similarity option in dot product.
Add an option to concatenate dot product and cosine similarity.
Add test cases.
* [nomnigraph][redo] Concat elim for sparseNN
Same as D7962948, which was reverted because Operator Schema was not
defined
* [pytorch] Revert pytorch/pytorch#7918 'Release GIL when copying to shared memory', breaks ASAN
Revert this pytorch diff that breaks ASAN when running Filament in dev mode; in opt mode it gives "bad file descriptor" errors. Looks like a race when copying tensors to shared memory in multiple mp.Queue's (which spawn separate threads).
https://github.com/pytorch/pytorch/pull/7918/files
* [nomnigraph][mobile] Enable nomnigraph by default, use -Oz on nomnigraph related code to reduce code size
enables nomnigraph and reduces codesize
* [Warmup] Allow both offline incremental training and online training
Change plan name on saving side and reading side to support both training type
This diff depends on D8128530 and D8168651.
* Revert D7802642: [Warmup] Allow both offline incremental training and online training
This reverts commit afc213cf9b36cecf75333a788391c4d09f4afccc
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Add legacy grad logic to fix div op on old graphs.
Add legacy grad logic to fix div op on old graphs.
* Correctly propagate operator failures
Propagate errors from operators that throw exceptions and return false
* Revert D8374829: [caffe2][nomnigraph][redo] Concat elim for sparseNN
This reverts commit 6dda028c463e54bb5c32188bbbe9202107e188a5
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [Caffe2] Added extra_info to core.DeviceOption(), enforced extra_info to be inherited in scope.DeviceScope
extra_info is a newly defined field in DeviceOption proto. This diff added extra_info to the core.DeviceOption(). And, In scope.DeviceScope(), this diff enforce the new scope to inherit the extra_info from old scope.
* [opt] hgdirsync wasn't enabled, merge diverged code
Here's the damage, P59732616 basically xplat was left behind but had
the change from assert to CAFFE_ENFORCE
* OMP parallelism over RoIs for RoIAlign op
Simpler to parallelize over RoIs. Shouldn't affect other uses as it relies on
the number of OMP threads set during startup.
PR: https://github.com/pytorch/pytorch/pull/8562
* Use int64_t for shape in FillOps
to avoid overflow of int32
* Implement Rotated RoIAlign op
Based on Rotated RPNs as explained in https://arxiv.org/abs/1703.01086.
The idea is simple - orientation/angle is added as an RPN
anchor parameter and then the angle is further regressed similar to bbox
coords. There are some additional changes related to NMS and IoU, but besides
that it's a direct extension to Faster-RCNN. Further details in https://fb.quip.com/sZHlA1iMfWPZ.
RoIs are represented in [center_x, center_y, width, height, angle] format.
`angle` repre
* Rotated RoIAlign op CUDA forward implementation
CUDA forward impl for D8415490
* RoIAlignRotated op CUDA backward pass implementation
TSIA
* All remaining fixes to eliminate process_github.sh
Most of this diff has already been reviewed separately, except for the parts relating to _thnn/utils.py and _utils._internal.py
remove skipIf(True, 'Fbcode') line from process_github.sh
replace sed of cpp file with #ifdef to control cudnnDestroy use
undo sync-time deletion of .gitattributes, remove process_github.sh
switch to using _utils._internal rather than try-import-except
This diff also fixes the open-source bug where rebuilds have
* Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training"
Original commit changeset: 7707d2efe60e. The original diff was backed out because the online trainer package was backed out. This code only works with the new online trainer package.
* [easy] improve error log in adagrad op
as title
* re-allow use of thnn_h_path
This fixes cffi usage in OSS
* [4/4] [tum] paralyzing layerNorm for GPU full sync
as title
* add compile=False to pytorch tests, remove hack with pyc
* Add shape and type inference for RowWiseArgMax operator
See title
* Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training"
This reverts commit 78167eeef0af16b60f72c82f9dcdda9b41b4dcbd
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [fix-flaky-test] mock_hive_reader_test flaky, because GlobalCounter collects local counts intervally
# Problem
`MockHiveReader` uses `GlobalCounter` to limit `max_examples`.
GlobalCounter on server node collect local counts from worker nodes every 1 sec.
This 1 sec delay makes it impossible to limit exactly to the `max_examples`, it will definitely exceed `max_examples`.
# Plan
Given,
```
Expected num_examples = max_examples + num_examples/sec (Read Speed) x 1 sec (GlobalCounter Sync Int
* [Caffe2] Fix FCGradient cost inference. Prevent overflow in cost inference
FCGradient missed a factor 2 in the `num_outputs == 3` case. Overflow was occurring with flop calculation for FC. Changed types to `uint64_t` to prevent future problems.
* Fix binary ops with empty inputs
Fix binary ops with empty inputs
* Support the filling of input blob with provided data
as title for Biz Integrity case
* Back out "Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training""
Original commit changeset: 30c55dd38816 Original diff is reverted due to introducing bad integration test. Fixed the integration test.
* [c2][easy] improve pack ops error loggings
as desc.
* Add ShapeTypeInference for LpNorm operator
As desc
* Shard test_nn to reduce runtime for each test target
Closes https://github.com/pytorch/pytorch/pull/8793
The current test_nn would time out and be disabled in GreenWarden, and we need to have an option to split it up in order to pass the stress test. Right now GreenWarden roughly allows running 100 test cases in test_nn before timing out, and here we have an option to divide test_nn into 30 shards (with ~40 tests in each shard) to allow for some test suite growth in the future.
* Change default caffe2_streams_per_gpu to 1
* Remove IN_SANDCASTLE from common.py and test_nn.py
We prefer to disable the failing tests through Sandcastle UI instead.
* Add a new class for an updated prof_dag.proto
This diff contains:
- An updated prof_dag.proto that contains blob profiles.
- A class to deserialize this information (serialization is in a follow up diff)
- Update to separate profiling information from NeuralNet (and use it as part of the class above).
- Unit tests
* Lambdarank for SparseNN
This diff adds a lambda_rank_layer for SparseNN.
changes include
1) Adds support for multi sessions in c2 op
2) Adds support for two different loss functions in c2 op
3) Unit tests for op
* Revert D8586950: Back out "Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training""
This reverts commit 012220ed63eccc35659a57b31d16a3625da6317b
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [easy] A few fixups to multithread predictor benchmark
(1) support perf on T6 server
(2) remove dead code
* fix a bug about the map size
as title
* Fix reduce sum on in-place case.
Fix reduce sum on in-place case.
* [Warmup] Reland reverted diff Allow both offline incremental training and online training
Closes https://github.com/pytorch/pytorch/pull/8827
fix net transform integration test. Allow offline and online trainer to coexist D7802642.
* Add StoreHandlerNotAvailableException
Add an exception for a store that is not available or has been
deleted.
* Use exception handling for fault tolerance, missing KV store
Remove status blobs to communication ops so that exceptions propagate on
failure.
* [C2/D2][2/n]: Nonnegative-Constrained Optimization -- bounded grad proj
for simple bounded constrained optimization, incl non-negative box constraints.
* [GanH]: Adaptive Weighting with More Estimations
With implemented positivity optimization, we now learn adaptive weights with different
parameterizations.
This improves parameter estimation and training stability.
* Revert some changes for landing
* Remove AutoNoGIL in StorageSharing
* Temporarily disable net_tests
* Revert "[Caffe2] Force tensor inference checks to be triggered during testing"
This reverts commit 67ef05c22b2f71b4a489695384932f968384a2a4.
* Revert "Fix reduce sum on in-place case."
This reverts commit 6cb8a8e1b3db7b6d20941b0053e3f3836068eb64.
* Revert "Revert "Fix reduce sum on in-place case.""
This reverts commit 130a257c0893dc09f4bd6e6a45d112261807fd2c.
2018-06-26 21:55:48 +00:00
|
|
|
def testPairwiseSimilarityWithAllEmbeddings(self):
    """Exercise the PairwiseSimilarity layer on a single 'all_embeddings' input.

    Builds a record holding one float32 field of shape
    (num_embeddings, embedding_dim), applies PairwiseSimilarity with an
    output size of num_embeddings * num_embeddings, and checks that:
      * the returned schema equals a float32 Scalar of shape
        (num_embeddings * num_embeddings,);
      * the training init net is checked against an empty op list;
      * the training net is checked against BatchMatMul and Flatten ops.
    """
    embedding_dim = 64
    num_embeddings = 5
    output_size = num_embeddings * num_embeddings

    # Input record: a struct with a single 'all_embeddings' field.
    record = schema.NewRecord(
        self.model.net,
        schema.Struct(
            (
                'all_embeddings',
                schema.Scalar((np.float32, (num_embeddings, embedding_dim))),
            ),
        ),
    )

    current = self.model.PairwiseSimilarity(record, output_size)

    # The layer output is expected to be the flattened similarity scores.
    expected_schema = schema.Scalar((np.float32, (output_size, )))
    self.assertEqual(expected_schema, current)

    train_init_net, train_net = self.get_training_nets()
    self.assertNetContainOps(train_init_net, [])

    expected_ops = [
        OpSpec(op_type, None, None)
        for op_type in ("BatchMatMul", "Flatten")
    ]
    self.assertNetContainOps(train_net, expected_ops)
|
|
|
|
|
|
Update from Facebook (#8887)
* add opencl + fpga context
adds an opencl context inside caffe2/fb which can be used for fpga access
* [Caffe2] Force tensor inference checks to be triggered during testing
We've started to rely on TensorInference functions more for different analysis. This diff ensures that the TensorInference function's result matches what is expected from the definition of the operator.
* Enable building //caffe2:torch with @mode/opt
In @mode/opt, python runs out of a PAR, which breaks a lot of
assumptions in the code about where templates/ folders live relative
to __file__. Rather than introduce hacks with parutil, I simply turn
template_path into a parameter for all the relevant functions and
thread it through from the top level.
* [Caffe2] Fix cost models for DotProduct and Div. Update Tensor Inference for dot product
As title. DotProduct states that output is a 1-D tensor (https://caffe2.ai/docs/operators-catalogue.html#dotproduct) though code suggests it is either 0- or 1-D depending on inputs. TensorInference defined to support implementation.
* [SG-MoE] Add an option to make the experts NOT as components
* [nomnigraph] Rename and fixup convertToNeuralNetOperator API
This will make things a bit cleaner
* no longer symlink THNN.h and THCUNN.h
* forced decoder network (onnx export)
Closes https://github.com/pytorch/translate/pull/95
Add networks in ensemble_export.py to create a forced decoding network from PyTorch NMT checkpoints. This network takes an arbitrary numberized (source, target) pair and returns the model score for the translation, including penalties.
Vocabulary reduction networks are also supported, but note that target indices which are not in the possible_translation_tokens generated for the source input will be trea
* Revert schema change to fix production models
Revert schema change to fix production models
* MockLogDeviceReader - rebase on FIX
# Goal
1), Build a make_mock_log_device_reader using make_mock_reader
2), Replace the real log_device_reader here: https://fburl.com/raihwf1p
# Log by D8151734
Real log_device_reader:
```
I0529 20:29:05.373108 954994 tensor.h:839] Tensor print_net/log of type std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >. Dims: (): read_net/ParseOpenTrainingRow:0
I0529 20:29:05.373244 954994 tensor.h:839] Tensor read_net/ParseOpenTrainin
* [C2/D2][1/n]: Nonnegative-Constrained Optimization -- log barrier
implement log barrier as a regularization method
* Add teacher weight screening.
Add teacher weight screening according to teacher labels. If teacher label is zero, we do not use the distill loss in the objective function.
* Add NormalizerContext
See task for more detail. This implementation is a copy of what exists for RegularizerContext except for how the parameters are defined in the model_definition thrift file.
I'll try an alternative implementation which overrides the default arguments of functions instead like for argscopes in tensorflow.
https://github.com/pytorch/pytorch/compare/master...MaximeBoucher:update-from-facebook-0939578c068c?expand=1
* Adding cosine similarity option in dot processor
Add pairwise cosine similarity option in dot product.
Add an option to concatenate dot product and cosine similarity.
Add test cases.
* [nomnigraph][redo] Concat elim for sparseNN
Same as D7962948, which was reverted because Operator Schema was not
defined
* [pytorch] Revert pytorch/pytorch#7918 'Release GIL when copying to shared memory', breaks ASAN
Revert this pytorch diff that breaks ASAN when running Filament in dev mode; in opt mode it gives "bad file descriptor" errors. Looks like a race when copying tensors to shared memory in multiple mp.Queue's (which spawn separate threads).
https://github.com/pytorch/pytorch/pull/7918/files
* [nomnigraph][mobile] Enable nomnigraph by default, use -Oz on nomnigraph related code to reduce code size
enables nomnigraph and reduces codesize
* [Warmup] Allow both offline incremental training and online training
Change plan name on saving side and reading side to support both training type
This diff depends on D8128530 and D8168651.
* Revert D7802642: [Warmup] Allow both offline incremental training and online training
This reverts commit afc213cf9b36cecf75333a788391c4d09f4afccc
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Add legacy grad logic to fix div op on old graphs.
Add legacy grad logic to fix div op on old graphs.
* Correctly propagate operator failures
Propagate errors from operators that throw exceptions and return false
* Revert D8374829: [caffe2][nomnigraph][redo] Concat elim for sparseNN
This reverts commit 6dda028c463e54bb5c32188bbbe9202107e188a5
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [Caffe2] Added extra_info to core.DeviceOption(), enforced extra_info to be inherited in scope.DeviceScope
extra_info is a newly defined field in DeviceOption proto. This diff added extra_info to the core.DeviceOption(). And, In scope.DeviceScope(), this diff enforce the new scope to inherit the extra_info from old scope.
* [opt] hgdirsync wasn't enabled, merge diverged code
Here's the damage, P59732616 basically xplat was left behind but had
the change from assert to CAFFE_ENFORCE
* OMP parallelism over RoIs for RoIAlign op
Simpler to parallelize over RoIs. Shouldn't affect other uses as it relies on
the number of OMP threads set during startup.
PR: https://github.com/pytorch/pytorch/pull/8562
* Use int64_t for shape in FillOps
to avoid overflow of int32
* Implement Rotated RoIAlign op
Based on Rotated RPNs as explained in https://arxiv.org/abs/1703.01086.
The idea is simple - orientation/angle is added as an RPN
anchor parameter and then the angle is further regressed similar to bbox
coords. There are some additional changes related to NMS and IoU, but besides
that it's a direct extension to Faster-RCNN. Further details in https://fb.quip.com/sZHlA1iMfWPZ.
RoIs are represented in [center_x, center_y, width, height, angle] format.
`angle` repre
* Rotated RoIAlign op CUDA forward implementation
CUDA forward impl for D8415490
* RoIAlignRotated op CUDA backward pass implementation
TSIA
* All remaining fixes to eliminate process_github.sh
Most of this diff has already been reviewed separately, except for the parts relating to _thnn/utils.py and _utils._internal.py
remove skipIf(True, 'Fbcode') line from process_github.sh
replace sed of cpp file with #ifdef to control cudnnDestroy use
undo sync-time deletion of .gitattributes, remove process_github.sh
switch to using _utils._internal rather than try-import-except
This diff also fixes the open-source bug where rebuilds have
* Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training"
Original commit changeset: 7707d2efe60e. The original diff was backed out because the online trainer package was backed out. This code only works with the new online trainer package.
* [easy] improve error log in adagrad op
as title
* re-allow use of thnn_h_path
This fixes cffi usage in OSS
* [4/4] [tum] paralyzing layerNorm for GPU full sync
as title
* add compile=False to pytorch tests, remove hack with pyc
* Add shape and type inference for RowWiseArgMax operator
See title
* Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training"
This reverts commit 78167eeef0af16b60f72c82f9dcdda9b41b4dcbd
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [fix-flaky-test] mock_hive_reader_test flaky, because GlobalCounter collects local counts intervally
# Problem
`MockHiveReader` uses `GlobalCounter` to limit `max_examples`.
GlobalCounter on server node collect local counts from worker nodes every 1 sec.
This 1 sec delay makes it impossible to limit exactly to the `max_examples`, it will definitely exceed `max_examples`.
# Plan
Given,
```
Expected num_examples = max_examples + num_examples/sec (Read Speed) x 1 sec (GlobalCounter Sync Int
* [Caffe2] Fix FCGradient cost inference. Prevent overflow in cost inference
FCGradient missed a factor 2 in the `num_outputs == 3` case. Overflow was occurring with flop calculation for FC. Changed types to `uint64_t` to prevent future problems.
* Fix binary ops with empty inputs
Fix binary ops with empty inputs
* Support the filling of input blob with provided data
as title for Biz Integrity case
* Back out "Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training""
Original commit changeset: 30c55dd38816 Original diff is reverted due to introducing bad integration test. Fixed the integration test.
* [c2][easy] improve pack ops error loggings
as desc.
* Add ShapeTypeInference for LpNorm operator
As desc
* Shard test_nn to reduce runtime for each test target
Closes https://github.com/pytorch/pytorch/pull/8793
The current test_nn would time out and be disabled in GreenWarden, and we need to have an option to split it up in order to pass the stress test. Right now GreenWarden roughly allows running 100 test cases in test_nn before timing out, and here we have an option to divide test_nn into 30 shards (with ~40 tests in each shard) to allow for some test suite growth in the future.
* Change default caffe2_streams_per_gpu to 1
* Remove IN_SANDCASTLE from common.py and test_nn.py
We prefer to disable the failing tests through Sandcastle UI instead.
* Add a new class for an updated prof_dag.proto
This diff contains:
- An updated prof_dag.proto that contains blob profiles.
- A class to deserialize this information (serialization is in a follow up diff)
- Update to separate profiling information from NeuralNet (and use it as part of the class above).
- Unit tests
* Lambdarank for SparseNN
This diff adds a lambda_rank_layer for SparseNN.
changes include
1) Adds support for multi sessions in c2 op
2) Adds support for two different loss functions in c2 op
3) Unit tests for op
* Revert D8586950: Back out "Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training""
This reverts commit 012220ed63eccc35659a57b31d16a3625da6317b
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [easy] A few fixups to multithread predictor benchmark
(1) support perf on T6 server
(2) remove dead code
* fix a bug about the map size
as title
* Fix reduce sum on in-place case.
Fix reduce sum on in-place case.
* [Warmup] Reland reverted diff Allow both offline incremental training and online training
Closes https://github.com/pytorch/pytorch/pull/8827
fix net transform integration test. Allow offline and online trainer to coexist D7802642.
* Add StoreHandlerNotAvailableException
Add an exception for a store that is not available or has been
deleted.
* Use exception handling for fault tolerance, missing KV store
Remove status blobs to communication ops so that exceptions propagate on
failure.
* [C2/D2][2/n]: Nonnegative-Constrained Optimization -- bounded grad proj
for simple bounded constrained optimization, incl non-negative box constraints.
* [GanH]: Adaptive Weighting with More Estimations
With implemented positivity optimization, we now learn adaptive weights with different
parameterizations.
This improves parameter estimation and training stability.
* Revert some changes for landing
* Remove AutoNoGIL in StorageSharing
* Temporarily disable net_tests
* Revert "[Caffe2] Force tensor inference checks to be triggered during testing"
This reverts commit 67ef05c22b2f71b4a489695384932f968384a2a4.
* Revert "Fix reduce sum on in-place case."
This reverts commit 6cb8a8e1b3db7b6d20941b0053e3f3836068eb64.
* Revert "Revert "Fix reduce sum on in-place case.""
This reverts commit 130a257c0893dc09f4bd6e6a45d112261807fd2c.
2018-06-26 21:55:48 +00:00
|
|
|
def testPairwiseSimilarityWithXandYEmbeddings(self):
|
2018-01-26 19:28:32 +00:00
|
|
|
embedding_dim = 64
|
|
|
|
|
record = schema.NewRecord(self.model.net, schema.Struct(
|
|
|
|
|
('x_embeddings', schema.Scalar(
|
|
|
|
|
((np.float32, (5, embedding_dim)))
|
|
|
|
|
)),
|
|
|
|
|
('y_embeddings', schema.Scalar(
|
|
|
|
|
((np.float32, (6, embedding_dim)))
|
|
|
|
|
)),
|
|
|
|
|
))
|
Update from Facebook (#8887)
* add opencl + fpga context
adds an opencl context inside caffe2/fb which can be used for fpga access
* [Caffe2] Force tensor inference checks to be triggered during testing
We've started to rely on TensorInference functions more for different analysis. This diff ensures that the TensorInference function's result matches what is expected from the definition of the operator.
* Enable building //caffe2:torch with @mode/opt
In @mode/opt, python runs out of a PAR, which breaks a lot of
assumptions in the code about where templates/ folders live relative
to __file__. Rather than introduce hacks with parutil, I simply turn
template_path into a parameter for all the relevant functions and
thread it through from the top level.
* [Caffe2] Fix cost models for DotProduct and Div. Update Tensor Inference for dot product
As title. DotProduct states that output is a 1-D tensor (https://caffe2.ai/docs/operators-catalogue.html#dotproduct) though code suggests it is either 0- or 1-D depending on inputs. TensorInference defined to support implementation.
* [SG-MoE] Add an option to make the experts NOT as components
* [nomnigraph] Rename and fixup convertToNeuralNetOperator API
This will make things a bit cleaner
* no longer symlink THNN.h and THCUNN.h
* forced decoder network (onnx export)
Closes https://github.com/pytorch/translate/pull/95
Add networks in ensemble_export.py to create a forced decoding network from PyTorch NMT checkpoints. This network takes an arbitrary numberized (source, target) pair and returns the model score for the translation, including penalties.
Vocabulary reduction networks are also supported, but note that target indices which are not in the possible_translation_tokens generated for the source input will be trea
* Revert schema change to fix production models
Revert schema change to fix production models
* MockLogDeviceReader - rebase on FIX
# Goal
1), Build a make_mock_log_device_reader using make_mock_reader
2), Replace the real log_device_reader here: https://fburl.com/raihwf1p
# Log by D8151734
Real log_device_reader:
```
I0529 20:29:05.373108 954994 tensor.h:839] Tensor print_net/log of type std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >. Dims: (): read_net/ParseOpenTrainingRow:0
I0529 20:29:05.373244 954994 tensor.h:839] Tensor read_net/ParseOpenTrainin
* [C2/D2][1/n]: Nonnegative-Constrained Optimization -- log barrier
implement log barrier as a regularization method
* Add teacher weight screening.
Add teacher weight screening according to teacher labels. If the teacher label is zero, we do not use the distill loss in the objective function.
* Add NormalizerContext
See task for more detail. This implementation is a copy of what exists for RegularizerContext except for how the parameters are defined in the model_definition thrift file.
I'll try an alternative implementation which overrides the default arguments of functions instead like for argscopes in tensorflow.
https://github.com/pytorch/pytorch/compare/master...MaximeBoucher:update-from-facebook-0939578c068c?expand=1
* Adding cosine similarity option in dot processor
Add pairwise cosine similarity option in dot product.
Add an option to concatenate dot product and cosine similarity.
Add test cases.
* [nomnigraph][redo] Concat elim for sparseNN
Same as D7962948, which was reverted because Operator Schema was not
defined
* [pytorch] Revert pytorch/pytorch#7918 'Release GIL when copying to shared memory', breaks ASAN
Revert this pytorch diff that breaks ASAN when running Filament in dev mode; in opt mode it gives "bad file descriptor" errors. Looks like a race when copying tensors to shared memory in multiple mp.Queue's (which spawn separate threads).
https://github.com/pytorch/pytorch/pull/7918/files
* [nomnigraph][mobile] Enable nomnigraph by default, use -Oz on nomnigraph related code to reduce code size
enables nomnigraph and reduces codesize
* [Warmup] Allow both offline incremental training and online training
Change plan name on saving side and reading side to support both training type
This diff depends on D8128530 and D8168651.
* Revert D7802642: [Warmup] Allow both offline incremental training and online training
This reverts commit afc213cf9b36cecf75333a788391c4d09f4afccc
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Add legacy grad logic to fix div op on old graphs.
Add legacy grad logic to fix div op on old graphs.
* Correctly propagate operator failures
Propagate errors from operators that throw exceptions and return false
* Revert D8374829: [caffe2][nomnigraph][redo] Concat elim for sparseNN
This reverts commit 6dda028c463e54bb5c32188bbbe9202107e188a5
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [Caffe2] Added extra_info to core.DeviceOption(), enforced extra_info to be inherited in scope.DeviceScope
extra_info is a newly defined field in the DeviceOption proto. This diff adds extra_info to core.DeviceOption(). In scope.DeviceScope(), this diff also enforces that the new scope inherits the extra_info from the old scope.
* [opt] hgdirsync wasn't enabled, merge diverged code
Here's the damage, P59732616 basically xplat was left behind but had
the change from assert to CAFFE_ENFORCE
* OMP parallelism over RoIs for RoIAlign op
Simpler to parallelize over RoIs. Shouldn't affect other uses as it relies on
the number of OMP threads set during startup.
PR: https://github.com/pytorch/pytorch/pull/8562
* Use int64_t for shape in FillOps
to avoid overflow of int32
* Implement Rotated RoIAlign op
Based on Rotated RPNs as explained in https://arxiv.org/abs/1703.01086.
The idea is simple - orientation/angle is added as an RPN
anchor parameter and then the angle is further regressed similar to bbox
coords. There are some additional changes related to NMS and IoU, but besides
that it's a direct extension to Faster-RCNN. Further details in https://fb.quip.com/sZHlA1iMfWPZ.
RoIs are represented in [center_x, center_y, width, height, angle] format.
`angle` repre
* Rotated RoIAlign op CUDA forward implementation
CUDA forward impl for D8415490
* RoIAlignRotated op CUDA backward pass implementation
TSIA
* All remaining fixes to eliminate process_github.sh
Most of this diff has already been reviewed separately, except for the parts relating to _thnn/utils.py and _utils._internal.py
remove skipIf(True, 'Fbcode') line from process_github.sh
replace sed of cpp file with #ifdef to control cudnnDestroy use
undo sync-time deletion of .gitattributes, remove process_github.sh
switch to using _utils._internal rather than try-import-except
This diff also fixes the open-source bug where rebuilds have
* Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training"
Original commit changeset: 7707d2efe60e The original diff was backed out because the online trainer package was backed out. This code would only work with the new online trainer package
* [easy] improve error log in adagrad op
as title
* re-allow use of thnn_h_path
This fixes cffi usage in OSS
* [4/4] [tum] paralyzing layerNorm for GPU full sync
as title
* add compile=False to pytorch tests, remove hack with pyc
* Add shape and type inference for RowWiseArgMax operator
See title
* Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training"
This reverts commit 78167eeef0af16b60f72c82f9dcdda9b41b4dcbd
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [fix-flaky-test] mock_hive_reader_test flaky, because GlobalCounter collects local counts intervally
# Problem
`MockHiveReader` uses `GlobalCounter` to limit `max_examples`.
GlobalCounter on the server node collects local counts from worker nodes every 1 sec.
This 1 sec delay makes it impossible to limit exactly to the `max_examples`, it will definitely exceed `max_examples`.
# Plan
Given,
```
Expected num_examples = max_examples + num_examples/sec (Read Speed) x 1 sec (GlobalCounter Sync Int
* [Caffe2] Fix FCGradient cost inference. Prevent overflow in cost inference
FCGradient missed a factor 2 in the `num_outputs == 3` case. Overflow was occurring with flop calculation for FC. Changed types to `uint64_t` to prevent future problems.
* Fix binary ops with empty inputs
Fix binary ops with empty inputs
* Support the filling of input blob with provided data
as title for Biz Integrity case
* Back out "Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training""
Original commit changeset: 30c55dd38816 Original diff is reverted due to introducing bad integration test. Fixed the integration test.
* [c2][easy] improve pack ops error loggings
as desc.
* Add ShapeTypeInference for LpNorm operator
As desc
* Shard test_nn to reduce runtime for each test target
Closes https://github.com/pytorch/pytorch/pull/8793
The current test_nn would time out and be disabled in GreenWarden, and we need to have an option to split it up in order to pass the stress test. Right now GreenWarden roughly allows running 100 test cases in test_nn before timing out, and here we have an option to divide test_nn into 30 shards (with ~40 tests in each shard) to allow for some test suite growth in the future.
* Change default caffe2_streams_per_gpu to 1
* Remove IN_SANDCASTLE from common.py and test_nn.py
We prefer to disable the failing tests through Sandcastle UI instead.
* Add a new class for an updated prof_dag.proto
This diff contains:
- An updated prof_dag.proto that contains blob profiles.
- A class to deserialize this information (serialization is in a follow up diff)
- Update to separate profiling information from NeuralNet (and use it as part of the class above).
- Unit tests
* Lambdarank for SparseNN
This diff adds a lambda_rank_layer for SparseNN.
changes include
1) Adds support for multi sessions in c2 op
2) Adds support for two different loss functions in c2 op
3) Unit tests for op
* Revert D8586950: Back out "Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training""
This reverts commit 012220ed63eccc35659a57b31d16a3625da6317b
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [easy] A few fixups to multithread predictor benchmark
(1) support perf on T6 server
(2) remove dead code
* fix a bug about the map size
as title
* Fix reduce sum on in-place case.
Fix reduce sum on in-place case.
* [Warmup] Reland reverted diff Allow both offline incremental training and online training
Closes https://github.com/pytorch/pytorch/pull/8827
fix net transform integration test. Allow offline and online trainer to coexist D7802642.
* Add StoreHandlerNotAvailableException
Add an exception for a store that is not available or has been
deleted.
* Use exception handling for fault tolerance, missing KV store
Remove status blobs to communication ops so that exceptions propagate on
failure.
* [C2/D2][2/n]: Nonnegative-Constrained Optimization -- bounded grad proj
for simple bounded constrained optimization, incl non-negative box constraints.
* [GanH]: Adaptive Weighting with More Estimations
With implemented positivity optimization, we now learn adaptive weights with different
parameterizations.
This improves parameter estimation and training stability.
* Revert some changes for landing
* Remove AutoNoGIL in StorageSharing
* Temporarily disable net_tests
* Revert "[Caffe2] Force tensor inference checks to be triggered during testing"
This reverts commit 67ef05c22b2f71b4a489695384932f968384a2a4.
* Revert "Fix reduce sum on in-place case."
This reverts commit 6cb8a8e1b3db7b6d20941b0053e3f3836068eb64.
* Revert "Revert "Fix reduce sum on in-place case.""
This reverts commit 130a257c0893dc09f4bd6e6a45d112261807fd2c.
2018-06-26 21:55:48 +00:00
|
|
|
current = self.model.PairwiseSimilarity(
|
2018-01-26 19:28:32 +00:00
|
|
|
record, 5 * 6)
|
|
|
|
|
|
|
|
|
|
self.assertEqual(
|
|
|
|
|
schema.Scalar((np.float32, (5 * 6, ))),
|
|
|
|
|
current
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
train_init_net, train_net = self.get_training_nets()
|
|
|
|
|
self.assertNetContainOps(train_init_net, [])
|
|
|
|
|
self.assertNetContainOps(train_net, [
|
|
|
|
|
OpSpec("BatchMatMul", None, None),
|
|
|
|
|
OpSpec("Flatten", None, None),
|
|
|
|
|
])
|
|
|
|
|
|
Update from Facebook (#8887)
* add opencl + fpga context
adds an opencl context inside caffe2/fb which can be used for fpga access
* [Caffe2] Force tensor inference checks to be triggered during testing
We've started to rely on TensorInference functions more for different analysis. This diff ensures that the TensorInference function's result matches what is expected from the definition of the operator.
* Enable building //caffe2:torch with @mode/opt
In @mode/opt, python runs out of a PAR, which breaks a lot of
assumptions in the code about where templates/ folders live relative
to __file__. Rather than introduce hacks with parutil, I simply turn
template_path into a parameter for all the relevant functions and
thread it through from the top level.
* [Caffe2] Fix cost models for DotProduct and Div. Update Tensor Inference for dot product
As title. DotProduct states that output is a 1-D tensor (https://caffe2.ai/docs/operators-catalogue.html#dotproduct) though code suggests it is either 0- or 1-D depending on inputs. TensorInference defined to support implementation.
* [SG-MoE] Add an option to make the experts NOT as components
* [nomnigraph] Rename and fixup convertToNeuralNetOperator API
This will make things a bit cleaner
* no longer symlink THNN.h and THCUNN.h
* forced decoder network (onnx export)
Closes https://github.com/pytorch/translate/pull/95
Add networks in ensemble_export.py to create a forced decoding network from PyTorch NMT checkpoints. This network takes an arbitrary numberized (source, target) pair and returns the model score for the translation, including penalties.
Vocabulary reduction networks are also supported, but note that target indices which are not in the possible_translation_tokens generated for the source input will be trea
* Revert schema change to fix production models
Revert schema change to fix production models
* MockLogDeviceReader - rebase on FIX
# Goal
1), Build a make_mock_log_device_reader using make_mock_reader
2), Replace the real log_device_reader here: https://fburl.com/raihwf1p
# Log by D8151734
Real log_device_reader:
```
I0529 20:29:05.373108 954994 tensor.h:839] Tensor print_net/log of type std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >. Dims: (): read_net/ParseOpenTrainingRow:0
I0529 20:29:05.373244 954994 tensor.h:839] Tensor read_net/ParseOpenTrainin
* [C2/D2][1/n]: Nonnegative-Constrained Optimization -- log barrier
implement log barrier as a regularization method
* Add teacher weight screening.
Add teacher weight screening according to teacher labels. If the teacher label is zero, we do not use the distill loss in the objective function.
* Add NormalizerContext
See task for more detail. This implementation is a copy of what exists for RegularizerContext except for how the parameters are defined in the model_definition thrift file.
I'll try an alternative implementation which overrides the default arguments of functions instead like for argscopes in tensorflow.
https://github.com/pytorch/pytorch/compare/master...MaximeBoucher:update-from-facebook-0939578c068c?expand=1
* Adding cosine similarity option in dot processor
Add pairwise cosine similarity option in dot product.
Add an option to concatenate dot product and cosine similarity.
Add test cases.
* [nomnigraph][redo] Concat elim for sparseNN
Same as D7962948, which was reverted because Operator Schema was not
defined
* [pytorch] Revert pytorch/pytorch#7918 'Release GIL when copying to shared memory', breaks ASAN
Revert this pytorch diff that breaks ASAN when running Filament in dev mode; in opt mode it gives "bad file descriptor" errors. Looks like a race when copying tensors to shared memory in multiple mp.Queue's (which spawn separate threads).
https://github.com/pytorch/pytorch/pull/7918/files
* [nomnigraph][mobile] Enable nomnigraph by default, use -Oz on nomnigraph related code to reduce code size
enables nomnigraph and reduces codesize
* [Warmup] Allow both offline incremental training and online training
Change plan name on saving side and reading side to support both training type
This diff depends on D8128530 and D8168651.
* Revert D7802642: [Warmup] Allow both offline incremental training and online training
This reverts commit afc213cf9b36cecf75333a788391c4d09f4afccc
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Add legacy grad logic to fix div op on old graphs.
Add legacy grad logic to fix div op on old graphs.
* Correctly propagate operator failures
Propagate errors from operators that throw exceptions and return false
* Revert D8374829: [caffe2][nomnigraph][redo] Concat elim for sparseNN
This reverts commit 6dda028c463e54bb5c32188bbbe9202107e188a5
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [Caffe2] Added extra_info to core.DeviceOption(), enforced extra_info to be inherited in scope.DeviceScope
extra_info is a newly defined field in the DeviceOption proto. This diff adds extra_info to core.DeviceOption(). In scope.DeviceScope(), this diff also enforces that the new scope inherits the extra_info from the old scope.
* [opt] hgdirsync wasn't enabled, merge diverged code
Here's the damage, P59732616 basically xplat was left behind but had
the change from assert to CAFFE_ENFORCE
* OMP parallelism over RoIs for RoIAlign op
Simpler to parallelize over RoIs. Shouldn't affect other uses as it relies on
the number of OMP threads set during startup.
PR: https://github.com/pytorch/pytorch/pull/8562
* Use int64_t for shape in FillOps
to avoid overflow of int32
* Implement Rotated RoIAlign op
Based on Rotated RPNs as explained in https://arxiv.org/abs/1703.01086.
The idea is simple - orientation/angle is added as an RPN
anchor parameter and then the angle is further regressed similar to bbox
coords. There are some additional changes related to NMS and IoU, but besides
that it's a direct extension to Faster-RCNN. Further details in https://fb.quip.com/sZHlA1iMfWPZ.
RoIs are represented in [center_x, center_y, width, height, angle] format.
`angle` repre
* Rotated RoIAlign op CUDA forward implementation
CUDA forward impl for D8415490
* RoIAlignRotated op CUDA backward pass implementation
TSIA
* All remaining fixes to eliminate process_github.sh
Most of this diff has already been reviewed separately, except for the parts relating to _thnn/utils.py and _utils._internal.py
remove skipIf(True, 'Fbcode') line from process_github.sh
replace sed of cpp file with #ifdef to control cudnnDestroy use
undo sync-time deletion of .gitattributes, remove process_github.sh
switch to using _utils._internal rather than try-import-except
This diff also fixes the open-source bug where rebuilds have
* Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training"
Original commit changeset: 7707d2efe60e The original diff was backed out because the online trainer package was backed out. This code would only work with the new online trainer package
* [easy] improve error log in adagrad op
as title
* re-allow use of thnn_h_path
This fixes cffi usage in OSS
* [4/4] [tum] paralyzing layerNorm for GPU full sync
as title
* add compile=False to pytorch tests, remove hack with pyc
* Add shape and type inference for RowWiseArgMax operator
See title
* Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training"
This reverts commit 78167eeef0af16b60f72c82f9dcdda9b41b4dcbd
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [fix-flaky-test] mock_hive_reader_test flaky, because GlobalCounter collects local counts intervally
# Problem
`MockHiveReader` uses `GlobalCounter` to limit `max_examples`.
GlobalCounter on the server node collects local counts from worker nodes every 1 sec.
This 1 sec delay makes it impossible to limit exactly to the `max_examples`, it will definitely exceed `max_examples`.
# Plan
Given,
```
Expected num_examples = max_examples + num_examples/sec (Read Speed) x 1 sec (GlobalCounter Sync Int
* [Caffe2] Fix FCGradient cost inference. Prevent overflow in cost inference
FCGradient missed a factor 2 in the `num_outputs == 3` case. Overflow was occurring with flop calculation for FC. Changed types to `uint64_t` to prevent future problems.
* Fix binary ops with empty inputs
Fix binary ops with empty inputs
* Support the filling of input blob with provided data
as title for Biz Integrity case
* Back out "Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training""
Original commit changeset: 30c55dd38816 Original diff is reverted due to introducing bad integration test. Fixed the integration test.
* [c2][easy] improve pack ops error loggings
as desc.
* Add ShapeTypeInference for LpNorm operator
As desc
* Shard test_nn to reduce runtime for each test target
Closes https://github.com/pytorch/pytorch/pull/8793
The current test_nn would time out and be disabled in GreenWarden, and we need to have an option to split it up in order to pass the stress test. Right now GreenWarden roughly allows running 100 test cases in test_nn before timing out, and here we have an option to divide test_nn into 30 shards (with ~40 tests in each shard) to allow for some test suite growth in the future.
* Change default caffe2_streams_per_gpu to 1
* Remove IN_SANDCASTLE from common.py and test_nn.py
We prefer to disable the failing tests through Sandcastle UI instead.
* Add a new class for an updated prof_dag.proto
This diff contains:
- An updated prof_dag.proto that contains blob profiles.
- A class to deserialize this information (serialization is in a follow up diff)
- Update to separate profiling information from NeuralNet (and use it as part of the class above).
- Unit tests
* Lambdarank for SparseNN
This diff adds a lambda_rank_layer for SparseNN.
changes include
1) Adds support for multi sessions in c2 op
2) Adds support for two different loss functions in c2 op
3) Unit tests for op
* Revert D8586950: Back out "Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training""
This reverts commit 012220ed63eccc35659a57b31d16a3625da6317b
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [easy] A few fixups to multithread predictor benchmark
(1) support perf on T6 server
(2) remove dead code
* fix a bug about the map size
as title
* Fix reduce sum on in-place case.
Fix reduce sum on in-place case.
* [Warmup] Reland reverted diff Allow both offline incremental training and online training
Closes https://github.com/pytorch/pytorch/pull/8827
fix net transform integration test. Allow offline and online trainer to coexist D7802642.
* Add StoreHandlerNotAvailableException
Add an exception for a store that is not available or has been
deleted.
* Use exception handling for fault tolerance, missing KV store
Remove status blobs to communication ops so that exceptions propagate on
failure.
* [C2/D2][2/n]: Nonnegative-Constrained Optimization -- bounded grad proj
for simple bounded constrained optimization, incl non-negative box constraints.
* [GanH]: Adaptive Weighting with More Estimations
With implemented positivity optimization, we now learn adaptive weights with different
parameterizations.
This improves parameter estimation and training stability.
* Revert some changes for landing
* Remove AutoNoGIL in StorageSharing
* Temporarily disable net_tests
* Revert "[Caffe2] Force tensor inference checks to be triggered during testing"
This reverts commit 67ef05c22b2f71b4a489695384932f968384a2a4.
* Revert "Fix reduce sum on in-place case."
This reverts commit 6cb8a8e1b3db7b6d20941b0053e3f3836068eb64.
* Revert "Revert "Fix reduce sum on in-place case.""
This reverts commit 130a257c0893dc09f4bd6e6a45d112261807fd2c.
2018-06-26 21:55:48 +00:00
|
|
|
def testPairwiseSimilarityWithXandYEmbeddingsAndGather(self):
|
2018-01-26 19:28:32 +00:00
|
|
|
embedding_dim = 64
|
|
|
|
|
|
|
|
|
|
output_idx = [1, 3, 5]
|
|
|
|
|
output_idx_blob = self.model.add_global_constant(
|
2018-02-04 08:47:30 +00:00
|
|
|
str(self.model.net.NextScopedBlob('pairwise_dot_product_gather')),
|
2018-01-26 19:28:32 +00:00
|
|
|
output_idx,
|
|
|
|
|
dtype=np.int32,
|
|
|
|
|
)
|
|
|
|
|
indices_to_gather = schema.Scalar(
|
|
|
|
|
(np.int32, len(output_idx)),
|
|
|
|
|
output_idx_blob,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
record = schema.NewRecord(self.model.net, schema.Struct(
|
|
|
|
|
('x_embeddings', schema.Scalar(
|
|
|
|
|
((np.float32, (5, embedding_dim)))
|
|
|
|
|
)),
|
|
|
|
|
('y_embeddings', schema.Scalar(
|
|
|
|
|
((np.float32, (6, embedding_dim)))
|
|
|
|
|
)),
|
|
|
|
|
('indices_to_gather', indices_to_gather),
|
|
|
|
|
))
|
Update from Facebook (#8887)
* add opencl + fpga context
adds an opencl context inside caffe2/fb which can be used for fpga access
* [Caffe2] Force tensor inference checks to be triggered during testing
We've started to rely on TensorInference functions more for different analysis. This diff ensures that the TensorInference function's result matches what is expected from the definition of the operator.
* Enable building //caffe2:torch with @mode/opt
In @mode/opt, python runs out of a PAR, which breaks a lot of
assumptions in the code about where templates/ folders live relative
to __file__. Rather than introduce hacks with parutil, I simply turn
template_path into a parameter for all the relevant functions and
thread it through from the top level.
* [Caffe2] Fix cost models for DotProduct and Div. Update Tensor Inference for dot product
As title. DotProduct states that output is a 1-D tensor (https://caffe2.ai/docs/operators-catalogue.html#dotproduct) though code suggests it is either 0- or 1-D depending on inputs. TensorInference defined to support implementation.
* [SG-MoE] Add an option to make the experts NOT as components
* [nomnigraph] Rename and fixup convertToNeuralNetOperator API
This will make things a bit cleaner
* no longer symlink THNN.h and THCUNN.h
* forced decoder network (onnx export)
Closes https://github.com/pytorch/translate/pull/95
Add networks in ensemble_export.py to create a forced decoding network from PyTorch NMT checkpoints. This network takes an arbitrary numberized (source, target) pair and returns the model score for the translation, including penalties.
Vocabulary reduction networks are also supported, but note that target indices which are not in the possible_translation_tokens generated for the source input will be trea
* Revert schema change to fix production models
Revert schema change to fix production models
* MockLogDeviceReader - rebase on FIX
# Goal
1), Build a make_mock_log_device_reader using make_mock_reader
2), Replace the real log_device_reader here: https://fburl.com/raihwf1p
# Log by D8151734
Real log_device_reader:
```
I0529 20:29:05.373108 954994 tensor.h:839] Tensor print_net/log of type std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >. Dims: (): read_net/ParseOpenTrainingRow:0
I0529 20:29:05.373244 954994 tensor.h:839] Tensor read_net/ParseOpenTrainin
* [C2/D2][1/n]: Nonnegative-Constrained Optimization -- log barrier
implement log barrier as a regularization method
* Add teacher weight screening.
Add teacher weight screening according to teacher labels. If the teacher label is zero, we do not use the distill loss in the objective function.
* Add NormalizerContext
See task for more detail. This implementation is a copy of what exists for RegularizerContext except for how the parameters are defined in the model_definition thrift file.
I'll try an alternative implementation which overrides the default arguments of functions instead like for argscopes in tensorflow.
https://github.com/pytorch/pytorch/compare/master...MaximeBoucher:update-from-facebook-0939578c068c?expand=1
* Adding cosine similarity option in dot processor
Add pairwise cosine similarity option in dot product.
Add an option to concatenate dot product and cosine similarity.
Add test cases.
* [nomnigraph][redo] Concat elim for sparseNN
Same as D7962948, which was reverted because Operator Schema was not
defined
* [pytorch] Revert pytorch/pytorch#7918 'Release GIL when copying to shared memory', breaks ASAN
Revert this pytorch diff that breaks ASAN when running Filament in dev mode; in opt mode it gives "bad file descriptor" errors. Looks like a race when copying tensors to shared memory in multiple mp.Queue's (which spawn separate threads).
https://github.com/pytorch/pytorch/pull/7918/files
* [nomnigraph][mobile] Enable nomnigraph by default, use -Oz on nomnigraph related code to reduce code size
enables nomnigraph and reduces codesize
* [Warmup] Allow both offline incremental training and online training
Change plan name on saving side and reading side to support both training type
This diff depends on D8128530 and D8168651.
* Revert D7802642: [Warmup] Allow both offline incremental training and online training
This reverts commit afc213cf9b36cecf75333a788391c4d09f4afccc
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Add legacy grad logic to fix div op on old graphs.
Add legacy grad logic to fix div op on old graphs.
* Correctly propagate operator failures
Propagate errors from operators that throw exceptions and return false
* Revert D8374829: [caffe2][nomnigraph][redo] Concat elim for sparseNN
This reverts commit 6dda028c463e54bb5c32188bbbe9202107e188a5
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [Caffe2] Added extra_info to core.DeviceOption(), enforced extra_info to be inherited in scope.DeviceScope
extra_info is a newly defined field in the DeviceOption proto. This diff adds extra_info to core.DeviceOption(). In scope.DeviceScope(), this diff also enforces that the new scope inherits the extra_info from the old scope.
* [opt] hgdirsync wasn't enabled, merge diverged code
Here's the damage, P59732616 basically xplat was left behind but had
the change from assert to CAFFE_ENFORCE
* OMP parallelism over RoIs for RoIAlign op
Simpler to parallelize over RoIs. Shouldn't affect other uses as it relies on
the number of OMP threads set during startup.
PR: https://github.com/pytorch/pytorch/pull/8562
* Use int64_t for shape in FillOps
to avoid overflow of int32
* Implement Rotated RoIAlign op
Based on Rotated RPNs as explained in https://arxiv.org/abs/1703.01086.
The idea is simple - orientation/angle is added as an RPN
anchor parameter and then the angle is further regressed similar to bbox
coords. There are some additional changes related to NMS and IoU, but besides
that it's a direct extension to Faster-RCNN. Further details in https://fb.quip.com/sZHlA1iMfWPZ.
RoIs are represented in [center_x, center_y, width, height, angle] format.
`angle` repre
* Rotated RoIAlign op CUDA forward implementation
CUDA forward impl for D8415490
* RoIAlignRotated op CUDA backward pass implementation
TSIA
* All remaining fixes to eliminate process_github.sh
Most of this diff has already been reviewed separately, except for the parts relating to _thnn/utils.py and _utils._internal.py
remove skipIf(True, 'Fbcode') line from process_github.sh
replace sed of cpp file with #ifdef to control cudnnDestroy use
undo sync-time deletion of .gitattributes, remove process_github.sh
switch to using _utils._internal rather than try-import-except
This diff also fixes the open-source bug where rebuilds have
* Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training"
Original commit changeset: 7707d2efe60e. The original diff was backed out because the online trainer package was backed out. This code only works with the new online trainer package.
* [easy] improve error log in adagrad op
as title
* re-allow use of thnn_h_path
This fixes cffi usage in OSS
* [4/4] [tum] parallelizing layerNorm for GPU full sync
as title
* add compile=False to pytorch tests, remove hack with pyc
* Add shape and type inference for RowWiseArgMax operator
See title
* Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training"
This reverts commit 78167eeef0af16b60f72c82f9dcdda9b41b4dcbd
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [fix-flaky-test] mock_hive_reader_test is flaky because GlobalCounter collects local counts at intervals
# Problem
`MockHiveReader` uses `GlobalCounter` to limit `max_examples`.
GlobalCounter on the server node collects local counts from worker nodes every 1 sec.
This 1 sec delay makes it impossible to limit exactly to the `max_examples`, it will definitely exceed `max_examples`.
# Plan
Given,
```
Expected num_examples = max_examples + num_examples/sec (Read Speed) x 1 sec (GlobalCounter Sync Int
* [Caffe2] Fix FCGradient cost inference. Prevent overflow in cost inference
FCGradient missed a factor 2 in the `num_outputs == 3` case. Overflow was occurring with flop calculation for FC. Changed types to `uint64_t` to prevent future problems.
* Fix binary ops with empty inputs
Fix binary ops with empty inputs
* Support the filling of input blob with provided data
as title for Biz Integrity case
* Back out "Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training""
Original commit changeset: 30c55dd38816 Original diff is reverted due to introducing bad integration test. Fixed the integration test.
* [c2][easy] improve pack ops error loggings
as desc.
* Add ShapeTypeInference for LpNorm operator
As desc
* Shard test_nn to reduce runtime for each test target
Closes https://github.com/pytorch/pytorch/pull/8793
The current test_nn would time out and be disabled in GreenWarden, and we need to have an option to split it up in order to pass the stress test. Right now GreenWarden roughly allows running 100 test cases in test_nn before timing out, and here we have an option to divide test_nn into 30 shards (with ~40 tests in each shard) to allow for some test suite growth in the future.
* Change default caffe2_streams_per_gpu to 1
* Remove IN_SANDCASTLE from common.py and test_nn.py
We prefer to disable the failing tests through Sandcastle UI instead.
* Add a new class for an updated prof_dag.proto
This diff contains:
- An updated prof_dag.proto that contains blob profiles.
- A class to deserialize this information (serialization is in a follow up diff)
- Update to separate profiling information from NeuralNet (and use it as part of the class above).
- Unit tests
* Lambdarank for SparseNN
This diff adds a lambda_rank_layer for SparseNN.
changes include
1) Adds support for multi sessions in c2 op
2) Adds support for two different loss functions in c2 op
3) Unit tests for op
* Revert D8586950: Back out "Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training""
This reverts commit 012220ed63eccc35659a57b31d16a3625da6317b
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [easy] A few fixups to multithread predictor benchmark
(1) support perf on T6 server
(2) remove dead code
* fix a bug about the map size
as title
* Fix reduce sum on in-place case.
Fix reduce sum on in-place case.
* [Warmup] Reland reverted diff Allow both offline incremental training and online training
Closes https://github.com/pytorch/pytorch/pull/8827
fix net transform integration test. Allow offline and online trainer to coexist D7802642.
* Add StoreHandlerNotAvailableException
Add an exception for a store that is not available or has been
deleted.
* Use exception handling for fault tolerance, missing KV store
Remove status blobs to communication ops so that exceptions propagate on
failure.
* [C2/D2][2/n]: Nonnegative-Constrained Optimization -- bounded grad proj
for simple bounded constrained optimization, incl non-negative box constraints.
* [GanH]: Adaptive Weighting with More Estimations
With implemented positivity optimization, we now learn adaptive weights with different
parameterizations.
This improves parameter estimation and training stability.
* Revert some changes for landing
* Remove AutoNoGIL in StorageSharing
* Temporarily disable net_tests
* Revert "[Caffe2] Force tensor inference checks to be triggered during testing"
This reverts commit 67ef05c22b2f71b4a489695384932f968384a2a4.
* Revert "Fix reduce sum on in-place case."
This reverts commit 6cb8a8e1b3db7b6d20941b0053e3f3836068eb64.
* Revert "Revert "Fix reduce sum on in-place case.""
This reverts commit 130a257c0893dc09f4bd6e6a45d112261807fd2c.
2018-06-26 21:55:48 +00:00
|
|
|
current = self.model.PairwiseSimilarity(
|
2018-01-26 19:28:32 +00:00
|
|
|
record, len(output_idx))
|
|
|
|
|
|
|
|
|
|
# This assert is not necessary,
|
Update from Facebook (#8887)
* add opencl + fpga context
adds an opencl context inside caffe2/fb which can be used for fpga access
* [Caffe2] Force tensor inference checks to be triggered during testing
We've started to rely on TensorInference functions more for different analysis. This diff ensures that the TensorInference function's result matches what is expected from the definition of the operator.
* Enable building //caffe2:torch with @mode/opt
In @mode/opt, python runs out of a PAR, which breaks a lot of
assumptions in the code about where templates/ folders live relative
to __file__. Rather than introduce hacks with parutil, I simply turn
template_path into a parameter for all the relevant functions and
thread it through from the top level.
* [Caffe2] Fix cost models for DotProduct and Div. Update Tensor Inference for dot product
As title. DotProduct states that output is a 1-D tensor (https://caffe2.ai/docs/operators-catalogue.html#dotproduct) though code suggests it is either 0- or 1-D depending on inputs. TensorInference defined to support implementation.
* [SG-MoE] Add an option to make the experts NOT as components
* [nomnigraph] Rename and fixup convertToNeuralNetOperator API
This will make things a bit cleaner
* no longer symlink THNN.h and THCUNN.h
* forced decoder network (onnx export)
Closes https://github.com/pytorch/translate/pull/95
Add networks in ensemble_export.py to create a forced decoding network from PyTorch NMT checkpoints. This network takes an arbitrary numberized (source, target) pair and returns the model score for the translation, including penalties.
Vocabulary reduction networks are also supported, but note that target indices which are not in the possible_translation_tokens generated for the source input will be trea
* Revert schema change to fix production models
Revert schema change to fix production models
* MockLogDeviceReader - rebase on FIX
# Goal
1), Build a make_mock_log_device_reader using make_mock_reader
2), Replace the real log_device_reader here: https://fburl.com/raihwf1p
# Log by D8151734
Real log_device_reader:
```
I0529 20:29:05.373108 954994 tensor.h:839] Tensor print_net/log of type std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >. Dims: (): read_net/ParseOpenTrainingRow:0
I0529 20:29:05.373244 954994 tensor.h:839] Tensor read_net/ParseOpenTrainin
* [C2/D2][1/n]: Nonnegative-Constrained Optimization -- log barrier
implement log barrier as a regularization method
* Add teacher weight screening.
Add teacher weight screening according to teacher labels. If the teacher label is zero, we do not use the distill loss in the objective function.
* Add NormalizerContext
See task for more detail. This implementation is a copy of what exists for RegularizerContext except for how the parameters are defined in the model_definition thrift file.
I'll try an alternative implementation which overrides the default arguments of functions instead like for argscopes in tensorflow.
https://github.com/pytorch/pytorch/compare/master...MaximeBoucher:update-from-facebook-0939578c068c?expand=1
* Adding cosine similarity option in dot processor
Add pairwise cosine similarity option in dot product.
Add an option to concatenate dot product and cosine similarity.
Add test cases.
* [nomnigraph][redo] Concat elim for sparseNN
Same as D7962948, which was reverted because Operator Schema was not
defined
* [pytorch] Revert pytorch/pytorch#7918 'Release GIL when copying to shared memory', breaks ASAN
Revert this pytorch diff that breaks ASAN when running Filament in dev mode; in opt mode it gives "bad file descriptor" errors. Looks like a race when copying tensors to shared memory in multiple mp.Queue's (which spawn separate threads).
https://github.com/pytorch/pytorch/pull/7918/files
* [nomnigraph][mobile] Enable nomnigraph by default, use -Oz on nomnigraph related code to reduce code size
enables nomnigraph and reduces codesize
* [Warmup] Allow both offline incremental training and online training
Change plan name on saving side and reading side to support both training type
This diff depends on D8128530 and D8168651.
* Revert D7802642: [Warmup] Allow both offline incremental training and online training
This reverts commit afc213cf9b36cecf75333a788391c4d09f4afccc
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Add legacy grad logic to fix div op on old graphs.
Add legacy grad logic to fix div op on old graphs.
* Correctly propagate operator failures
Propagate errors from operators that throw exceptions and return false
* Revert D8374829: [caffe2][nomnigraph][redo] Concat elim for sparseNN
This reverts commit 6dda028c463e54bb5c32188bbbe9202107e188a5
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [Caffe2] Added extra_info to core.DeviceOption(), enforced extra_info to be inherited in scope.DeviceScope
extra_info is a newly defined field in the DeviceOption proto. This diff adds extra_info to core.DeviceOption(). In scope.DeviceScope(), this diff also enforces that the new scope inherits the extra_info from the old scope.
* [opt] hgdirsync wasn't enabled, merge diverged code
Here's the damage, P59732616 basically xplat was left behind but had
the change from assert to CAFFE_ENFORCE
* OMP parallelism over RoIs for RoIAlign op
Simpler to parallelize over RoIs. Shouldn't affect other uses as it relies on
the number of OMP threads set during startup.
PR: https://github.com/pytorch/pytorch/pull/8562
* Use int64_t for shape in FillOps
to avoid overflow of int32
* Implement Rotated RoIAlign op
Based on Rotated RPNs as explained in https://arxiv.org/abs/1703.01086.
The idea is simple - orientation/angle is added as an RPN
anchor parameter and then the angle is further regressed similar to bbox
coords. There are some additional changes related to NMS and IoU, but besides
that it's a direct extension to Faster-RCNN. Further details in https://fb.quip.com/sZHlA1iMfWPZ.
RoIs are represented in [center_x, center_y, width, height, angle] format.
`angle` repre
* Rotated RoIAlign op CUDA forward implementation
CUDA forward impl for D8415490
* RoIAlignRotated op CUDA backward pass implementation
TSIA
* All remaining fixes to eliminate process_github.sh
Most of this diff has already been reviewed separately, except for the parts relating to _thnn/utils.py and _utils._internal.py
remove skipIf(True, 'Fbcode') line from process_github.sh
replace sed of cpp file with #ifdef to control cudnnDestroy use
undo sync-time deletion of .gitattributes, remove process_github.sh
switch to using _utils._internal rather than try-import-except
This diff also fixes the open-source bug where rebuilds have
* Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training"
Original commit changeset: 7707d2efe60e. The original diff was backed out because the online trainer package was backed out. This code only works with the new online trainer package.
* [easy] improve error log in adagrad op
as title
* re-allow use of thnn_h_path
This fixes cffi usage in OSS
* [4/4] [tum] parallelizing layerNorm for GPU full sync
as title
* add compile=False to pytorch tests, remove hack with pyc
* Add shape and type inference for RowWiseArgMax operator
See title
* Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training"
This reverts commit 78167eeef0af16b60f72c82f9dcdda9b41b4dcbd
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [fix-flaky-test] mock_hive_reader_test is flaky because GlobalCounter collects local counts at intervals
# Problem
`MockHiveReader` uses `GlobalCounter` to limit `max_examples`.
GlobalCounter on the server node collects local counts from worker nodes every 1 sec.
This 1 sec delay makes it impossible to limit exactly to the `max_examples`, it will definitely exceed `max_examples`.
# Plan
Given,
```
Expected num_examples = max_examples + num_examples/sec (Read Speed) x 1 sec (GlobalCounter Sync Int
* [Caffe2] Fix FCGradient cost inference. Prevent overflow in cost inference
FCGradient missed a factor 2 in the `num_outputs == 3` case. Overflow was occurring with flop calculation for FC. Changed types to `uint64_t` to prevent future problems.
* Fix binary ops with empty inputs
Fix binary ops with empty inputs
* Support the filling of input blob with provided data
as title for Biz Integrity case
* Back out "Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training""
Original commit changeset: 30c55dd38816 Original diff is reverted due to introducing bad integration test. Fixed the integration test.
* [c2][easy] improve pack ops error loggings
as desc.
* Add ShapeTypeInference for LpNorm operator
As desc
* Shard test_nn to reduce runtime for each test target
Closes https://github.com/pytorch/pytorch/pull/8793
The current test_nn would time out and be disabled in GreenWarden, and we need to have an option to split it up in order to pass the stress test. Right now GreenWarden roughly allows running 100 test cases in test_nn before timing out, and here we have an option to divide test_nn into 30 shards (with ~40 tests in each shard) to allow for some test suite growth in the future.
* Change default caffe2_streams_per_gpu to 1
* Remove IN_SANDCASTLE from common.py and test_nn.py
We prefer to disable the failing tests through Sandcastle UI instead.
* Add a new class for an updated prof_dag.proto
This diff contains:
- An updated prof_dag.proto that contains blob profiles.
- A class to deserialize this information (serialization is in a follow up diff)
- Update to separate profiling information from NeuralNet (and use it as part of the class above).
- Unit tests
* Lambdarank for SparseNN
This diff adds a lambda_rank_layer for SparseNN.
changes include
1) Adds support for multi sessions in c2 op
2) Adds support for two different loss functions in c2 op
3) Unit tests for op
* Revert D8586950: Back out "Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training""
This reverts commit 012220ed63eccc35659a57b31d16a3625da6317b
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [easy] A few fixups to multithread predictor benchmark
(1) support perf on T6 server
(2) remove dead code
* fix a bug about the map size
as title
* Fix reduce sum on in-place case.
Fix reduce sum on in-place case.
* [Warmup] Reland reverted diff Allow both offline incremental training and online training
Closes https://github.com/pytorch/pytorch/pull/8827
fix net transform integration test. Allow offline and online trainer to coexist D7802642.
* Add StoreHandlerNotAvailableException
Add an exception for a store that is not available or has been
deleted.
* Use exception handling for fault tolerance, missing KV store
Remove status blobs to communication ops so that exceptions propagate on
failure.
* [C2/D2][2/n]: Nonnegative-Constrained Optimization -- bounded grad proj
for simple bounded constrained optimization, incl non-negative box constraints.
* [GanH]: Adaptive Weighting with More Estimations
With implemented positivity optimization, we now learn adaptive weights with different
parameterizations.
This improves parameter estimation and training stability.
* Revert some changes for landing
* Remove AutoNoGIL in StorageSharing
* Temporarily disable net_tests
* Revert "[Caffe2] Force tensor inference checks to be triggered during testing"
This reverts commit 67ef05c22b2f71b4a489695384932f968384a2a4.
* Revert "Fix reduce sum on in-place case."
This reverts commit 6cb8a8e1b3db7b6d20941b0053e3f3836068eb64.
* Revert "Revert "Fix reduce sum on in-place case.""
This reverts commit 130a257c0893dc09f4bd6e6a45d112261807fd2c.
2018-06-26 21:55:48 +00:00
|
|
|
# output size is passed into PairwiseSimilarity
|
2018-01-26 19:28:32 +00:00
|
|
|
self.assertEqual(
|
|
|
|
|
schema.Scalar((np.float32, (len(output_idx), ))),
|
|
|
|
|
current
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
train_init_net, train_net = self.get_training_nets()
|
|
|
|
|
self.assertNetContainOps(train_init_net, [])
|
|
|
|
|
self.assertNetContainOps(train_net, [
|
|
|
|
|
OpSpec("BatchMatMul", None, None),
|
|
|
|
|
OpSpec("Flatten", None, None),
|
|
|
|
|
OpSpec("BatchGather", None, None),
|
|
|
|
|
])
|
|
|
|
|
|
Update from Facebook (#8887)
* add opencl + fpga context
adds an opencl context inside caffe2/fb which can be used for fpga access
* [Caffe2] Force tensor inference checks to be triggered during testing
We've started to rely on TensorInference functions more for different analysis. This diff ensures that the TensorInference function's result matches what is expected from the definition of the operator.
* Enable building //caffe2:torch with @mode/opt
In @mode/opt, python runs out of a PAR, which breaks a lot of
assumptions in the code about where templates/ folders live relative
to __file__. Rather than introduce hacks with parutil, I simply turn
template_path into a parameter for all the relevant functions and
thread it through from the top level.
* [Caffe2] Fix cost models for DotProduct and Div. Update Tensor Inference for dot product
As title. DotProduct states that output is a 1-D tensor (https://caffe2.ai/docs/operators-catalogue.html#dotproduct) though code suggests it is either 0- or 1-D depending on inputs. TensorInference defined to support implementation.
* [SG-MoE] Add an option to make the experts NOT as components
* [nomnigraph] Rename and fixup convertToNeuralNetOperator API
This will make things a bit cleaner
* no longer symlink THNN.h and THCUNN.h
* forced decoder network (onnx export)
Closes https://github.com/pytorch/translate/pull/95
Add networks in ensemble_export.py to create a forced decoding network from PyTorch NMT checkpoints. This network takes an arbitrary numberized (source, target) pair and returns the model score for the translation, including penalties.
Vocabulary reduction networks are also supported, but note that target indices which are not in the possible_translation_tokens generated for the source input will be trea
* Revert schema change to fix production models
Revert schema change to fix production models
* MockLogDeviceReader - rebase on FIX
# Goal
1), Build a make_mock_log_device_reader using make_mock_reader
2), Replace the real log_device_reader here: https://fburl.com/raihwf1p
# Log by D8151734
Real log_device_reader:
```
I0529 20:29:05.373108 954994 tensor.h:839] Tensor print_net/log of type std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >. Dims: (): read_net/ParseOpenTrainingRow:0
I0529 20:29:05.373244 954994 tensor.h:839] Tensor read_net/ParseOpenTrainin
* [C2/D2][1/n]: Nonnegative-Constrained Optimization -- log barrier
implement log barrier as a regularization method
* Add teacher weight screening.
Add teacher weight screening according to teacher labels. If the teacher label is zero, we do not use the distill loss in the objective function.
* Add NormalizerContext
See task for more detail. This implementation is a copy of what exists for RegularizerContext except for how the parameters are defined in the model_definition thrift file.
I'll try an alternative implementation which overrides the default arguments of functions instead like for argscopes in tensorflow.
https://github.com/pytorch/pytorch/compare/master...MaximeBoucher:update-from-facebook-0939578c068c?expand=1
* Adding cosine similarity option in dot processor
Add pairwise cosine similarity option in dot product.
Add an option to concatenate dot product and cosine similarity.
Add test cases.
* [nomnigraph][redo] Concat elim for sparseNN
Same as D7962948, which was reverted because Operator Schema was not
defined
* [pytorch] Revert pytorch/pytorch#7918 'Release GIL when copying to shared memory', breaks ASAN
Revert this pytorch diff that breaks ASAN when running Filament in dev mode; in opt mode it gives "bad file descriptor" errors. Looks like a race when copying tensors to shared memory in multiple mp.Queue's (which spawn separate threads).
https://github.com/pytorch/pytorch/pull/7918/files
* [nomnigraph][mobile] Enable nomnigraph by default, use -Oz on nomnigraph related code to reduce code size
enables nomnigraph and reduces codesize
* [Warmup] Allow both offline incremental training and online training
Change plan name on saving side and reading side to support both training type
This diff depends on D8128530 and D8168651.
* Revert D7802642: [Warmup] Allow both offline incremental training and online training
This reverts commit afc213cf9b36cecf75333a788391c4d09f4afccc
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Add legacy grad logic to fix div op on old graphs.
Add legacy grad logic to fix div op on old graphs.
* Correctly propagate operator failures
Propagate errors from operators that throw exceptions and return false
* Revert D8374829: [caffe2][nomnigraph][redo] Concat elim for sparseNN
This reverts commit 6dda028c463e54bb5c32188bbbe9202107e188a5
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [Caffe2] Added extra_info to core.DeviceOption(), enforced extra_info to be inherited in scope.DeviceScope
extra_info is a newly defined field in the DeviceOption proto. This diff adds extra_info to core.DeviceOption(). In scope.DeviceScope(), this diff also enforces that the new scope inherits the extra_info from the old scope.
* [opt] hgdirsync wasn't enabled, merge diverged code
Here's the damage, P59732616 basically xplat was left behind but had
the change from assert to CAFFE_ENFORCE
* OMP parallelism over RoIs for RoIAlign op
Simpler to parallelize over RoIs. Shouldn't affect other uses as it relies on
the number of OMP threads set during startup.
PR: https://github.com/pytorch/pytorch/pull/8562
* Use int64_t for shape in FillOps
to avoid overflow of int32
* Implement Rotated RoIAlign op
Based on Rotated RPNs as explained in https://arxiv.org/abs/1703.01086.
The idea is simple - orientation/angle is added as an RPN
anchor parameter and then the angle is further regressed similar to bbox
coords. There are some additional changes related to NMS and IoU, but besides
that it's a direct extension to Faster-RCNN. Further details in https://fb.quip.com/sZHlA1iMfWPZ.
RoIs are represented in [center_x, center_y, width, height, angle] format.
`angle` repre
* Rotated RoIAlign op CUDA forward implementation
CUDA forward impl for D8415490
* RoIAlignRotated op CUDA backward pass implementation
TSIA
* All remaining fixes to eliminate process_github.sh
Most of this diff has already been reviewed separately, except for the parts relating to _thnn/utils.py and _utils._internal.py
remove skipIf(True, 'Fbcode') line from process_github.sh
replace sed of cpp file with #ifdef to control cudnnDestroy use
undo sync-time deletion of .gitattributes, remove process_github.sh
switch to using _utils._internal rather than try-import-except
This diff also fixes the open-source bug where rebuilds have
* Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training"
Original commit changeset: 7707d2efe60e. The original diff was backed out because the online trainer package was backed out. This code only works with the new online trainer package.
* [easy] improve error log in adagrad op
as title
* re-allow use of thnn_h_path
This fixes cffi usage in OSS
* [4/4] [tum] parallelizing layerNorm for GPU full sync
as title
* add compile=False to pytorch tests, remove hack with pyc
* Add shape and type inference for RowWiseArgMax operator
See title
* Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training"
This reverts commit 78167eeef0af16b60f72c82f9dcdda9b41b4dcbd
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [fix-flaky-test] mock_hive_reader_test is flaky because GlobalCounter collects local counts at intervals
# Problem
`MockHiveReader` uses `GlobalCounter` to limit `max_examples`.
GlobalCounter on the server node collects local counts from worker nodes every 1 sec.
This 1 sec delay makes it impossible to limit exactly to the `max_examples`, it will definitely exceed `max_examples`.
# Plan
Given,
```
Expected num_examples = max_examples + num_examples/sec (Read Speed) x 1 sec (GlobalCounter Sync Int
* [Caffe2] Fix FCGradient cost inference. Prevent overflow in cost inference
FCGradient missed a factor 2 in the `num_outputs == 3` case. Overflow was occurring with flop calculation for FC. Changed types to `uint64_t` to prevent future problems.
* Fix binary ops with empty inputs
Fix binary ops with empty inputs
* Support the filling of input blob with provided data
as title for Biz Integrity case
* Back out "Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training""
Original commit changeset: 30c55dd38816 Original diff is reverted due to introducing bad integration test. Fixed the integration test.
* [c2][easy] improve pack ops error loggings
as desc.
* Add ShapeTypeInference for LpNorm operator
As desc
* Shard test_nn to reduce runtime for each test target
Closes https://github.com/pytorch/pytorch/pull/8793
The current test_nn would time out and be disabled in GreenWarden, and we need to have an option to split it up in order to pass the stress test. Right now GreenWarden roughly allows running 100 test cases in test_nn before timing out, and here we have an option to divide test_nn into 30 shards (with ~40 tests in each shard) to allow for some test suite growth in the future.
* Change default caffe2_streams_per_gpu to 1
* Remove IN_SANDCASTLE from common.py and test_nn.py
We prefer to disable the failing tests through Sandcastle UI instead.
* Add a new class for an updated prof_dag.proto
This diff contains:
- An updated prof_dag.proto that contains blob profiles.
- A class to deserialize this information (serialization is in a follow up diff)
- Update to separate profiling information from NeuralNet (and use it as part of the class above).
- Unit tests
* Lambdarank for SparseNN
This diff adds a lambda_rank_layer for SparseNN.
changes include
1) Adds support for multi sessions in c2 op
2) Adds support for two different loss functions in c2 op
3) Unit tests for op
* Revert D8586950: Back out "Revert D8515341: Back out "Revert D7802642: [Warmup] Allow both offline incremental training and online training""
This reverts commit 012220ed63eccc35659a57b31d16a3625da6317b
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* [easy] A few fixups to multithread predictor benchmark
(1) support perf on T6 server
(2) remove dead code
* fix a bug about the map size
as title
* Fix reduce sum on in-place case.
Fix reduce sum on in-place case.
* [Warmup] Reland reverted diff Allow both offline incremental training and online training
Closes https://github.com/pytorch/pytorch/pull/8827
fix net transform integration test. Allow offline and online trainer to coexist D7802642.
* Add StoreHandlerNotAvailableException
Add an exception for a store that is not available or has been
deleted.
* Use exception handling for fault tolerance, missing KV store
Remove status blobs to communication ops so that exceptions propagate on
failure.
* [C2/D2][2/n]: Nonnegative-Constrained Optimization -- bounded grad proj
for simple bounded constrained optimization, incl non-negative box constraints.
* [GanH]: Adaptive Weighting with More Estimations
With implemented positivity optimization, we now learn adaptive weights with different
parameterizations.
This improves parameter estimation and training stability.
* Revert some changes for landing
* Remove AutoNoGIL in StorageSharing
* Temporarily disable net_tests
* Revert "[Caffe2] Force tensor inference checks to be triggered during testing"
This reverts commit 67ef05c22b2f71b4a489695384932f968384a2a4.
* Revert "Fix reduce sum on in-place case."
This reverts commit 6cb8a8e1b3db7b6d20941b0053e3f3836068eb64.
* Revert "Revert "Fix reduce sum on in-place case.""
This reverts commit 130a257c0893dc09f4bd6e6a45d112261807fd2c.
2018-06-26 21:55:48 +00:00
|
|
|
def testPairwiseSimilarityIncorrectInput(self):
    """PairwiseSimilarity must reject the two malformed input records below.

    Each record is passed to ``self.model.PairwiseSimilarity`` with an
    output dimension of 25, and the call is expected to raise
    ``AssertionError``.
    """
    embedding_dim = 64

    # Case 1: a struct that carries only an 'x_embeddings' field.
    x_only_schema = schema.Struct(
        ('x_embeddings', schema.Scalar((np.float32, (5, embedding_dim)))),
    )
    record = schema.NewRecord(self.model.net, x_only_schema)
    with self.assertRaises(AssertionError):
        self.model.PairwiseSimilarity(record, 25)

    # Case 2: 'all_embeddings' is declared as a List instead of a Scalar.
    list_schema = schema.Struct(
        ('all_embeddings', schema.List(np.float32)),
    )
    record = schema.NewRecord(self.model.net, list_schema)
    with self.assertRaises(AssertionError):
        self.model.PairwiseSimilarity(record, 25)
|
|
|
|
|
|
|
|
|
|
def testConcat(self):
    """Check the output schema of Concat with and without ``add_axis``."""
    embedding_dim = 64
    field_names = ('input1', 'input2', 'input3')
    input_record = self.new_record(schema.Struct(
        *[
            (name, schema.Scalar((np.float32, (embedding_dim, ))))
            for name in field_names
        ]
    ))

    # Default call: the expected shape is a single axis of length
    # (number of fields) * embedding_dim.
    output = self.model.Concat(input_record)
    num_inputs = len(input_record.fields)
    flat_expected = schema.Scalar(
        (np.float32, (num_inputs * embedding_dim, ))
    )
    self.assertEqual(flat_expected, output)

    # Note that in Concat layer we assume first dimension is batch.
    # so input is B * embedding_dim
    # add_axis=1 make it B * 1 * embedding_dim
    # concat on axis=1 make it B * N * embedding_dim
    output = self.model.Concat(input_record, axis=1, add_axis=1)
    stacked_expected = schema.Scalar(
        (np.float32, (len(input_record.fields), embedding_dim))
    )
    self.assertEqual(stacked_expected, output)
|
|
|
|
|
|
2017-03-28 06:27:34 +00:00
|
|
|
def testSamplingTrain(self):
    """Check the ops produced by a SamplingTrain layer wrapping "FC".

    Verifies the output schema, the two UniformFill ops in the training
    init net, the Gather/Gather/FC/Log/Sub sequence in the training net,
    and the single FC op in the predict net.
    """
    output_dims = 1000

    indices = self.new_record(schema.Scalar((np.int32, (10,))))
    sampling_prob = self.new_record(schema.Scalar((np.float32, (10, ))))

    layer_input = schema.Struct(
        ('input', self.model.input_feature_schema.float_features),
        ('indices', indices),
        ('sampling_prob', sampling_prob),
    )
    sampled_fc = self.model.SamplingTrain(layer_input, "FC", output_dims)
    self.model.output_schema = sampled_fc

    # Check that we don't add prediction layer into the model
    self.assertEqual(1, len(self.model.layers))

    expected_schema = schema.Scalar((np.float32, (output_dims, )))
    self.assertEqual(expected_schema, sampled_fc)

    train_init_net, train_net = self.get_training_nets()

    # Two UniformFill ops are expected in the init net; their outputs are
    # reused below as the inputs of the Gather ops and of the predict FC.
    init_ops = self.assertNetContainOps(
        train_init_net,
        [OpSpec("UniformFill", None, None) for _ in range(2)],
    )

    sampled_fc_layer = self.model.layers[0]

    # One Gather per init op: it reads the init op's output plus the
    # indices blob and writes the matching train param blob of the
    # wrapped prediction layer.
    gather_specs = [
        OpSpec(
            "Gather",
            [init_ops[pos].output[0], indices()],
            [sampled_fc_layer._prediction_layer.train_param_blobs[pos]],
        )
        for pos in range(2)
    ]

    fc_inputs = [
        self.model.input_feature_schema.float_features(),
    ] + sampled_fc_layer._prediction_layer.train_param_blobs
    train_fc_spec = OpSpec("FC", fc_inputs, sampled_fc.field_blobs())

    log_spec = OpSpec("Log", [sampling_prob()], [None])
    sub_spec = OpSpec(
        "Sub",
        [sampled_fc.field_blobs()[0], None],
        sampled_fc.field_blobs(),
    )

    expected_train_specs = gather_specs + [train_fc_spec, log_spec, sub_spec]
    train_ops = self.assertNetContainOps(train_net, expected_train_specs)

    # The Log op's output must be the second input of the Sub op.
    log_op, sub_op = train_ops[3], train_ops[4]
    self.assertEqual(log_op.output[0], sub_op.input[1])

    predict_net = self.get_predict_net()
    predict_fc_spec = OpSpec(
        "FC",
        [
            self.model.input_feature_schema.float_features(),
            init_ops[0].output[0],
            init_ops[1].output[0],
        ],
        sampled_fc.field_blobs(),
    )
    self.assertNetContainOps(predict_net, [predict_fc_spec])
|
|
|
|
|
|
2017-04-03 06:34:41 +00:00
|
|
|
def testBatchLRLoss(self):
    """BatchLRLoss on label/logit/weight yields a float32 Scalar with empty shape."""
    lr_schema = schema.Struct(
        ('label', schema.Scalar((np.float64, (1,)))),
        ('logit', schema.Scalar((np.float32, (2,)))),
        ('weight', schema.Scalar((np.float64, (1,)))),
    )
    input_record = self.new_record(lr_schema)
    loss = self.model.BatchLRLoss(input_record)
    expected = schema.Scalar((np.float32, tuple()))
    self.assertEqual(expected, loss)
|
|
|
|
|
|
2019-04-11 14:27:46 +00:00
|
|
|
def testBatchLRLossWithUncertainty(self):
    """BatchLRLoss with an extra 'log_variance' field still yields a float32 Scalar with empty shape."""
    lr_schema = schema.Struct(
        ('label', schema.Scalar((np.float64, (1,)))),
        ('logit', schema.Scalar((np.float32, (2,)))),
        ('weight', schema.Scalar((np.float64, (1,)))),
        ('log_variance', schema.Scalar((np.float64, (1,)))),
    )
    input_record = self.new_record(lr_schema)
    loss = self.model.BatchLRLoss(input_record)
    expected = schema.Scalar((np.float32, tuple()))
    self.assertEqual(expected, loss)
|
|
|
|
|
|
2017-10-25 22:50:29 +00:00
|
|
|
def testMarginRankLoss(self):
    """Feed sample data, run the train net forward, and check the MarginRankLoss schema."""
    input_record = self.new_record(schema.Struct(
        ('pos_prediction', schema.Scalar((np.float32, (1,)))),
        ('neg_prediction', schema.List(np.float32)),
    ))

    # Three positive scores; the negative list is fed as lengths + items.
    pos_items = np.array([0.1, 0.2, 0.3], dtype=np.float32)
    neg_lengths = np.array([1, 2, 3], dtype=np.int32)
    neg_items = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6], dtype=np.float32)
    feed_values = [pos_items, neg_lengths, neg_items]
    schema.FeedRecord(input_record, feed_values)

    loss = self.model.MarginRankLoss(input_record)
    self.run_train_net_forward_only()

    expected = schema.Scalar((np.float32, tuple()))
    self.assertEqual(expected, loss)
|
|
|
|
|
|
2019-08-16 06:18:56 +00:00
|
|
|
def testBPRLoss(self):
    """Run BPRLoss forward on fixed inputs and check its schema and value.

    The blob 'bpr_loss/output' is fetched from the workspace and compared
    against the reference value 1.24386.
    """
    input_record = self.new_record(schema.Struct(
        ('pos_prediction', schema.Scalar((np.float32, (1,)))),
        ('neg_prediction', schema.List(np.float32)),
    ))

    # Two positive scores; the negative list is fed as lengths + items.
    pos_items = np.array([0.8, 0.9], dtype=np.float32)
    neg_lengths = np.array([1, 2], dtype=np.int32)
    neg_items = np.array([0.1, 0.2, 0.3], dtype=np.float32)
    feed_values = [pos_items, neg_lengths, neg_items]
    schema.FeedRecord(input_record, feed_values)

    loss = self.model.BPRLoss(input_record)
    self.run_train_net_forward_only()

    expected_schema = schema.Scalar((np.float32, tuple()))
    self.assertEqual(expected_schema, loss)

    result = workspace.FetchBlob('bpr_loss/output')
    expected_value = np.array(1.24386, dtype=np.float32)
    np.testing.assert_array_almost_equal(expected_value, result)
|
|
|
|
|
|
2017-04-26 19:35:52 +00:00
|
|
|
def testBatchMSELoss(self):
    """BatchMSELoss on label/prediction yields a float32 Scalar with empty shape."""
    mse_schema = schema.Struct(
        ('label', schema.Scalar((np.float64, (1,)))),
        ('prediction', schema.Scalar((np.float32, (2,)))),
    )
    input_record = self.new_record(mse_schema)
    loss = self.model.BatchMSELoss(input_record)
    expected = schema.Scalar((np.float32, tuple()))
    self.assertEqual(expected, loss)
|
|
|
|
|
|
2019-06-18 00:36:15 +00:00
|
|
|
def testBatchHuberLoss(self):
    # Schema-only check: BatchHuberLoss applied to a (label, prediction)
    # record must report a zero-dimensional float32 scalar as its output.
    # No net is run here.
    record_spec = schema.Struct(
        ('label', schema.Scalar((np.float32, (1,)))),
        ('prediction', schema.Scalar((np.float32, (2,)))),
    )
    input_record = self.new_record(record_spec)

    loss = self.model.BatchHuberLoss(input_record)

    expected_schema = schema.Scalar((np.float32, tuple()))
    self.assertEqual(expected_schema, loss)
|
|
|
|
|
|
2017-03-17 16:32:52 +00:00
|
|
|
def testBatchSigmoidCrossEntropyLoss(self):
    # Schema-only check: with 32-wide float32 label and prediction fields,
    # BatchSigmoidCrossEntropyLoss must report a zero-dimensional float32
    # scalar as its output. No net is run here.
    record_spec = schema.Struct(
        ('label', schema.Scalar((np.float32, (32,)))),
        ('prediction', schema.Scalar((np.float32, (32,)))),
    )
    input_record = self.new_record(record_spec)

    loss = self.model.BatchSigmoidCrossEntropyLoss(input_record)

    expected_schema = schema.Scalar((np.float32, tuple()))
    self.assertEqual(expected_schema, loss)
|
|
|
|
|
|
2017-03-17 17:05:11 +00:00
|
|
|
def testBatchSoftmaxLoss(self):
    # Schema-only check: BatchSoftmaxLoss on a (label, prediction) record
    # must expose a struct with a 32-wide 'softmax' field and a 'loss'
    # field. No net is run here.
    record_spec = schema.Struct(
        ('label', schema.Scalar((np.float32, tuple()))),
        ('prediction', schema.Scalar((np.float32, (32,)))),
    )
    input_record = self.new_record(record_spec)

    loss = self.model.BatchSoftmaxLoss(input_record)

    expected_schema = schema.Struct(
        ('softmax', schema.Scalar((np.float32, (32,)))),
        ('loss', schema.Scalar(np.float32)),
    )
    self.assertEqual(expected_schema, loss)
|
|
|
|
|
|
2017-06-21 17:24:09 +00:00
|
|
|
def testBatchSoftmaxLossWeight(self):
    # Same schema check as the unweighted softmax-loss test, but the input
    # record carries an additional float64 'weight' field; the expected
    # output struct ('softmax', 'loss') is unchanged. No net is run here.
    record_spec = schema.Struct(
        ('label', schema.Scalar((np.float32, tuple()))),
        ('prediction', schema.Scalar((np.float32, (32,)))),
        ('weight', schema.Scalar((np.float64, (1,)))),
    )
    input_record = self.new_record(record_spec)

    loss = self.model.BatchSoftmaxLoss(input_record)

    expected_schema = schema.Struct(
        ('softmax', schema.Scalar((np.float32, (32,)))),
        ('loss', schema.Scalar(np.float32)),
    )
    self.assertEqual(expected_schema, loss)
|
|
|
|
|
|
2017-05-26 23:46:04 +00:00
|
|
|
@given(
    X=hu.arrays(dims=[2, 5]),
)
def testBatchNormalization(self, X):
    # Builds a BatchNormalization layer over a 5-wide float32 record and
    # verifies (a) its output schema, (b) the operators emitted into the
    # train, eval and predict nets, and (c) that all three nets run on the
    # generated input X.
    input_record = self.new_record(schema.Scalar((np.float32, (5,))))
    schema.FeedRecord(input_record, [X])
    bn_output = self.model.BatchNormalization(input_record)
    expected_schema = schema.Scalar((np.float32, (5,)))
    self.assertEqual(expected_schema, bn_output)
    self.model.output_schema = schema.Struct()

    train_init_net, train_net = self.get_training_nets()

    # The init net must contain four ConstantFill ops; the value returned
    # by assertNetContainOps is indexed below to reach their output blobs.
    init_ops = self.assertNetContainOps(
        train_init_net,
        [OpSpec("ConstantFill", None, None) for _ in range(4)],
    )
    fill_outputs = [init_ops[idx].output[0] for idx in range(4)]

    input_blob = input_record.field_blobs()[0]
    output_blob = bn_output.field_blobs()[0]

    expand_dims_spec = OpSpec("ExpandDims", [input_blob], None)

    # Train and test SpatialBN take the same inputs: an unconstrained
    # first input followed by the four init-op outputs.
    train_bn_spec = OpSpec(
        "SpatialBN",
        [None] + fill_outputs,
        [output_blob, fill_outputs[2], fill_outputs[3], None, None],
        {'is_test': 0, 'order': 'NCHW', 'momentum': 0.9},
    )
    test_bn_spec = OpSpec(
        "SpatialBN",
        [None] + fill_outputs,
        [output_blob],
        {'is_test': 1, 'order': 'NCHW', 'momentum': 0.9},
    )

    squeeze_spec = OpSpec("Squeeze", [output_blob], [output_blob])

    self.assertNetContainOps(
        train_net,
        [expand_dims_spec, train_bn_spec, squeeze_spec],
    )

    eval_net = self.get_eval_net()
    self.assertNetContainOps(
        eval_net,
        [expand_dims_spec, test_bn_spec, squeeze_spec],
    )

    predict_net = self.get_predict_net()
    self.assertNetContainOps(
        predict_net,
        [expand_dims_spec, test_bn_spec, squeeze_spec],
    )

    workspace.RunNetOnce(train_init_net)
    workspace.RunNetOnce(train_net)

    # The input record is fed again before each inference net is run.
    for inference_net in (eval_net, predict_net):
        schema.FeedRecord(input_record, [X])
        workspace.RunNetOnce(inference_net)
|
|
|
|
|
|
Update from facebook (#8384)
* [fix] fixup the bias multiplier data access issue
Hotfix for failures in conv_transpose
* [D2][Easy]: lint regularizer
lint with black
* [GanH]: Split mu in adaptive weight for diagnose
* [Dper] Add the ability to split FC weights into multiple smaller ones
* fix SumReduceLikeOp for empty blob
as desc.
* add ctc_greedy_decoder for caffe2
ctc_greedy_decoder same as tf's
* Update event callback handling
Allow multiple callbacks per event
* Add WeightedSum layer
The motivation is to do weighted sum in HoNet/crossnet, in the next diff, I'll replace model.Add with model.WeightedSum in
honet: https://fburl.com/f4rmolg2
crossnet: https://fburl.com/v7awn8se, https://fburl.com/63filbnm
* Replicate DAG's behavior
Some callers expect RunAsync to block, replicate that behavior in case of
explicit 'dag' net type
* [dper] layernorm layer
as title
* Override dag, async_dag, async_polling
Overriding dag, async_dag and async_polling with async_scheduling
* Name the thread pools
Caffe thread pools currently inherit the thread names from the thread that starts them, which can be misleading. Give them an explicit name instead.
* [Caffe2] FillerOp should support int64_t dimensions
Change argument type to int64_t for shape argument of FillerOp (used in ConstantFill, XavierFill, etc)
* Remove caffe2/caffe2/contrib/torch/
It's not used anywhere and depends on old lua torch that conflicts with Aten. Given PT1 it's not relevant any more (though it was nice and clever code!)
#accept2ship
* Fix linearWarmup multiplier check
The multiplier needs to be non-negative, not strictly positive.
* Revert D3314316
This is after 2 years and we do not seem to have a use case for this one, so
for the sake of clean API design we should potentially remove this. This would
allow us to potentially pass in arguments to optionally construct an object,
although it is indeed a little bit unclear how we can reuse existing objects if
constructor arguments are passed in. In any case, we may want to remove this
dangling feature.
* Speedup generate proposals by partial_sort.
Speedup generate proposals by partial_sort.
FACEBOOK:
- Saw speed improvement for training with this op.
- Yanghan benchmarked the op on a small dataset and see consistent 100% improvement on speed (6ms -> 3ms) on 420 input resolution. See next diff for details.
* More parallel processing friendly for CPP version of GenerateProposals.
More parallel processing friendly for CPP version of GenerateProposals.
* [DT] [43/n] Lift stop conditions inside reader code back to flow control
1. Split multi_reader function into local_reader and remote_reader
2. Lifted stop conditions inside Limiter back to flow control
3. Split epoch flow building logic into 3 cases:
- single machine (1 reader, 1 trainer on trainer0 node, no PS)
- (1 reader + 1 trainer) on trainer0 node, has PS
- multiple readers, readers do not share nodes with trainers, might have PS or not
* Resolve conflicts for torch/_thnn/utils.py
* [Caffe2] Handle image decoding errors
Image decoding errors can make the whole training fail. This diff is to handle them
1.Catch imdecode exceptions and check if decoded image has zero columns or rows. This is counted as decoding errors.
2.Replace the image with empty in case of error
3.Count the number of errors and throw runtime exception if the rate reaches given number
The empty image data is kept. It might introduce noise in the training data.
* Update MKL exporter to IDEEP ops
TSIA
* [Caffe2] GlobalInit is thread safe, fixing the comment
With the mutex and lock, GlobalInit is thread safe.
Update the comments.
* Back out "Add support for generating ATen files during fbcode build"
Original commit changeset: 28970ddba353
@override-unit-failures
(Note: this ignores all push blocking failures!)
* [DT]: fix predictor save
similar to D6610058, here we add the fix for distributed online training
* Remove net_singlethread_async_gpu.cc
Closes https://github.com/caffe2/caffe2/pull/2528
This removes net_singlethread_async_gpu.cc as part of our effort to clean
CUDAContext and the net executors.
* Inline DFS task execution
Add a DFS inline task execution mode in executor
* Add c10 folder to fbcode
This adds the c10 folder and its test cases to fbcode. Build flags are mostly taken from aten.
* add dependencies for online trainer
Add some dependencies so that the online model can use DataPipeline and PredictionTransform operators
Relevant post: https://fb.intern.facebook.com/groups/1324375037655677/permalink/1740993462660497/
* Resolve conflicts for tools/jit/gen_jit_dispatch.py
* [Fix] sparse regularization in distributed training
* Support advanced pooling options in sum processor
* support advanced pooling options in sum processor
* remove redundant code
* support attention in sum processor
* Improve shard logging in net tracing code
Make it handle arbitrary shard ids instead of just one digit ids.
* [Caffe2] Call GlobalInit in predictor only in mobile
FACEBOOK:
Calling GlobalInit long after the program starts may not be safe. There are issues if the following happens:
User does not call GlobalInit and initFacebook after program starts
User sets a flag manually: https://fburl.com/mcsumw7d
User calls OSS predictor.
OSS predictor calls GlobalInit
GlobalInit calls initFacebook
initFacebook resets all flags: https://fburl.com/tolszha1
Thus, the user manually set flags are overwritten
This would happen anytime GlobalInit is called long after the program starts.
I suppose the intention of the user in this case is not to call GlobalInit throughout the program,
but use Caffe2 regardless (is that desired?)
But adding GlobalInit in the OSS predictor would automatically call GlobalInit when using Caffe2.
This issue doesn't exist in mobile, since initFacebook is not called on mobile.
For now, guard the GlobalInit in predictor for mobile only.
May want to ensure the GlobalInit is always called at the start of the program. @[3501714:kutta] has seen weird issues when not calling GlobalInit at the start of the program on server side. He has made some progress on this.
* resolve conflicts for caffe2/core/logging_is_google_glog.h and test/test_torch.py
* Add empty fix for SumLikeReduceOp
Add empty fix for SumLikeReduceOp
* Revert D7962948: [caffe2][nomnigraph] Concat elim for sparseNN
This reverts commit f7f434dc5c34ca6058b9765d2ef615453d2276a9
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Remove Declarations.yaml
* Include common.h
* Change std::stoi to caffe2::stoi
* Add thread_name.cc to the CMake file
* No need to subtract 1. Fix test segfaults
* Fix NetTest, ObserverTest
Fix tests
(cherry picked from commit 3767e66c3f365596cba3d46d3e7322c933a0ab41)
* CTCGreedyDecoderOp only has CPU implementation, test should only run on CPU
* Add a variable to avoid conversion resizing issue
* [fix] fixup the bias multiplier data access issue
Hotfix for failues in conv_transpose
* [D2][Easy]: lint regularizer
lint with black
* [GanH]: Split mu in adaptive weight for diagnose
* [Dper] Add the ability to split FC weights into multiple smaller ones
* fix SumReduceLikeOp for empty blob
as desc.
* add ctc_greedy_decoder for caffe2
ctc_greedy_decoder same as tf's
* Update event callback handling
Allow multiple callbacks per event
* Add WeightedSum layer
The motivation is to do weighted sum in HoNet/crossnet, in the next diff, I'll replace model.Add with model.WeightedSum in
honet: https://fburl.com/f4rmolg2
crossnet: https://fburl.com/v7awn8se, https://fburl.com/63filbnm
* Replicate DAG's behavior
Some callers expect RunAsync to block, replicate that behavior in case of
explicit 'dag' net type
* [dper] layernorm layer
as title
* Override dag, async_dag, async_polling
Overriding dag, async_dag and async_polling with async_scheduling
* Name the thread pools
Caffe thread pools currently inherit the thread names from the thread that starts them, which can be misleading. Give them an explicit name instead.
* [Caffe2] FilleOp should support int64_t dimensions
Change argument type to int64_t for shape argument of FillerOp (used in ConstantFill, XavierFill, etc)
* Remove caffe2/caffe2/contrib/torch/
It's not used anywhere and depends on old lua torch that conflicts with Aten. Given PT1 it's not relevant any more (though it was nice and clever code!)
#accept2ship
* Fix linearWarmup multiplier check
The multiplier needs to be non-negative, not strictly positive.
* Revert D3314316
This is after 2 years and we do not seem to have a use case for this one, so
for the sake of clean API design we should potentially remove this. This would
allow us to potentially pass in arguments to optionally construct an object,
although it is indeed a little bit unclear how we can reuse existing objects if
constructor arguments are passed in. In any case, we may want to remove this
dangling feature.
* Speedup generate proposals by partial_sort.
Speedup generate proposals by partial_sort.
FACEBOOK:
- Saw speed improvement for training with this op.
- Yanghan benchmarked the op on a small dataset and see consistent 100% improvement on speed (6ms -> 3ms) on 420 input resolution. See next diff for details.
* More parallel processing friendly for CPP version of GenerateProposals.
More parallel processing friendly for CPP version of GenerateProposals.
* [DT] [43/n] Lift stop conditions inside reader code back to flow control
1. Split multi_reader function into local_reader and remote_reader
2. Lifted stop conditions inside Limiter back to flow control
3. Split epoch flow building logic into 3 cases:
- single machine (1 reader, 1 trainer on trainer0 node, no PS)
- (1 reader + 1 trainer) on trainer0 node, has PS
- multiple readers, readers do not share nodes with trainers, might have PS or not
* Resolve conflicts for torch/_thnn/utils.py
* [Caffe2] Handle image decoding errors
Image decoding errors can make the whole training fail. This diff is to handle them
1.Catch imdecode exceptions and check if decoded image has zero columns or rows. This is counted as decoding errors.
2.Replace the image with empty in case of error
3.Count the number of errors and throw runtime exception if the rate reaches given number
The empty image data is kept. It might introduce noise in the training data.
* Update MKL exporter to IDEEP ops
TSIA
* [Caffe2] GlobalInit is thread safe, fixing the comment
With the mutex and lock, GlobalInit is thread safe.
Update the comments.
* Back out "Add support for generating ATen files during fbcode build"
Original commit changeset: 28970ddba353
@override-unit-failures
(Note: this ignores all push blocking failures!)
* [DT]: fix predictor save
similar to D6610058, here we add the fix for distributed online training
* Remove net_singlethread_async_gpu.cc
Closes https://github.com/caffe2/caffe2/pull/2528
This removes net_singlethread_async_gpu.cc as part of our effort to clean
CUDAContext and the net executors.
* Inline DFS task execution
Add a DFS inline task execution mode in executor
* Add c10 folder to fbcode
This adds the c10 folder and its test cases to fbcode. Build flags are mostly taken from aten.
* add dependencies for online trainer
Add some dependencies so that the online model can use DataPipeline and PredictionTransform operators
Relevent post: https://fb.intern.facebook.com/groups/1324375037655677/permalink/1740993462660497/
* Resolve conflicts for tools/jit/gen_jit_dispatch.py
* [Fix] sparse regularization in distributed training
* Support advanced pooling options in sum processor
* support advanced pooling options in sum processor
* remove redundant code
* support attention in sum processor
* Improve shard logging in net tracing code
Make it handle arbitrary shard ids instead of just one digit ids.
* [Caffe2] Call GlobalInit in predictor only in mobile
FACEBOOK:
Calling GlobalInit long after the program starts may not be safe. There are issues if the following happens:
User does not call GlobalInit and initFacebook after program starts
User sets a flag manually: https://fburl.com/mcsumw7d
User calls OSS predictor.
OSS predictor calls GlobalInit
GlobalInit calls initFacebook
initFacebook resets all flags: https://fburl.com/tolszha1
Thus, the user manually set flags are overwritten
This would happen anytime GlobalInit is called long after the program starts.
I suppose the intention of the user in this case is not to call GlobalInit throughout the program,
but use Caffe2 regardless (is that desired?)
But adding GlobalInit in the OSS predictor would automatically call GlobalInit when using Caffe2.
This issue doesn't exist in mobile, since initFacebook is not called on mobile.
For now, guard the GlobalInit in predictor for mobile only.
May want to ensure the GlobalInit is always called at the start of the program. @[3501714:kutta] has seen weird issues when not calling GlobalInit at the start of the program on server side. He has made some progress on this.
* resolve conflicts for caffe2/core/logging_is_google_glog.h and test/test_torch.py
* Add empty fix for SumLikeReduceOp
Add empty fix for SumLikeReduceOp
* Revert D7962948: [caffe2][nomnigraph] Concat elim for sparseNN
This reverts commit f7f434dc5c34ca6058b9765d2ef615453d2276a9
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Remove Declarations.yaml
* Include common.h
* Change std::stoi to caffe2::stoi
* Add thread_name.cc to the CMake file
* No need to subtract 1. Fix test segfaults
* Fix NetTest, ObserverTest
Fix tests
(cherry picked from commit 3767e66c3f365596cba3d46d3e7322c933a0ab41)
* CTCGreedyDecoderOp only has CPU implementation, test should only run on CPU
* Add a variable to avoid conversion resizing issue
* Remove the code per soumith's comments
* Remove the code per soumith's comments
* Remove blank lines in the end of file
* Resolve conflicts for torch/_thnn/utils.py
* Update MKL exporter to IDEEP ops
TSIA
* Back out "Add support for generating ATen files during fbcode build"
Original commit changeset: 28970ddba353
@override-unit-failures
(Note: this ignores all push blocking failures!)
* add dependencies for online trainer
Add some dependencies so that the online model can use DataPipeline and PredictionTransform operators
Relevent post: https://fb.intern.facebook.com/groups/1324375037655677/permalink/1740993462660497/
* Resolve conflicts for tools/jit/gen_jit_dispatch.py
* Support advanced pooling options in sum processor
* support advanced pooling options in sum processor
* remove redundant code
* support attention in sum processor
* resolve conflicts for caffe2/core/logging_is_google_glog.h and test/test_torch.py
* Revert D7962948: [caffe2][nomnigraph] Concat elim for sparseNN
This reverts commit f7f434dc5c34ca6058b9765d2ef615453d2276a9
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Remove Declarations.yaml
* Include common.h
* Change std::stoi to caffe2::stoi
* [caffe2] upgrade IDEEP and hotfix for conv op accuracy issue (#8364)
* [IDEEP] Upgrade IDEEP version
Signed-off-by: Gu, Jinghui <jinghui.gu@intel.com>
* [IDEEP] Fix accuracy issue in conv op
Signed-off-by: Gu, Jinghui <jinghui.gu@intel.com>
* Fix build error due to lack of src in CMakeLists
Signed-off-by: Gu, Jinghui <jinghui.gu@intel.com>
* Remove the code per soumith's comments
* [ONNX] Add an ATen fallback pathway for ONNX export (#8273)
* ATen fallback for ONNX export
* Move to enum
* Fix model test
* Add comment
* Address comments
BC interface
* Remove imaginary file (#8415)
* [Caffe2] Enable AMD/MIOPEN ops for Caffe2 (#8306)
* Add hip support for caffe2 core
* Add MIOPEN header/wrapper to caffe2 core
* Add HIP device into caffe2 PB
* top level makefile change for rocm/hip
* makefile scaffolding for AMD/RocM/HIP
* Makefile scaffolding for AMD/RocM/HIP; add makefile/utility for HIP files
* caffe2 PB update for AMD/ROCM HIP device
* Add AMD/RocM/Thrust dependency
* HIP threadpool update
* Fix makefile macro
* makefile fix: duplicate test/binary name
* makefile clean-up
* makefile clean-up
* add HIP operator registry
* add utilities for hip device
* Add USE_HIP to config summary
* makefile fix for BUILD_TEST
* merge latest
* Fix indentation
* code clean-up
* Guard builds without HIP and use the same cmake script as PyTorch to find HIP
* Setup rocm environment variables in build.sh (ideally should be done in the docker images)
* setup locale
* set HIP_PLATFORM
* Revert "set HIP_PLATFORM"
This reverts commit 8ec58db2b390c9259220c49fa34cd403568300ad.
* continue the build script environment variables mess
* HCC_AMDGPU_TARGET
* Cleanup the mess, has been fixed in the lastest docker images
* Assign protobuf field hip_gpu_id a new field number for backward compatibility
* change name to avoid conflict
* Fix duplicated thread pool flag
* Refactor cmake files to not add hip includes and libs globally
* Fix the wrong usage of environment variables detection in cmake
* Add MIOPEN CNN operators
* Revert "Add MIOPEN CNN operators"
This reverts commit 6e89ad4385b5b8967a7854c4adda52c012cee42a.
* Add MIOPEN pooling operator
* Add MIOPEN activation operator
* Add MIOPEN softmax operator
* Add MIOPEN spatial batch norm operator
* Add MIOPEN local response normalization operator
* Add MIOPEN conv operator
* Clean-up LRN ops
* enable fp16 in MIOPEN pool ops
* Enable fp16 for MIOPEN relu op
* Enable fp16 for MIOPEN spatial batch norm op
* code clean-up
* revert float16 support
* Create Caffe2 python binding for AMD/ROCM/HIP
* Add op fallback for HIP operator
* add hip src/test files in cmake
* exclude hip src/test files
* fix python binding for hip backend
* fix MIOPEN pooling op workspace
* hack to compile miopen operators
* fix include path for MIOPEN ops
* Fix include path
* Add HIP math utilities
* Fix path for HIP math utils
* cmake fix
* Cmake fix / hipcc for hip files
* suppress hipcc warning
* cmake fix /replcae USE_HIP with USE_ROCM
* revert LoadHIP.cmake change
* fix include for thrust/cub-hip
* include path fix for conversion.h
* Updated with latest upstream changes
* clang format fixes
* Context_hip updates
* Fixed typo in rocblas handle get function
* Updated hipified math utils
* Updated math hip test util
* Updated context hip test
* Updated common_hip
* Updated net async dag for HIP
* Added MIOPEN in operator hip test
* fix
* C2 dependencies clean-up
* fix include path for building custom protobuf
* Decouple miopen pool op and conv_pool_op base
* cmake refactor
* fix operator_hip_test
* move all hip/miopen ops files into caffe2/operators/hip
* sanitize cmake
* permission issue
* remove extra parenthesis
* remove artifact from resolving merge conflict
* cont. sanitize cmake files
* fix syntax error
* sanitize conversion.h
* .
* Revert "."
This reverts commit 56020cb0e996a31ae27bf1f8f491955ed0b121b9.
* clang-format
* Enable some reduce operators' ONNX backend tests (#8418)
* fix old comment to point to the right file (#8416)
* Stop pinning nccl version. (#8421)
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* Expose logsumexp docs and mark log_sum_exp in distributions for internal use (#8428)
* Enable some of the ONNX backend test on broadcasting (#8423)
* Enable some of the ONNX backend test on broadcasting
* enable gemm broadcast
* Expose proto utils and ONNX (#8073)
* Expose proto utils and ONNX from PyTorch libcaffe2.so
* Try to use protobuf from _C.so
* Fix ONNX proto header include
* Adjust order of imports for ONNX until nanopb goes away
* Set and use ONNX_NAMESPACE for PyTorch builds
* Show protobuf summary for all builds
* Add ONNX_NAMESPACE for cpp_build
* Statically link libprotobuf.a into libtorch.so
* Set ONNX_NAMESPACE on Windows build
* Move core/dispatch up as well
* Add /MD flag for Windows build of _C
* Potential Windows fix for ONNX and protobuf
* Add direct linkage from _C to ONNX on Windows
* Only include protobuf wrapper for PyTorch
* Pass extra_compile_args to _nvrtc ext build
* Remove installation of .a files
* Rebase creates some weird situations, revert them manually
* Remove more weird changes due to rebase
* Need to add thread_name.cc after merge
2018-06-13 20:10:45 +00:00
|
|
|
@given(
|
|
|
|
|
X=hu.arrays(dims=[2, 5, 6]),
|
2018-10-12 00:33:29 +00:00
|
|
|
use_layer_norm_op=st.booleans(),
|
Update from facebook (#8384)
* [fix] fixup the bias multiplier data access issue
Hotfix for failues in conv_transpose
* [D2][Easy]: lint regularizer
lint with black
* [GanH]: Split mu in adaptive weight for diagnose
* [Dper] Add the ability to split FC weights into multiple smaller ones
* fix SumReduceLikeOp for empty blob
as desc.
* add ctc_greedy_decoder for caffe2
ctc_greedy_decoder same as tf's
* Update event callback handling
Allow multiple callbacks per event
* Add WeightedSum layer
The motivation is to do weighted sum in HoNet/crossnet, in the next diff, I'll replace model.Add with model.WeightedSum in
honet: https://fburl.com/f4rmolg2
crossnet: https://fburl.com/v7awn8se, https://fburl.com/63filbnm
* Replicate DAG's behavior
Some callers expect RunAsync to block, replicate that behavior in case of
explicit 'dag' net type
* [dper] layernorm layer
as title
* Override dag, async_dag, async_polling
Overriding dag, async_dag and async_polling with async_scheduling
* Name the thread pools
Caffe thread pools currently inherit the thread names from the thread that starts them, which can be misleading. Give them an explicit name instead.
* [Caffe2] FilleOp should support int64_t dimensions
Change argument type to int64_t for shape argument of FillerOp (used in ConstantFill, XavierFill, etc)
* Remove caffe2/caffe2/contrib/torch/
It's not used anywhere and depends on old lua torch that conflicts with Aten. Given PT1 it's not relevant any more (though it was nice and clever code!)
#accept2ship
* Fix linearWarmup multiplier check
The multiplier needs to be non-negative, not strictly positive.
* Revert D3314316
This is after 2 years and we do not seem to have a use case for this one, so
for the sake of clean API design we should potentially remove this. This would
allow us to potentially pass in arguments to optionally construct an object,
although it is indeed a little bit unclear how we can reuse existing objects if
constructor arguments are passed in. In any case, we may want to remove this
dangling feature.
* Speedup generate proposals by partial_sort.
Speedup generate proposals by partial_sort.
FACEBOOK:
- Saw speed improvement for training with this op.
- Yanghan benchmarked the op on a small dataset and see consistent 100% improvement on speed (6ms -> 3ms) on 420 input resolution. See next diff for details.
* More parallel processing friendly for CPP version of GenerateProposals.
More parallel processing friendly for CPP version of GenerateProposals.
* [DT] [43/n] Lift stop conditions inside reader code back to flow control
1. Split multi_reader function into local_reader and remote_reader
2. Lifted stop conditions inside Limiter back to flow control
3. Split epoch flow building logic into 3 cases:
- single machine (1 reader, 1 trainer on trainer0 node, no PS)
- (1 reader + 1 trainer) on trainer0 node, has PS
- multiple readers, readers do not share nodes with trainers, might have PS or not
* Resolve conflicts for torch/_thnn/utils.py
* [Caffe2] Handle image decoding errors
Image decoding errors can make the whole training fail. This diff is to handle them
1.Catch imdecode exceptions and check if decoded image has zero columns or rows. This is counted as decoding errors.
2.Replace the image with empty in case of error
3.Count the number of errors and throw runtime exception if the rate reaches given number
The empty image data is kept. It might introduce noise in the training data.
* Update MKL exporter to IDEEP ops
TSIA
* [Caffe2] GlobalInit is thread safe, fixing the comment
With the mutex and lock, GlobalInit is thread safe.
Update the comments.
* Back out "Add support for generating ATen files during fbcode build"
Original commit changeset: 28970ddba353
@override-unit-failures
(Note: this ignores all push blocking failures!)
* [DT]: fix predictor save
similar to D6610058, here we add the fix for distributed online training
* Remove net_singlethread_async_gpu.cc
Closes https://github.com/caffe2/caffe2/pull/2528
This removes net_singlethread_async_gpu.cc as part of our effort to clean
CUDAContext and the net executors.
* Inline DFS task execution
Add a DFS inline task execution mode in executor
* Add c10 folder to fbcode
This adds the c10 folder and its test cases to fbcode. Build flags are mostly taken from aten.
* add dependencies for online trainer
Add some dependencies so that the online model can use DataPipeline and PredictionTransform operators
Relevant post: https://fb.intern.facebook.com/groups/1324375037655677/permalink/1740993462660497/
* Resolve conflicts for tools/jit/gen_jit_dispatch.py
* [Fix] sparse regularization in distributed training
* Support advanced pooling options in sum processor
* support advanced pooling options in sum processor
* remove redundant code
* support attention in sum processor
* Improve shard logging in net tracing code
Make it handle arbitrary shard ids instead of just one digit ids.
* [Caffe2] Call GlobalInit in predictor only in mobile
FACEBOOK:
Calling GlobalInit long after the program starts may not be safe. There are issues if the following happens:
User does not call GlobalInit and initFacebook after program starts
User sets a flag manually: https://fburl.com/mcsumw7d
User calls OSS predictor.
OSS predictor calls GlobalInit
GlobalInit calls initFacebook
initFacebook resets all flags: https://fburl.com/tolszha1
Thus, the user manually set flags are overwritten
This would happen anytime GlobalInit is called long after the program starts.
I suppose the intention of the user in this case is not to call GlobalInit throughout the program,
but use Caffe2 regardless (is that desired?)
But adding GlobalInit in the OSS predictor would automatically call GlobalInit when using Caffe2.
This issue doesn't exist in mobile, since initFacebook is not called on mobile.
For now, guard the GlobalInit in predictor for mobile only.
May want to ensure the GlobalInit is always called at the start of the program. @[3501714:kutta] has seen weird issues when not calling GlobalInit at the start of the program on server side. He has made some progress on this.
* resolve conflicts for caffe2/core/logging_is_google_glog.h and test/test_torch.py
* Add empty fix for SumLikeReduceOp
Add empty fix for SumLikeReduceOp
* Revert D7962948: [caffe2][nomnigraph] Concat elim for sparseNN
This reverts commit f7f434dc5c34ca6058b9765d2ef615453d2276a9
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Remove Declarations.yaml
* Include common.h
* Change std::stoi to caffe2::stoi
* Add thread_name.cc to the CMake file
* No need to subtract 1. Fix test segfaults
* Fix NetTest, ObserverTest
Fix tests
(cherry picked from commit 3767e66c3f365596cba3d46d3e7322c933a0ab41)
* CTCGreedyDecoderOp only has CPU implementation, test should only run on CPU
* Add a variable to avoid conversion resizing issue
* [fix] fixup the bias multiplier data access issue
Hotfix for failures in conv_transpose
* [D2][Easy]: lint regularizer
lint with black
* [GanH]: Split mu in adaptive weight for diagnose
* [Dper] Add the ability to split FC weights into multiple smaller ones
* fix SumReduceLikeOp for empty blob
as desc.
* add ctc_greedy_decoder for caffe2
ctc_greedy_decoder same as tf's
* Update event callback handling
Allow multiple callbacks per event
* Add WeightedSum layer
The motivation is to do weighted sum in HoNet/crossnet, in the next diff, I'll replace model.Add with model.WeightedSum in
honet: https://fburl.com/f4rmolg2
crossnet: https://fburl.com/v7awn8se, https://fburl.com/63filbnm
* Replicate DAG's behavior
Some callers expect RunAsync to block, replicate that behavior in case of
explicit 'dag' net type
* [dper] layernorm layer
as title
* Override dag, async_dag, async_polling
Overriding dag, async_dag and async_polling with async_scheduling
* Name the thread pools
Caffe thread pools currently inherit the thread names from the thread that starts them, which can be misleading. Give them an explicit name instead.
* [Caffe2] FillerOp should support int64_t dimensions
Change argument type to int64_t for shape argument of FillerOp (used in ConstantFill, XavierFill, etc)
* Remove caffe2/caffe2/contrib/torch/
It's not used anywhere and depends on old lua torch that conflicts with Aten. Given PT1 it's not relevant any more (though it was nice and clever code!)
#accept2ship
* Fix linearWarmup multiplier check
The multiplier needs to be non-negative, not strictly positive.
* Revert D3314316
This is after 2 years and we do not seem to have a use case for this one, so
for the sake of clean API design we should potentially remove this. This would
allow us to potentially pass in arguments to optionally construct an object,
although it is indeed a little bit unclear how we can reuse existing objects if
constructor arguments are passed in. In any case, we may want to remove this
dangling feature.
* Speedup generate proposals by partial_sort.
Speedup generate proposals by partial_sort.
FACEBOOK:
- Saw speed improvement for training with this op.
- Yanghan benchmarked the op on a small dataset and see consistent 100% improvement on speed (6ms -> 3ms) on 420 input resolution. See next diff for details.
* More parallel processing friendly for CPP version of GenerateProposals.
More parallel processing friendly for CPP version of GenerateProposals.
* [DT] [43/n] Lift stop conditions inside reader code back to flow control
1. Split multi_reader function into local_reader and remote_reader
2. Lifted stop conditions inside Limiter back to flow control
3. Split epoch flow building logic into 3 cases:
- single machine (1 reader, 1 trainer on trainer0 node, no PS)
- (1 reader + 1 trainer) on trainer0 node, has PS
- multiple readers, readers do not share nodes with trainers, might have PS or not
* Resolve conflicts for torch/_thnn/utils.py
* [Caffe2] Handle image decoding errors
Image decoding errors can make the whole training fail. This diff is to handle them
1.Catch imdecode exceptions and check if decoded image has zero columns or rows. This is counted as decoding errors.
2.Replace the image with empty in case of error
3.Count the number of errors and throw runtime exception if the rate reaches given number
The empty image data is kept. It might introduce noise in the training data.
* Update MKL exporter to IDEEP ops
TSIA
* [Caffe2] GlobalInit is thread safe, fixing the comment
With the mutex and lock, GlobalInit is thread safe.
Update the comments.
* Back out "Add support for generating ATen files during fbcode build"
Original commit changeset: 28970ddba353
@override-unit-failures
(Note: this ignores all push blocking failures!)
* [DT]: fix predictor save
similar to D6610058, here we add the fix for distributed online training
* Remove net_singlethread_async_gpu.cc
Closes https://github.com/caffe2/caffe2/pull/2528
This removes net_singlethread_async_gpu.cc as part of our effort to clean
CUDAContext and the net executors.
* Inline DFS task execution
Add a DFS inline task execution mode in executor
* Add c10 folder to fbcode
This adds the c10 folder and its test cases to fbcode. Build flags are mostly taken from aten.
* add dependencies for online trainer
Add some dependencies so that the online model can use DataPipeline and PredictionTransform operators
Relevant post: https://fb.intern.facebook.com/groups/1324375037655677/permalink/1740993462660497/
* Resolve conflicts for tools/jit/gen_jit_dispatch.py
* [Fix] sparse regularization in distributed training
* Support advanced pooling options in sum processor
* support advanced pooling options in sum processor
* remove redundant code
* support attention in sum processor
* Improve shard logging in net tracing code
Make it handle arbitrary shard ids instead of just one digit ids.
* [Caffe2] Call GlobalInit in predictor only in mobile
FACEBOOK:
Calling GlobalInit long after the program starts may not be safe. There are issues if the following happens:
User does not call GlobalInit and initFacebook after program starts
User sets a flag manually: https://fburl.com/mcsumw7d
User calls OSS predictor.
OSS predictor calls GlobalInit
GlobalInit calls initFacebook
initFacebook resets all flags: https://fburl.com/tolszha1
Thus, the user manually set flags are overwritten
This would happen anytime GlobalInit is called long after the program starts.
I suppose the intention of the user in this case is not to call GlobalInit throughout the program,
but use Caffe2 regardless (is that desired?)
But adding GlobalInit in the OSS predictor would automatically call GlobalInit when using Caffe2.
This issue doesn't exist in mobile, since initFacebook is not called on mobile.
For now, guard the GlobalInit in predictor for mobile only.
May want to ensure the GlobalInit is always called at the start of the program. @[3501714:kutta] has seen weird issues when not calling GlobalInit at the start of the program on server side. He has made some progress on this.
* resolve conflicts for caffe2/core/logging_is_google_glog.h and test/test_torch.py
* Add empty fix for SumLikeReduceOp
Add empty fix for SumLikeReduceOp
* Revert D7962948: [caffe2][nomnigraph] Concat elim for sparseNN
This reverts commit f7f434dc5c34ca6058b9765d2ef615453d2276a9
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Remove Declarations.yaml
* Include common.h
* Change std::stoi to caffe2::stoi
* Add thread_name.cc to the CMake file
* No need to subtract 1. Fix test segfaults
* Fix NetTest, ObserverTest
Fix tests
(cherry picked from commit 3767e66c3f365596cba3d46d3e7322c933a0ab41)
* CTCGreedyDecoderOp only has CPU implementation, test should only run on CPU
* Add a variable to avoid conversion resizing issue
* Remove the code per soumith's comments
* Remove the code per soumith's comments
* Remove blank lines in the end of file
* Resolve conflicts for torch/_thnn/utils.py
* Update MKL exporter to IDEEP ops
TSIA
* Back out "Add support for generating ATen files during fbcode build"
Original commit changeset: 28970ddba353
@override-unit-failures
(Note: this ignores all push blocking failures!)
* add dependencies for online trainer
Add some dependencies so that the online model can use DataPipeline and PredictionTransform operators
Relevant post: https://fb.intern.facebook.com/groups/1324375037655677/permalink/1740993462660497/
* Resolve conflicts for tools/jit/gen_jit_dispatch.py
* Support advanced pooling options in sum processor
* support advanced pooling options in sum processor
* remove redundant code
* support attention in sum processor
* resolve conflicts for caffe2/core/logging_is_google_glog.h and test/test_torch.py
* Revert D7962948: [caffe2][nomnigraph] Concat elim for sparseNN
This reverts commit f7f434dc5c34ca6058b9765d2ef615453d2276a9
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Remove Declarations.yaml
* Include common.h
* Change std::stoi to caffe2::stoi
* [caffe2] upgrade IDEEP and hotfix for conv op accuracy issue (#8364)
* [IDEEP] Upgrade IDEEP version
Signed-off-by: Gu, Jinghui <jinghui.gu@intel.com>
* [IDEEP] Fix accuracy issue in conv op
Signed-off-by: Gu, Jinghui <jinghui.gu@intel.com>
* Fix build error due to lack of src in CMakeLists
Signed-off-by: Gu, Jinghui <jinghui.gu@intel.com>
* Remove the code per soumith's comments
* [ONNX] Add an ATen fallback pathway for ONNX export (#8273)
* ATen fallback for ONNX export
* Move to enum
* Fix model test
* Add comment
* Address comments
BC interface
* Remove imaginary file (#8415)
* [Caffe2] Enable AMD/MIOPEN ops for Caffe2 (#8306)
* Add hip support for caffe2 core
* Add MIOPEN header/wrapper to caffe2 core
* Add HIP device into caffe2 PB
* top level makefile change for rocm/hip
* makefile scaffolding for AMD/RocM/HIP
* Makefile scaffolding for AMD/RocM/HIP; add makefile/utility for HIP files
* caffe2 PB update for AMD/ROCM HIP device
* Add AMD/RocM/Thrust dependency
* HIP threadpool update
* Fix makefile macro
* makefile fix: duplicate test/binary name
* makefile clean-up
* makefile clean-up
* add HIP operator registry
* add utilities for hip device
* Add USE_HIP to config summary
* makefile fix for BUILD_TEST
* merge latest
* Fix indentation
* code clean-up
* Guard builds without HIP and use the same cmake script as PyTorch to find HIP
* Setup rocm environment variables in build.sh (ideally should be done in the docker images)
* setup locale
* set HIP_PLATFORM
* Revert "set HIP_PLATFORM"
This reverts commit 8ec58db2b390c9259220c49fa34cd403568300ad.
* continue the build script environment variables mess
* HCC_AMDGPU_TARGET
* Cleanup the mess, has been fixed in the latest docker images
* Assign protobuf field hip_gpu_id a new field number for backward compatibility
* change name to avoid conflict
* Fix duplicated thread pool flag
* Refactor cmake files to not add hip includes and libs globally
* Fix the wrong usage of environment variables detection in cmake
* Add MIOPEN CNN operators
* Revert "Add MIOPEN CNN operators"
This reverts commit 6e89ad4385b5b8967a7854c4adda52c012cee42a.
* Add MIOPEN pooling operator
* Add MIOPEN activation operator
* Add MIOPEN softmax operator
* Add MIOPEN spatial batch norm operator
* Add MIOPEN local response normalization operator
* Add MIOPEN conv operator
* Clean-up LRN ops
* enable fp16 in MIOPEN pool ops
* Enable fp16 for MIOPEN relu op
* Enable fp16 for MIOPEN spatial batch norm op
* code clean-up
* revert float16 support
* Create Caffe2 python binding for AMD/ROCM/HIP
* Add op fallback for HIP operator
* add hip src/test files in cmake
* exclude hip src/test files
* fix python binding for hip backend
* fix MIOPEN pooling op workspace
* hack to compile miopen operators
* fix include path for MIOPEN ops
* Fix include path
* Add HIP math utilities
* Fix path for HIP math utils
* cmake fix
* Cmake fix / hipcc for hip files
* suppress hipcc warning
* cmake fix /replace USE_HIP with USE_ROCM
* revert LoadHIP.cmake change
* fix include for thrust/cub-hip
* include path fix for conversion.h
* Updated with latest upstream changes
* clang format fixes
* Context_hip updates
* Fixed typo in rocblas handle get function
* Updated hipified math utils
* Updated math hip test util
* Updated context hip test
* Updated common_hip
* Updated net async dag for HIP
* Added MIOPEN in operator hip test
* fix
* C2 dependencies clean-up
* fix include path for building custom protobuf
* Decouple miopen pool op and conv_pool_op base
* cmake refactor
* fix operator_hip_test
* move all hip/miopen ops files into caffe2/operators/hip
* sanitize cmake
* permission issue
* remove extra parenthesis
* remove artifact from resolving merge conflict
* cont. sanitize cmake files
* fix syntax error
* sanitize conversion.h
* .
* Revert "."
This reverts commit 56020cb0e996a31ae27bf1f8f491955ed0b121b9.
* clang-format
* Enable some reduce operators' ONNX backend tests (#8418)
* fix old comment to point to the right file (#8416)
* Stop pinning nccl version. (#8421)
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* Expose logsumexp docs and mark log_sum_exp in distributions for internal use (#8428)
* Enable some of the ONNX backend test on broadcasting (#8423)
* Enable some of the ONNX backend test on broadcasting
* enable gemm broadcast
* Expose proto utils and ONNX (#8073)
* Expose proto utils and ONNX from PyTorch libcaffe2.so
* Try to use protobuf from _C.so
* Fix ONNX proto header include
* Adjust order of imports for ONNX until nanopb goes away
* Set and use ONNX_NAMESPACE for PyTorch builds
* Show protobuf summary for all builds
* Add ONNX_NAMESPACE for cpp_build
* Statically link libprotobuf.a into libtorch.so
* Set ONNX_NAMESPACE on Windows build
* Move core/dispatch up as well
* Add /MD flag for Windows build of _C
* Potential Windows fix for ONNX and protobuf
* Add direct linkage from _C to ONNX on Windows
* Only include protobuf wrapper for PyTorch
* Pass extra_compile_args to _nvrtc ext build
* Remove installation of .a files
* Rebase creates some weird situations, revert them manually
* Remove more weird changes due to rebase
* Need to add thread_name.cc after merge
2018-06-13 20:10:45 +00:00
|
|
|
)
|
2018-10-12 00:33:29 +00:00
|
|
|
def testLayerNormalization(self, X, use_layer_norm_op):
|
|
|
|
|
expect = (5, 6,)
|
|
|
|
|
if not use_layer_norm_op:
|
|
|
|
|
X = X.reshape(10, 6)
|
|
|
|
|
expect = (6,)
|
|
|
|
|
input_record = self.new_record(schema.Scalar((np.float32, expect)))
|
Update from facebook (#8384)
* [fix] fixup the bias multiplier data access issue
Hotfix for failures in conv_transpose
* [D2][Easy]: lint regularizer
lint with black
* [GanH]: Split mu in adaptive weight for diagnose
* [Dper] Add the ability to split FC weights into multiple smaller ones
* fix SumReduceLikeOp for empty blob
as desc.
* add ctc_greedy_decoder for caffe2
ctc_greedy_decoder same as tf's
* Update event callback handling
Allow multiple callbacks per event
* Add WeightedSum layer
The motivation is to do weighted sum in HoNet/crossnet, in the next diff, I'll replace model.Add with model.WeightedSum in
honet: https://fburl.com/f4rmolg2
crossnet: https://fburl.com/v7awn8se, https://fburl.com/63filbnm
* Replicate DAG's behavior
Some callers expect RunAsync to block, replicate that behavior in case of
explicit 'dag' net type
* [dper] layernorm layer
as title
* Override dag, async_dag, async_polling
Overriding dag, async_dag and async_polling with async_scheduling
* Name the thread pools
Caffe thread pools currently inherit the thread names from the thread that starts them, which can be misleading. Give them an explicit name instead.
* [Caffe2] FillerOp should support int64_t dimensions
Change argument type to int64_t for shape argument of FillerOp (used in ConstantFill, XavierFill, etc)
* Remove caffe2/caffe2/contrib/torch/
It's not used anywhere and depends on old lua torch that conflicts with Aten. Given PT1 it's not relevant any more (though it was nice and clever code!)
#accept2ship
* Fix linearWarmup multiplier check
The multiplier needs to be non-negative, not strictly positive.
* Revert D3314316
This is after 2 years and we do not seem to have a use case for this one, so
for the sake of clean API design we should potentially remove this. This would
allow us to potentially pass in arguments to optionally construct an object,
although it is indeed a little bit unclear how we can reuse existing objects if
constructor arguments are passed in. In any case, we may want to remove this
dangling feature.
* Speedup generate proposals by partial_sort.
Speedup generate proposals by partial_sort.
FACEBOOK:
- Saw speed improvement for training with this op.
- Yanghan benchmarked the op on a small dataset and see consistent 100% improvement on speed (6ms -> 3ms) on 420 input resolution. See next diff for details.
* More parallel processing friendly for CPP version of GenerateProposals.
More parallel processing friendly for CPP version of GenerateProposals.
* [DT] [43/n] Lift stop conditions inside reader code back to flow control
1. Split multi_reader function into local_reader and remote_reader
2. Lifted stop conditions inside Limiter back to flow control
3. Split epoch flow building logic into 3 cases:
- single machine (1 reader, 1 trainer on trainer0 node, no PS)
- (1 reader + 1 trainer) on trainer0 node, has PS
- multiple readers, readers do not share nodes with trainers, might have PS or not
* Resolve conflicts for torch/_thnn/utils.py
* [Caffe2] Handle image decoding errors
Image decoding errors can make the whole training fail. This diff is to handle them
1.Catch imdecode exceptions and check if decoded image has zero columns or rows. This is counted as decoding errors.
2.Replace the image with empty in case of error
3.Count the number of errors and throw runtime exception if the rate reaches given number
The empty image data is kept. It might introduce noise in the training data.
* Update MKL exporter to IDEEP ops
TSIA
* [Caffe2] GlobalInit is thread safe, fixing the comment
With the mutex and lock, GlobalInit is thread safe.
Update the comments.
* Back out "Add support for generating ATen files during fbcode build"
Original commit changeset: 28970ddba353
@override-unit-failures
(Note: this ignores all push blocking failures!)
* [DT]: fix predictor save
similar to D6610058, here we add the fix for distributed online training
* Remove net_singlethread_async_gpu.cc
Closes https://github.com/caffe2/caffe2/pull/2528
This removes net_singlethread_async_gpu.cc as part of our effort to clean
CUDAContext and the net executors.
* Inline DFS task execution
Add a DFS inline task execution mode in executor
* Add c10 folder to fbcode
This adds the c10 folder and its test cases to fbcode. Build flags are mostly taken from aten.
* add dependencies for online trainer
Add some dependencies so that the online model can use DataPipeline and PredictionTransform operators
Relevant post: https://fb.intern.facebook.com/groups/1324375037655677/permalink/1740993462660497/
* Resolve conflicts for tools/jit/gen_jit_dispatch.py
* [Fix] sparse regularization in distributed training
* Support advanced pooling options in sum processor
* support advanced pooling options in sum processor
* remove redundant code
* support attention in sum processor
* Improve shard logging in net tracing code
Make it handle arbitrary shard ids instead of just one digit ids.
* [Caffe2] Call GlobalInit in predictor only in mobile
FACEBOOK:
Calling GlobalInit long after the program starts may not be safe. There are issues if the following happens:
User does not call GlobalInit and initFacebook after program starts
User sets a flag manually: https://fburl.com/mcsumw7d
User calls OSS predictor.
OSS predictor calls GlobalInit
GlobalInit calls initFacebook
initFacebook resets all flags: https://fburl.com/tolszha1
Thus, the user manually set flags are overwritten
This would happen anytime GlobalInit is called long after the program starts.
I suppose the intention of the user in this case is not to call GlobalInit throughout the program,
but use Caffe2 regardless (is that desired?)
But adding GlobalInit in the OSS predictor would automatically call GlobalInit when using Caffe2.
This issue doesn't exist in mobile, since initFacebook is not called on mobile.
For now, guard the GlobalInit in predictor for mobile only.
May want to ensure the GlobalInit is always called at the start of the program. @[3501714:kutta] has seen weird issues when not calling GlobalInit at the start of the program on server side. He has made some progress on this.
* resolve conflicts for caffe2/core/logging_is_google_glog.h and test/test_torch.py
* Add empty fix for SumLikeReduceOp
Add empty fix for SumLikeReduceOp
* Revert D7962948: [caffe2][nomnigraph] Concat elim for sparseNN
This reverts commit f7f434dc5c34ca6058b9765d2ef615453d2276a9
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Remove Declarations.yaml
* Include common.h
* Change std::stoi to caffe2::stoi
* Add thread_name.cc to the CMake file
* No need to subtract 1. Fix test segfaults
* Fix NetTest, ObserverTest
Fix tests
(cherry picked from commit 3767e66c3f365596cba3d46d3e7322c933a0ab41)
* CTCGreedyDecoderOp only has CPU implementation, test should only run on CPU
* Add a variable to avoid conversion resizing issue
* [fix] fixup the bias multiplier data access issue
Hotfix for failures in conv_transpose
* [D2][Easy]: lint regularizer
lint with black
* [GanH]: Split mu in adaptive weight for diagnose
* [Dper] Add the ability to split FC weights into multiple smaller ones
* fix SumReduceLikeOp for empty blob
as desc.
* add ctc_greedy_decoder for caffe2
ctc_greedy_decoder same as tf's
* Update event callback handling
Allow multiple callbacks per event
* Add WeightedSum layer
The motivation is to do weighted sum in HoNet/crossnet, in the next diff, I'll replace model.Add with model.WeightedSum in
honet: https://fburl.com/f4rmolg2
crossnet: https://fburl.com/v7awn8se, https://fburl.com/63filbnm
* Replicate DAG's behavior
Some callers expect RunAsync to block, replicate that behavior in case of
explicit 'dag' net type
* [dper] layernorm layer
as title
* Override dag, async_dag, async_polling
Overriding dag, async_dag and async_polling with async_scheduling
* Name the thread pools
Caffe thread pools currently inherit the thread names from the thread that starts them, which can be misleading. Give them an explicit name instead.
* [Caffe2] FillerOp should support int64_t dimensions
Change argument type to int64_t for shape argument of FillerOp (used in ConstantFill, XavierFill, etc)
* Remove caffe2/caffe2/contrib/torch/
It's not used anywhere and depends on old lua torch that conflicts with Aten. Given PT1 it's not relevant any more (though it was nice and clever code!)
#accept2ship
* Fix linearWarmup multiplier check
The multiplier needs to be non-negative, not strictly positive.
* Revert D3314316
This is after 2 years and we do not seem to have a use case for this one, so
for the sake of clean API design we should potentially remove this. This would
allow us to potentially pass in arguments to optionally construct an object,
although it is indeed a little bit unclear how we can reuse existing objects if
constructor arguments are passed in. In any case, we may want to remove this
dangling feature.
* Speedup generate proposals by partial_sort.
Speedup generate proposals by partial_sort.
FACEBOOK:
- Saw speed improvement for training with this op.
- Yanghan benchmarked the op on a small dataset and see consistent 100% improvement on speed (6ms -> 3ms) on 420 input resolution. See next diff for details.
* More parallel processing friendly for CPP version of GenerateProposals.
More parallel processing friendly for CPP version of GenerateProposals.
* [DT] [43/n] Lift stop conditions inside reader code back to flow control
1. Split multi_reader function into local_reader and remote_reader
2. Lifted stop conditions inside Limiter back to flow control
3. Split epoch flow building logic into 3 cases:
- single machine (1 reader, 1 trainer on trainer0 node, no PS)
- (1 reader + 1 trainer) on trainer0 node, has PS
- multiple readers, readers do not share nodes with trainers, might have PS or not
* Resolve conflicts for torch/_thnn/utils.py
* [Caffe2] Handle image decoding errors
Image decoding errors can make the whole training fail. This diff is to handle them
1.Catch imdecode exceptions and check if decoded image has zero columns or rows. This is counted as decoding errors.
2.Replace the image with empty in case of error
3.Count the number of errors and throw runtime exception if the rate reaches given number
The empty image data is kept. It might introduce noise in the training data.
* Update MKL exporter to IDEEP ops
TSIA
* [Caffe2] GlobalInit is thread safe, fixing the comment
With the mutex and lock, GlobalInit is thread safe.
Update the comments.
* Back out "Add support for generating ATen files during fbcode build"
Original commit changeset: 28970ddba353
@override-unit-failures
(Note: this ignores all push blocking failures!)
* [DT]: fix predictor save
similar to D6610058, here we add the fix for distributed online training
* Remove net_singlethread_async_gpu.cc
Closes https://github.com/caffe2/caffe2/pull/2528
This removes net_singlethread_async_gpu.cc as part of our effort to clean
CUDAContext and the net executors.
* Inline DFS task execution
Add a DFS inline task execution mode in executor
* Add c10 folder to fbcode
This adds the c10 folder and its test cases to fbcode. Build flags are mostly taken from aten.
* add dependencies for online trainer
Add some dependencies so that the online model can use DataPipeline and PredictionTransform operators
Relevant post: https://fb.intern.facebook.com/groups/1324375037655677/permalink/1740993462660497/
* Resolve conflicts for tools/jit/gen_jit_dispatch.py
* [Fix] sparse regularization in distributed training
* Support advanced pooling options in sum processor
* support advanced pooling options in sum processor
* remove redundant code
* support attention in sum processor
* Improve shard logging in net tracing code
Make it handle arbitrary shard ids instead of just one digit ids.
* [Caffe2] Call GlobalInit in predictor only in mobile
FACEBOOK:
Calling GlobalInit long after the program starts may not be safe. There are issues if the following happens:
User does not call GlobalInit and initFacebook after program starts
User sets a flag manually: https://fburl.com/mcsumw7d
User calls OSS predictor.
OSS predictor calls GlobalInit
GlobalInit calls initFacebook
initFacebook resets all flags: https://fburl.com/tolszha1
Thus, the user manually set flags are overwritten
This would happen anytime GlobalInit is called long after the program starts.
I suppose the intention of the user in this case is not to call GlobalInit throughout the program,
but use Caffe2 regardless (is that desired?)
But adding GlobalInit in the OSS predictor would automatically call GlobalInit when using Caffe2.
This issue doesn't exist in mobile, since initFacebook is not called on mobile.
For now, guard the GlobalInit in predictor for mobile only.
May want to ensure the GlobalInit is always called at the start of the program. @[3501714:kutta] has seen weird issues when not calling GlobalInit at the start of the program on server side. He has made some progress on this.
* resolve conflicts for caffe2/core/logging_is_google_glog.h and test/test_torch.py
* Add empty fix for SumLikeReduceOp
Add empty fix for SumLikeReduceOp
* Revert D7962948: [caffe2][nomnigraph] Concat elim for sparseNN
This reverts commit f7f434dc5c34ca6058b9765d2ef615453d2276a9
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Remove Declarations.yaml
* Include common.h
* Change std::stoi to caffe2::stoi
* Add thread_name.cc to the CMake file
* No need to subtract 1. Fix test segfaults
* Fix NetTest, ObserverTest
Fix tests
(cherry picked from commit 3767e66c3f365596cba3d46d3e7322c933a0ab41)
* CTCGreedyDecoderOp only has CPU implementation, test should only run on CPU
* Add a variable to avoid conversion resizing issue
* Remove the code per soumith's comments
* Remove the code per soumith's comments
* Remove blank lines in the end of file
* Resolve conflicts for torch/_thnn/utils.py
* Update MKL exporter to IDEEP ops
TSIA
* Back out "Add support for generating ATen files during fbcode build"
Original commit changeset: 28970ddba353
@override-unit-failures
(Note: this ignores all push blocking failures!)
* add dependencies for online trainer
Add some dependencies so that the online model can use DataPipeline and PredictionTransform operators
Relevant post: https://fb.intern.facebook.com/groups/1324375037655677/permalink/1740993462660497/
* Resolve conflicts for tools/jit/gen_jit_dispatch.py
* Support advanced pooling options in sum processor
* support advanced pooling options in sum processor
* remove redundant code
* support attention in sum processor
* resolve conflicts for caffe2/core/logging_is_google_glog.h and test/test_torch.py
* Revert D7962948: [caffe2][nomnigraph] Concat elim for sparseNN
This reverts commit f7f434dc5c34ca6058b9765d2ef615453d2276a9
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Remove Declarations.yaml
* Include common.h
* Change std::stoi to caffe2::stoi
* [caffe2] upgrade IDEEP and hotfix for conv op accuracy issue (#8364)
* [IDEEP] Upgrade IDEEP version
Signed-off-by: Gu, Jinghui <jinghui.gu@intel.com>
* [IDEEP] Fix accuracy issue in conv op
Signed-off-by: Gu, Jinghui <jinghui.gu@intel.com>
* Fix build error due to lack of src in CMakeLists
Signed-off-by: Gu, Jinghui <jinghui.gu@intel.com>
* Remove the code per soumith's comments
* [ONNX] Add an ATen fallback pathway for ONNX export (#8273)
* ATen fallback for ONNX export
* Move to enum
* Fix model test
* Add comment
* Address comments
BC interface
* Remove imaginary file (#8415)
* [Caffe2] Enable AMD/MIOPEN ops for Caffe2 (#8306)
* Add hip support for caffe2 core
* Add MIOPEN header/wrapper to caffe2 core
* Add HIP device into caffe2 PB
* top level makefile change for rocm/hip
* makefile scaffolding for AMD/RocM/HIP
* Makefile scaffolding for AMD/RocM/HIP; add makefile/utility for HIP files
* caffe2 PB update for AMD/ROCM HIP device
* Add AMD/RocM/Thrust dependency
* HIP threadpool update
* Fix makefile macro
* makefile fix: duplicate test/binary name
* makefile clean-up
* makefile clean-up
* add HIP operator registry
* add utilities for hip device
* Add USE_HIP to config summary
* makefile fix for BUILD_TEST
* merge latest
* Fix indentation
* code clean-up
* Guard builds without HIP and use the same cmake script as PyTorch to find HIP
* Setup rocm environment variables in build.sh (ideally should be done in the docker images)
* setup locale
* set HIP_PLATFORM
* Revert "set HIP_PLATFORM"
This reverts commit 8ec58db2b390c9259220c49fa34cd403568300ad.
* continue the build script environment variables mess
* HCC_AMDGPU_TARGET
* Clean up the mess; it has been fixed in the latest docker images
* Assign protobuf field hip_gpu_id a new field number for backward compatibility
* change name to avoid conflict
* Fix duplicated thread pool flag
* Refactor cmake files to not add hip includes and libs globally
* Fix the wrong usage of environment variables detection in cmake
* Add MIOPEN CNN operators
* Revert "Add MIOPEN CNN operators"
This reverts commit 6e89ad4385b5b8967a7854c4adda52c012cee42a.
* Add MIOPEN pooling operator
* Add MIOPEN activation operator
* Add MIOPEN softmax operator
* Add MIOPEN spatial batch norm operator
* Add MIOPEN local response normalization operator
* Add MIOPEN conv operator
* Clean-up LRN ops
* enable fp16 in MIOPEN pool ops
* Enable fp16 for MIOPEN relu op
* Enable fp16 for MIOPEN spatial batch norm op
* code clean-up
* revert float16 support
* Create Caffe2 python binding for AMD/ROCM/HIP
* Add op fallback for HIP operator
* add hip src/test files in cmake
* exclude hip src/test files
* fix python binding for hip backend
* fix MIOPEN pooling op workspace
* hack to compile miopen operators
* fix include path for MIOPEN ops
* Fix include path
* Add HIP math utilities
* Fix path for HIP math utils
* cmake fix
* Cmake fix / hipcc for hip files
* suppress hipcc warning
* cmake fix / replace USE_HIP with USE_ROCM
* revert LoadHIP.cmake change
* fix include for thrust/cub-hip
* include path fix for conversion.h
* Updated with latest upstream changes
* clang format fixes
* Context_hip updates
* Fixed typo in rocblas handle get function
* Updated hipified math utils
* Updated math hip test util
* Updated context hip test
* Updated common_hip
* Updated net async dag for HIP
* Added MIOPEN in operator hip test
* fix
* C2 dependencies clean-up
* fix include path for building custom protobuf
* Decouple miopen pool op and conv_pool_op base
* cmake refactor
* fix operator_hip_test
* move all hip/miopen ops files into caffe2/operators/hip
* sanitize cmake
* permission issue
* remove extra parenthesis
* remove artifact from resolving merge conflict
* cont. sanitize cmake files
* fix syntax error
* sanitize conversion.h
* .
* Revert "."
This reverts commit 56020cb0e996a31ae27bf1f8f491955ed0b121b9.
* clang-format
* Enable some reduce operators' ONNX backend tests (#8418)
* fix old comment to point to the right file (#8416)
* Stop pinning nccl version. (#8421)
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* Expose logsumexp docs and mark log_sum_exp in distributions for internal use (#8428)
* Enable some of the ONNX backend test on broadcasting (#8423)
* Enable some of the ONNX backend test on broadcasting
* enable gemm broadcast
* Expose proto utils and ONNX (#8073)
* Expose proto utils and ONNX from PyTorch libcaffe2.so
* Try to use protobuf from _C.so
* Fix ONNX proto header include
* Adjust order of imports for ONNX until nanopb goes away
* Set and use ONNX_NAMESPACE for PyTorch builds
* Show protobuf summary for all builds
* Add ONNX_NAMESPACE for cpp_build
* Statically link libprotobuf.a into libtorch.so
* Set ONNX_NAMESPACE on Windows build
* Move core/dispatch up as well
* Add /MD flag for Windows build of _C
* Potential Windows fix for ONNX and protobuf
* Add direct linkage from _C to ONNX on Windows
* Only include protobuf wrapper for PyTorch
* Pass extra_compile_args to _nvrtc ext build
* Remove installation of .a files
* Rebase creates some weird situations, revert them manually
* Remove more weird changes due to rebase
* Need to add thread_name.cc after merge
2018-06-13 20:10:45 +00:00
|
|
|
schema.FeedRecord(input_record, [X])
|
2018-10-12 00:33:29 +00:00
|
|
|
ln_output = self.model.LayerNormalization(
|
|
|
|
|
input_record, use_layer_norm_op=use_layer_norm_op
|
|
|
|
|
)
|
|
|
|
|
self.assertEqual(schema.Scalar((np.float32, expect)), ln_output)
|
Update from facebook (#8384)
* [fix] fixup the bias multiplier data access issue
Hotfix for failures in conv_transpose
* [D2][Easy]: lint regularizer
lint with black
* [GanH]: Split mu in adaptive weight for diagnose
* [Dper] Add the ability to split FC weights into multiple smaller ones
* fix SumReduceLikeOp for empty blob
as desc.
* add ctc_greedy_decoder for caffe2
ctc_greedy_decoder same as tf's
* Update event callback handling
Allow multiple callbacks per event
* Add WeightedSum layer
The motivation is to do weighted sum in HoNet/crossnet, in the next diff, I'll replace model.Add with model.WeightedSum in
honet: https://fburl.com/f4rmolg2
crossnet: https://fburl.com/v7awn8se, https://fburl.com/63filbnm
* Replicate DAG's behavior
Some callers expect RunAsync to block, replicate that behavior in case of
explicit 'dag' net type
* [dper] layernorm layer
as title
* Override dag, async_dag, async_polling
Overriding dag, async_dag and async_polling with async_scheduling
* Name the thread pools
Caffe thread pools currently inherit the thread names from the thread that starts them, which can be misleading. Give them an explicit name instead.
* [Caffe2] FillerOp should support int64_t dimensions
Change argument type to int64_t for shape argument of FillerOp (used in ConstantFill, XavierFill, etc)
* Remove caffe2/caffe2/contrib/torch/
It's not used anywhere and depends on old lua torch that conflicts with Aten. Given PT1 it's not relevant any more (though it was nice and clever code!)
#accept2ship
* Fix linearWarmup multiplier check
The multiplier needs to be non-negative, not strictly positive.
* Revert D3314316
This is after 2 years and we do not seem to have a use case for this one, so
for the sake of clean API design we should potentially remove this. This would
allow us to potentially pass in arguments to optionally construct an object,
although it is indeed a little bit unclear how we can reuse existing objects if
constructor arguments are passed in. In any case, we may want to remove this
dangling feature.
* Speedup generate proposals by partial_sort.
Speedup generate proposals by partial_sort.
FACEBOOK:
- Saw speed improvement for training with this op.
- Yanghan benchmarked the op on a small dataset and saw a consistent 100% improvement in speed (6ms -> 3ms) at 420 input resolution. See next diff for details.
* More parallel processing friendly for CPP version of GenerateProposals.
More parallel processing friendly for CPP version of GenerateProposals.
* [DT] [43/n] Lift stop conditions inside reader code back to flow control
1. Split multi_reader function into local_reader and remote_reader
2. Lifted stop conditions inside Limiter back to flow control
3. Split epoch flow building logic into 3 cases:
- single machine (1 reader, 1 trainer on trainer0 node, no PS)
- (1 reader + 1 trainer) on trainer0 node, has PS
- multiple readers, readers do not share nodes with trainers, might have PS or not
* Resolve conflicts for torch/_thnn/utils.py
* [Caffe2] Handle image decoding errors
Image decoding errors can make the whole training fail. This diff is to handle them
1. Catch imdecode exceptions and check whether the decoded image has zero columns or rows. This is counted as a decoding error.
2. Replace the image with an empty one in case of error
3. Count the number of errors and throw a runtime exception if the rate reaches the given number
The empty image data is kept. It might introduce noise in the training data.
* Update MKL exporter to IDEEP ops
TSIA
* [Caffe2] GlobalInit is thread safe, fixing the comment
With the mutex and lock, GlobalInit is thread safe.
Update the comments.
* Back out "Add support for generating ATen files during fbcode build"
Original commit changeset: 28970ddba353
@override-unit-failures
(Note: this ignores all push blocking failures!)
* [DT]: fix predictor save
similar to D6610058, here we add the fix for distributed online training
* Remove net_singlethread_async_gpu.cc
Closes https://github.com/caffe2/caffe2/pull/2528
This removes net_singlethread_async_gpu.cc as part of our effort to clean
CUDAContext and the net executors.
* Inline DFS task execution
Add a DFS inline task execution mode in executor
* Add c10 folder to fbcode
This adds the c10 folder and its test cases to fbcode. Build flags are mostly taken from aten.
* add dependencies for online trainer
Add some dependencies so that the online model can use DataPipeline and PredictionTransform operators
Relevant post: https://fb.intern.facebook.com/groups/1324375037655677/permalink/1740993462660497/
* Resolve conflicts for tools/jit/gen_jit_dispatch.py
* [Fix] sparse regularization in distributed training
* Support advanced pooling options in sum processor
* support advanced pooling options in sum processor
* remove redundant code
* support attention in sum processor
* Improve shard logging in net tracing code
Make it handle arbitrary shard ids instead of just one digit ids.
* [Caffe2] Call GlobalInit in predictor only in mobile
FACEBOOK:
Calling GlobalInit long after the program starts may not be safe. There are issues if the following happens:
User does not call GlobalInit and initFacebook after program starts
User sets a flag manually: https://fburl.com/mcsumw7d
User calls OSS predictor.
OSS predictor calls GlobalInit
GlobalInit calls initFacebook
initFacebook resets all flags: https://fburl.com/tolszha1
Thus, the flags the user set manually are overwritten
This would happen anytime GlobalInit is called long after the program starts.
I suppose the intention of the user in this case is not to call GlobalInit throughout the program,
but use Caffe2 regardless (is that desired?)
But adding GlobalInit in the OSS predictor would automatically call GlobalInit when using Caffe2.
This issue doesn't exist in mobile, since initFacebook is not called on mobile.
For now, guard the GlobalInit in predictor for mobile only.
May want to ensure the GlobalInit is always called at the start of the program. @[3501714:kutta] has seen weird issues when not calling GlobalInit at the start of the program on server side. He has made some progress on this.
* resolve conflicts for caffe2/core/logging_is_google_glog.h and test/test_torch.py
* Add empty fix for SumLikeReduceOp
Add empty fix for SumLikeReduceOp
* Revert D7962948: [caffe2][nomnigraph] Concat elim for sparseNN
This reverts commit f7f434dc5c34ca6058b9765d2ef615453d2276a9
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Remove Declarations.yaml
* Include common.h
* Change std::stoi to caffe2::stoi
* Add thread_name.cc to the CMake file
* No need to subtract 1. Fix test segfaults
* Fix NetTest, ObserverTest
Fix tests
(cherry picked from commit 3767e66c3f365596cba3d46d3e7322c933a0ab41)
* CTCGreedyDecoderOp only has CPU implementation, test should only run on CPU
* Add a variable to avoid conversion resizing issue
* [fix] fixup the bias multiplier data access issue
Hotfix for failures in conv_transpose
* [D2][Easy]: lint regularizer
lint with black
* [GanH]: Split mu in adaptive weight for diagnose
* [Dper] Add the ability to split FC weights into multiple smaller ones
* fix SumReduceLikeOp for empty blob
as desc.
* add ctc_greedy_decoder for caffe2
ctc_greedy_decoder same as tf's
* Update event callback handling
Allow multiple callbacks per event
* Add WeightedSum layer
The motivation is to do weighted sum in HoNet/crossnet, in the next diff, I'll replace model.Add with model.WeightedSum in
honet: https://fburl.com/f4rmolg2
crossnet: https://fburl.com/v7awn8se, https://fburl.com/63filbnm
* Replicate DAG's behavior
Some callers expect RunAsync to block, replicate that behavior in case of
explicit 'dag' net type
* [dper] layernorm layer
as title
* Override dag, async_dag, async_polling
Overriding dag, async_dag and async_polling with async_scheduling
* Name the thread pools
Caffe thread pools currently inherit the thread names from the thread that starts them, which can be misleading. Give them an explicit name instead.
* [Caffe2] FillerOp should support int64_t dimensions
Change argument type to int64_t for shape argument of FillerOp (used in ConstantFill, XavierFill, etc)
* Remove caffe2/caffe2/contrib/torch/
It's not used anywhere and depends on old lua torch that conflicts with Aten. Given PT1 it's not relevant any more (though it was nice and clever code!)
#accept2ship
* Fix linearWarmup multiplier check
The multiplier needs to be non-negative, not strictly positive.
* Revert D3314316
This is after 2 years and we do not seem to have a use case for this one, so
for the sake of clean API design we should potentially remove this. This would
allow us to potentially pass in arguments to optionally construct an object,
although it is indeed a little bit unclear how we can reuse existing objects if
constructor arguments are passed in. In any case, we may want to remove this
dangling feature.
* Speedup generate proposals by partial_sort.
Speedup generate proposals by partial_sort.
FACEBOOK:
- Saw speed improvement for training with this op.
- Yanghan benchmarked the op on a small dataset and saw a consistent 100% improvement in speed (6ms -> 3ms) at 420 input resolution. See next diff for details.
* More parallel processing friendly for CPP version of GenerateProposals.
More parallel processing friendly for CPP version of GenerateProposals.
* [DT] [43/n] Lift stop conditions inside reader code back to flow control
1. Split multi_reader function into local_reader and remote_reader
2. Lifted stop conditions inside Limiter back to flow control
3. Split epoch flow building logic into 3 cases:
- single machine (1 reader, 1 trainer on trainer0 node, no PS)
- (1 reader + 1 trainer) on trainer0 node, has PS
- multiple readers, readers do not share nodes with trainers, might have PS or not
* Resolve conflicts for torch/_thnn/utils.py
* [Caffe2] Handle image decoding errors
Image decoding errors can make the whole training fail. This diff is to handle them
1. Catch imdecode exceptions and check whether the decoded image has zero columns or rows. This is counted as a decoding error.
2. Replace the image with an empty one in case of error
3. Count the number of errors and throw a runtime exception if the rate reaches the given number
The empty image data is kept. It might introduce noise in the training data.
* Update MKL exporter to IDEEP ops
TSIA
* [Caffe2] GlobalInit is thread safe, fixing the comment
With the mutex and lock, GlobalInit is thread safe.
Update the comments.
* Back out "Add support for generating ATen files during fbcode build"
Original commit changeset: 28970ddba353
@override-unit-failures
(Note: this ignores all push blocking failures!)
* [DT]: fix predictor save
similar to D6610058, here we add the fix for distributed online training
* Remove net_singlethread_async_gpu.cc
Closes https://github.com/caffe2/caffe2/pull/2528
This removes net_singlethread_async_gpu.cc as part of our effort to clean
CUDAContext and the net executors.
* Inline DFS task execution
Add a DFS inline task execution mode in executor
* Add c10 folder to fbcode
This adds the c10 folder and its test cases to fbcode. Build flags are mostly taken from aten.
* add dependencies for online trainer
Add some dependencies so that the online model can use DataPipeline and PredictionTransform operators
Relevant post: https://fb.intern.facebook.com/groups/1324375037655677/permalink/1740993462660497/
* Resolve conflicts for tools/jit/gen_jit_dispatch.py
* [Fix] sparse regularization in distributed training
* Support advanced pooling options in sum processor
* support advanced pooling options in sum processor
* remove redundant code
* support attention in sum processor
* Improve shard logging in net tracing code
Make it handle arbitrary shard ids instead of just one digit ids.
* [Caffe2] Call GlobalInit in predictor only in mobile
FACEBOOK:
Calling GlobalInit long after the program starts may not be safe. There are issues if the following happens:
User does not call GlobalInit and initFacebook after program starts
User sets a flag manually: https://fburl.com/mcsumw7d
User calls OSS predictor.
OSS predictor calls GlobalInit
GlobalInit calls initFacebook
initFacebook resets all flags: https://fburl.com/tolszha1
Thus, the flags the user set manually are overwritten
This would happen anytime GlobalInit is called long after the program starts.
I suppose the intention of the user in this case is not to call GlobalInit throughout the program,
but use Caffe2 regardless (is that desired?)
But adding GlobalInit in the OSS predictor would automatically call GlobalInit when using Caffe2.
This issue doesn't exist in mobile, since initFacebook is not called on mobile.
For now, guard the GlobalInit in predictor for mobile only.
May want to ensure the GlobalInit is always called at the start of the program. @[3501714:kutta] has seen weird issues when not calling GlobalInit at the start of the program on server side. He has made some progress on this.
* resolve conflicts for caffe2/core/logging_is_google_glog.h and test/test_torch.py
* Add empty fix for SumLikeReduceOp
Add empty fix for SumLikeReduceOp
* Revert D7962948: [caffe2][nomnigraph] Concat elim for sparseNN
This reverts commit f7f434dc5c34ca6058b9765d2ef615453d2276a9
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Remove Declarations.yaml
* Include common.h
* Change std::stoi to caffe2::stoi
* Add thread_name.cc to the CMake file
* No need to subtract 1. Fix test segfaults
* Fix NetTest, ObserverTest
Fix tests
(cherry picked from commit 3767e66c3f365596cba3d46d3e7322c933a0ab41)
* CTCGreedyDecoderOp only has CPU implementation, test should only run on CPU
* Add a variable to avoid conversion resizing issue
* Remove the code per soumith's comments
* Remove the code per soumith's comments
* Remove blank lines in the end of file
* Resolve conflicts for torch/_thnn/utils.py
* Update MKL exporter to IDEEP ops
TSIA
* Back out "Add support for generating ATen files during fbcode build"
Original commit changeset: 28970ddba353
@override-unit-failures
(Note: this ignores all push blocking failures!)
* add dependencies for online trainer
Add some dependencies so that the online model can use DataPipeline and PredictionTransform operators
Relevant post: https://fb.intern.facebook.com/groups/1324375037655677/permalink/1740993462660497/
* Resolve conflicts for tools/jit/gen_jit_dispatch.py
* Support advanced pooling options in sum processor
* support advanced pooling options in sum processor
* remove redundant code
* support attention in sum processor
* resolve conflicts for caffe2/core/logging_is_google_glog.h and test/test_torch.py
* Revert D7962948: [caffe2][nomnigraph] Concat elim for sparseNN
This reverts commit f7f434dc5c34ca6058b9765d2ef615453d2276a9
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Remove Declarations.yaml
* Include common.h
* Change std::stoi to caffe2::stoi
* [caffe2] upgrade IDEEP and hotfix for conv op accuracy issue (#8364)
* [IDEEP] Upgrade IDEEP version
Signed-off-by: Gu, Jinghui <jinghui.gu@intel.com>
* [IDEEP] Fix accuracy issue in conv op
Signed-off-by: Gu, Jinghui <jinghui.gu@intel.com>
* Fix build error due to lack of src in CMakeLists
Signed-off-by: Gu, Jinghui <jinghui.gu@intel.com>
* Remove the code per soumith's comments
* [ONNX] Add an ATen fallback pathway for ONNX export (#8273)
* ATen fallback for ONNX export
* Move to enum
* Fix model test
* Add comment
* Address comments
BC interface
* Remove imaginary file (#8415)
* [Caffe2] Enable AMD/MIOPEN ops for Caffe2 (#8306)
* Add hip support for caffe2 core
* Add MIOPEN header/wrapper to caffe2 core
* Add HIP device into caffe2 PB
* top level makefile change for rocm/hip
* makefile scaffolding for AMD/RocM/HIP
* Makefile scaffolding for AMD/RocM/HIP; add makefile/utility for HIP files
* caffe2 PB update for AMD/ROCM HIP device
* Add AMD/RocM/Thrust dependency
* HIP threadpool update
* Fix makefile macro
* makefile fix: duplicate test/binary name
* makefile clean-up
* makefile clean-up
* add HIP operator registry
* add utilities for hip device
* Add USE_HIP to config summary
* makefile fix for BUILD_TEST
* merge latest
* Fix indentation
* code clean-up
* Guard builds without HIP and use the same cmake script as PyTorch to find HIP
* Setup rocm environment variables in build.sh (ideally should be done in the docker images)
* setup locale
* set HIP_PLATFORM
* Revert "set HIP_PLATFORM"
This reverts commit 8ec58db2b390c9259220c49fa34cd403568300ad.
* continue the build script environment variables mess
* HCC_AMDGPU_TARGET
* Clean up the mess; it has been fixed in the latest docker images
* Assign protobuf field hip_gpu_id a new field number for backward compatibility
* change name to avoid conflict
* Fix duplicated thread pool flag
* Refactor cmake files to not add hip includes and libs globally
* Fix the wrong usage of environment variables detection in cmake
* Add MIOPEN CNN operators
* Revert "Add MIOPEN CNN operators"
This reverts commit 6e89ad4385b5b8967a7854c4adda52c012cee42a.
* Add MIOPEN pooling operator
* Add MIOPEN activation operator
* Add MIOPEN softmax operator
* Add MIOPEN spatial batch norm operator
* Add MIOPEN local response normalization operator
* Add MIOPEN conv operator
* Clean-up LRN ops
* enable fp16 in MIOPEN pool ops
* Enable fp16 for MIOPEN relu op
* Enable fp16 for MIOPEN spatial batch norm op
* code clean-up
* revert float16 support
* Create Caffe2 python binding for AMD/ROCM/HIP
* Add op fallback for HIP operator
* add hip src/test files in cmake
* exclude hip src/test files
* fix python binding for hip backend
* fix MIOPEN pooling op workspace
* hack to compile miopen operators
* fix include path for MIOPEN ops
* Fix include path
* Add HIP math utilities
* Fix path for HIP math utils
* cmake fix
* Cmake fix / hipcc for hip files
* suppress hipcc warning
* cmake fix / replace USE_HIP with USE_ROCM
* revert LoadHIP.cmake change
* fix include for thrust/cub-hip
* include path fix for conversion.h
* Updated with latest upstream changes
* clang format fixes
* Context_hip updates
* Fixed typo in rocblas handle get function
* Updated hipified math utils
* Updated math hip test util
* Updated context hip test
* Updated common_hip
* Updated net async dag for HIP
* Added MIOPEN in operator hip test
* fix
* C2 dependencies clean-up
* fix include path for building custom protobuf
* Decouple miopen pool op and conv_pool_op base
* cmake refactor
* fix operator_hip_test
* move all hip/miopen ops files into caffe2/operators/hip
* sanitize cmake
* permission issue
* remove extra parenthesis
* remove artifact from resolving merge conflict
* cont. sanitize cmake files
* fix syntax error
* sanitize conversion.h
* .
* Revert "."
This reverts commit 56020cb0e996a31ae27bf1f8f491955ed0b121b9.
* clang-format
* Enable some reduce operators' ONNX backend tests (#8418)
* fix old comment to point to the right file (#8416)
* Stop pinning nccl version. (#8421)
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* Expose logsumexp docs and mark log_sum_exp in distributions for internal use (#8428)
* Enable some of the ONNX backend test on broadcasting (#8423)
* Enable some of the ONNX backend test on broadcasting
* enable gemm broadcast
* Expose proto utils and ONNX (#8073)
* Expose proto utils and ONNX from PyTorch libcaffe2.so
* Try to use protobuf from _C.so
* Fix ONNX proto header include
* Adjust order of imports for ONNX until nanopb goes away
* Set and use ONNX_NAMESPACE for PyTorch builds
* Show protobuf summary for all builds
* Add ONNX_NAMESPACE for cpp_build
* Statically link libprotobuf.a into libtorch.so
* Set ONNX_NAMESPACE on Windows build
* Move core/dispatch up as well
* Add /MD flag for Windows build of _C
* Potential Windows fix for ONNX and protobuf
* Add direct linkage from _C to ONNX on Windows
* Only include protobuf wrapper for PyTorch
* Pass extra_compile_args to _nvrtc ext build
* Remove installation of .a files
* Rebase creates some weird situations, revert them manually
* Remove more weird changes due to rebase
* Need to add thread_name.cc after merge
2018-06-13 20:10:45 +00:00
|
|
|
self.model.output_schema = schema.Struct()
|
|
|
|
|
|
2018-10-12 00:33:29 +00:00
|
|
|
train_init_net, train_net = self.get_training_nets(add_constants=True)
|
Update from facebook (#8384)
* [fix] fixup the bias multiplier data access issue
Hotfix for failures in conv_transpose
* [D2][Easy]: lint regularizer
lint with black
* [GanH]: Split mu in adaptive weight for diagnose
* [Dper] Add the ability to split FC weights into multiple smaller ones
* fix SumReduceLikeOp for empty blob
as desc.
* add ctc_greedy_decoder for caffe2
ctc_greedy_decoder same as tf's
* Update event callback handling
Allow multiple callbacks per event
* Add WeightedSum layer
The motivation is to do weighted sum in HoNet/crossnet, in the next diff, I'll replace model.Add with model.WeightedSum in
honet: https://fburl.com/f4rmolg2
crossnet: https://fburl.com/v7awn8se, https://fburl.com/63filbnm
* Replicate DAG's behavior
Some callers expect RunAsync to block, replicate that behavior in case of
explicit 'dag' net type
* [dper] layernorm layer
as title
* Override dag, async_dag, async_polling
Overriding dag, async_dag and async_polling with async_scheduling
* Name the thread pools
Caffe thread pools currently inherit the thread names from the thread that starts them, which can be misleading. Give them an explicit name instead.
* [Caffe2] FillerOp should support int64_t dimensions
Change argument type to int64_t for shape argument of FillerOp (used in ConstantFill, XavierFill, etc)
* Remove caffe2/caffe2/contrib/torch/
It's not used anywhere and depends on old lua torch that conflicts with Aten. Given PT1 it's not relevant any more (though it was nice and clever code!)
#accept2ship
* Fix linearWarmup multiplier check
The multiplier needs to be non-negative, not strictly positive.
* Revert D3314316
This is after 2 years and we do not seem to have a use case for this one, so
for the sake of clean API design we should potentially remove this. This would
allow us to potentially pass in arguments to optionally construct an object,
although it is indeed a little bit unclear how we can reuse existing objects if
constructor arguments are passed in. In any case, we may want to remove this
dangling feature.
* Speedup generate proposals by partial_sort.
Speedup generate proposals by partial_sort.
FACEBOOK:
- Saw speed improvement for training with this op.
- Yanghan benchmarked the op on a small dataset and saw a consistent 100% improvement in speed (6ms -> 3ms) at 420 input resolution. See next diff for details.
* More parallel processing friendly for CPP version of GenerateProposals.
More parallel processing friendly for CPP version of GenerateProposals.
* [DT] [43/n] Lift stop conditions inside reader code back to flow control
1. Split multi_reader function into local_reader and remote_reader
2. Lifted stop conditions inside Limiter back to flow control
3. Split epoch flow building logic into 3 cases:
- single machine (1 reader, 1 trainer on trainer0 node, no PS)
- (1 reader + 1 trainer) on trainer0 node, has PS
- multiple readers, readers do not share nodes with trainers, might have PS or not
* Resolve conflicts for torch/_thnn/utils.py
* [Caffe2] Handle image decoding errors
Image decoding errors can make the whole training fail. This diff is to handle them
1. Catch imdecode exceptions and check whether the decoded image has zero columns or rows. This is counted as a decoding error.
2. Replace the image with an empty one in case of error
3. Count the number of errors and throw a runtime exception if the rate reaches the given number
The empty image data is kept. It might introduce noise in the training data.
* Update MKL exporter to IDEEP ops
TSIA
* [Caffe2] GlobalInit is thread safe, fixing the comment
With the mutex and lock, GlobalInit is thread safe.
Update the comments.
* Back out "Add support for generating ATen files during fbcode build"
Original commit changeset: 28970ddba353
@override-unit-failures
(Note: this ignores all push blocking failures!)
* [DT]: fix predictor save
similar to D6610058, here we add the fix for distributed online training
* Remove net_singlethread_async_gpu.cc
Closes https://github.com/caffe2/caffe2/pull/2528
This removes net_singlethread_async_gpu.cc as part of our effort to clean
CUDAContext and the net executors.
* Inline DFS task execution
Add a DFS inline task execution mode in executor
* Add c10 folder to fbcode
This adds the c10 folder and its test cases to fbcode. Build flags are mostly taken from aten.
* add dependencies for online trainer
Add some dependencies so that the online model can use DataPipeline and PredictionTransform operators
Relevant post: https://fb.intern.facebook.com/groups/1324375037655677/permalink/1740993462660497/
* Resolve conflicts for tools/jit/gen_jit_dispatch.py
* [Fix] sparse regularization in distributed training
* Support advanced pooling options in sum processor
* support advanced pooling options in sum processor
* remove redundant code
* support attention in sum processor
* Improve shard logging in net tracing code
Make it handle arbitrary shard ids instead of just one digit ids.
* [Caffe2] Call GlobalInit in predictor only in mobile
FACEBOOK:
Calling GlobalInit long after the program starts may not be safe. There are issues if the following happens:
User does not call GlobalInit and initFacebook after program starts
User sets a flag manually: https://fburl.com/mcsumw7d
User calls OSS predictor.
OSS predictor calls GlobalInit
GlobalInit calls initFacebook
initFacebook resets all flags: https://fburl.com/tolszha1
Thus, the user manually set flags are overwritten
This would happen anytime GlobalInit is called long after the program starts.
I suppose the intention of the user in this case is not to call GlobalInit throughout the program,
but use Caffe2 regardless (is that desired?)
But adding GlobalInit in the OSS predictor would automatically call GlobalInit when using Caffe2.
This issue doesn't exist in mobile, since initFacebook is not called on mobile.
For now, guard the GlobalInit in predictor for mobile only.
May want to ensure the GlobalInit is always called at the start of the program. @[3501714:kutta] has seen weird issues when not calling GlobalInit at the start of the program on server side. He has made some progress on this.
* resolve conflicts for caffe2/core/logging_is_google_glog.h and test/test_torch.py
* Add empty fix for SumLikeReduceOp
Add empty fix for SumLikeReduceOp
* Revert D7962948: [caffe2][nomnigraph] Concat elim for sparseNN
This reverts commit f7f434dc5c34ca6058b9765d2ef615453d2276a9
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Remove Declarations.yaml
* Include common.h
* Change std::stoi to caffe2::stoi
* Add thread_name.cc to the CMake file
* No need to subtract 1. Fix test segfaults
* Fix NetTest, ObserverTest
Fix tests
(cherry picked from commit 3767e66c3f365596cba3d46d3e7322c933a0ab41)
* CTCGreedyDecoderOp only has CPU implementation, test should only run on CPU
* Add a variable to avoid conversion resizing issue
* [fix] fixup the bias multiplier data access issue
Hotfix for failures in conv_transpose
* [D2][Easy]: lint regularizer
lint with black
* [GanH]: Split mu in adaptive weight for diagnose
* [Dper] Add the ability to split FC weights into multiple smaller ones
* fix SumReduceLikeOp for empty blob
as desc.
* add ctc_greedy_decoder for caffe2
ctc_greedy_decoder same as tf's
* Update event callback handling
Allow multiple callbacks per event
* Add WeightedSum layer
The motivation is to do weighted sum in HoNet/crossnet, in the next diff, I'll replace model.Add with model.WeightedSum in
honet: https://fburl.com/f4rmolg2
crossnet: https://fburl.com/v7awn8se, https://fburl.com/63filbnm
* Replicate DAG's behavior
Some callers expect RunAsync to block, replicate that behavior in case of
explicit 'dag' net type
* [dper] layernorm layer
as title
* Override dag, async_dag, async_polling
Overriding dag, async_dag and async_polling with async_scheduling
* Name the thread pools
Caffe thread pools currently inherit the thread names from the thread that starts them, which can be misleading. Give them an explicit name instead.
* [Caffe2] FillerOp should support int64_t dimensions
Change argument type to int64_t for shape argument of FillerOp (used in ConstantFill, XavierFill, etc)
* Remove caffe2/caffe2/contrib/torch/
It's not used anywhere and depends on old lua torch that conflicts with Aten. Given PT1 it's not relevant any more (though it was nice and clever code!)
#accept2ship
* Fix linearWarmup multiplier check
The multiplier needs to be non-negative, not strictly positive.
* Revert D3314316
This is after 2 years and we do not seem to have a use case for this one, so
for the sake of clean API design we should potentially remove this. This would
allow us to potentially pass in arguments to optionally construct an object,
although it is indeed a little bit unclear how we can reuse existing objects if
constructor arguments are passed in. In any case, we may want to remove this
dangling feature.
* Speedup generate proposals by partial_sort.
Speedup generate proposals by partial_sort.
FACEBOOK:
- Saw speed improvement for training with this op.
- Yanghan benchmarked the op on a small dataset and see consistent 100% improvement on speed (6ms -> 3ms) on 420 input resolution. See next diff for details.
* More parallel processing friendly for CPP version of GenerateProposals.
More parallel processing friendly for CPP version of GenerateProposals.
* [DT] [43/n] Lift stop conditions inside reader code back to flow control
1. Split multi_reader function into local_reader and remote_reader
2. Lifted stop conditions inside Limiter back to flow control
3. Split epoch flow building logic into 3 cases:
- single machine (1 reader, 1 trainer on trainer0 node, no PS)
- (1 reader + 1 trainer) on trainer0 node, has PS
- multiple readers, readers do not share nodes with trainers, might have PS or not
* Resolve conflicts for torch/_thnn/utils.py
* [Caffe2] Handle image decoding errors
Image decoding errors can make the whole training fail. This diff is to handle them
1.Catch imdecode exceptions and check if decoded image has zero columns or rows. This is counted as decoding errors.
2.Replace the image with empty in case of error
3.Count the number of errors and throw runtime exception if the rate reaches given number
The empty image data is kept. It might introduce noise in the training data.
* Update MKL exporter to IDEEP ops
TSIA
* [Caffe2] GlobalInit is thread safe, fixing the comment
With the mutex and lock, GlobalInit is thread safe.
Update the comments.
* Back out "Add support for generating ATen files during fbcode build"
Original commit changeset: 28970ddba353
@override-unit-failures
(Note: this ignores all push blocking failures!)
* [DT]: fix predictor save
similar to D6610058, here we add the fix for distributed online training
* Remove net_singlethread_async_gpu.cc
Closes https://github.com/caffe2/caffe2/pull/2528
This removes net_singlethread_async_gpu.cc as part of our effort to clean
CUDAContext and the net executors.
* Inline DFS task execution
Add a DFS inline task execution mode in executor
* Add c10 folder to fbcode
This adds the c10 folder and its test cases to fbcode. Build flags are mostly taken from aten.
* add dependencies for online trainer
Add some dependencies so that the online model can use DataPipeline and PredictionTransform operators
Relevant post: https://fb.intern.facebook.com/groups/1324375037655677/permalink/1740993462660497/
* Resolve conflicts for tools/jit/gen_jit_dispatch.py
* [Fix] sparse regularization in distributed training
* Support advanced pooling options in sum processor
* support advanced pooling options in sum processor
* remove redundant code
* support attention in sum processor
* Improve shard logging in net tracing code
Make it handle arbitrary shard ids instead of just one digit ids.
* [Caffe2] Call GlobalInit in predictor only in mobile
FACEBOOK:
Calling GlobalInit long after the program starts may not be safe. There are issues if the following happens:
User does not call GlobalInit and initFacebook after program starts
User sets a flag manually: https://fburl.com/mcsumw7d
User calls OSS predictor.
OSS predictor calls GlobalInit
GlobalInit calls initFacebook
initFacebook resets all flags: https://fburl.com/tolszha1
Thus, the user manually set flags are overwritten
This would happen anytime GlobalInit is called long after the program starts.
I suppose the intention of the user in this case is not to call GlobalInit throughout the program,
but use Caffe2 regardless (is that desired?)
But adding GlobalInit in the OSS predictor would automatically call GlobalInit when using Caffe2.
This issue doesn't exist in mobile, since initFacebook is not called on mobile.
For now, guard the GlobalInit in predictor for mobile only.
May want to ensure the GlobalInit is always called at the start of the program. @[3501714:kutta] has seen weird issues when not calling GlobalInit at the start of the program on server side. He has made some progress on this.
* resolve conflicts for caffe2/core/logging_is_google_glog.h and test/test_torch.py
* Add empty fix for SumLikeReduceOp
Add empty fix for SumLikeReduceOp
* Revert D7962948: [caffe2][nomnigraph] Concat elim for sparseNN
This reverts commit f7f434dc5c34ca6058b9765d2ef615453d2276a9
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Remove Declarations.yaml
* Include common.h
* Change std::stoi to caffe2::stoi
* Add thread_name.cc to the CMake file
* No need to subtract 1. Fix test segfaults
* Fix NetTest, ObserverTest
Fix tests
(cherry picked from commit 3767e66c3f365596cba3d46d3e7322c933a0ab41)
* CTCGreedyDecoderOp only has CPU implementation, test should only run on CPU
* Add a variable to avoid conversion resizing issue
* Remove the code per soumith's comments
* Remove the code per soumith's comments
* Remove blank lines in the end of file
* Resolve conflicts for torch/_thnn/utils.py
* Update MKL exporter to IDEEP ops
TSIA
* Back out "Add support for generating ATen files during fbcode build"
Original commit changeset: 28970ddba353
@override-unit-failures
(Note: this ignores all push blocking failures!)
* add dependencies for online trainer
Add some dependencies so that the online model can use DataPipeline and PredictionTransform operators
Relevant post: https://fb.intern.facebook.com/groups/1324375037655677/permalink/1740993462660497/
* Resolve conflicts for tools/jit/gen_jit_dispatch.py
* Support advanced pooling options in sum processor
* support advanced pooling options in sum processor
* remove redundant code
* support attention in sum processor
* resolve conflicts for caffe2/core/logging_is_google_glog.h and test/test_torch.py
* Revert D7962948: [caffe2][nomnigraph] Concat elim for sparseNN
This reverts commit f7f434dc5c34ca6058b9765d2ef615453d2276a9
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Remove Declarations.yaml
* Include common.h
* Change std::stoi to caffe2::stoi
* [caffe2] upgrade IDEEP and hotfix for conv op accuracy issue (#8364)
* [IDEEP] Upgrade IDEEP version
Signed-off-by: Gu, Jinghui <jinghui.gu@intel.com>
* [IDEEP] Fix accuracy issue in conv op
Signed-off-by: Gu, Jinghui <jinghui.gu@intel.com>
* Fix build error due to lack of src in CMakeLists
Signed-off-by: Gu, Jinghui <jinghui.gu@intel.com>
* Remove the code per soumith's comments
* [ONNX] Add an ATen fallback pathway for ONNX export (#8273)
* ATen fallback for ONNX export
* Move to enum
* Fix model test
* Add comment
* Address comments
BC interface
* Remove imaginary file (#8415)
* [Caffe2] Enable AMD/MIOPEN ops for Caffe2 (#8306)
* Add hip support for caffe2 core
* Add MIOPEN header/wrapper to caffe2 core
* Add HIP device into caffe2 PB
* top level makefile change for rocm/hip
* makefile scaffolding for AMD/RocM/HIP
* Makefile scaffolding for AMD/RocM/HIP; add makefile/utility for HIP files
* caffe2 PB update for AMD/ROCM HIP device
* Add AMD/RocM/Thrust dependency
* HIP threadpool update
* Fix makefile macro
* makefile fix: duplicate test/binary name
* makefile clean-up
* makefile clean-up
* add HIP operator registry
* add utilities for hip device
* Add USE_HIP to config summary
* makefile fix for BUILD_TEST
* merge latest
* Fix indentation
* code clean-up
* Guard builds without HIP and use the same cmake script as PyTorch to find HIP
* Setup rocm environment variables in build.sh (ideally should be done in the docker images)
* setup locale
* set HIP_PLATFORM
* Revert "set HIP_PLATFORM"
This reverts commit 8ec58db2b390c9259220c49fa34cd403568300ad.
* continue the build script environment variables mess
* HCC_AMDGPU_TARGET
* Cleanup the mess, has been fixed in the latest docker images
* Assign protobuf field hip_gpu_id a new field number for backward compatibility
* change name to avoid conflict
* Fix duplicated thread pool flag
* Refactor cmake files to not add hip includes and libs globally
* Fix the wrong usage of environment variables detection in cmake
* Add MIOPEN CNN operators
* Revert "Add MIOPEN CNN operators"
This reverts commit 6e89ad4385b5b8967a7854c4adda52c012cee42a.
* Add MIOPEN pooling operator
* Add MIOPEN activation operator
* Add MIOPEN softmax operator
* Add MIOPEN spatial batch norm operator
* Add MIOPEN local response normalization operator
* Add MIOPEN conv operator
* Clean-up LRN ops
* enable fp16 in MIOPEN pool ops
* Enable fp16 for MIOPEN relu op
* Enable fp16 for MIOPEN spatial batch norm op
* code clean-up
* revert float16 support
* Create Caffe2 python binding for AMD/ROCM/HIP
* Add op fallback for HIP operator
* add hip src/test files in cmake
* exclude hip src/test files
* fix python binding for hip backend
* fix MIOPEN pooling op workspace
* hack to compile miopen operators
* fix include path for MIOPEN ops
* Fix include path
* Add HIP math utilities
* Fix path for HIP math utils
* cmake fix
* Cmake fix / hipcc for hip files
* suppress hipcc warning
* cmake fix / replace USE_HIP with USE_ROCM
* revert LoadHIP.cmake change
* fix include for thrust/cub-hip
* include path fix for conversion.h
* Updated with latest upstream changes
* clang format fixes
* Context_hip updates
* Fixed typo in rocblas handle get function
* Updated hipified math utils
* Updated math hip test util
* Updated context hip test
* Updated common_hip
* Updated net async dag for HIP
* Added MIOPEN in operator hip test
* fix
* C2 dependencies clean-up
* fix include path for building custom protobuf
* Decouple miopen pool op and conv_pool_op base
* cmake refactor
* fix operator_hip_test
* move all hip/miopen ops files into caffe2/operators/hip
* sanitize cmake
* permission issue
* remove extra parenthesis
* remove artifact from resolving merge conflict
* cont. sanitize cmake files
* fix syntax error
* sanitize conversion.h
* .
* Revert "."
This reverts commit 56020cb0e996a31ae27bf1f8f491955ed0b121b9.
* clang-format
* Enable some reduce operators' ONNX backend tests (#8418)
* fix old comment to point to the right file (#8416)
* Stop pinning nccl version. (#8421)
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* Expose logsumexp docs and mark log_sum_exp in distributions for internal use (#8428)
* Enable some of the ONNX backend test on broadcasting (#8423)
* Enable some of the ONNX backend test on broadcasting
* enable gemm broadcast
* Expose proto utils and ONNX (#8073)
* Expose proto utils and ONNX from PyTorch libcaffe2.so
* Try to use protobuf from _C.so
* Fix ONNX proto header include
* Adjust order of imports for ONNX until nanopb goes away
* Set and use ONNX_NAMESPACE for PyTorch builds
* Show protobuf summary for all builds
* Add ONNX_NAMESPACE for cpp_build
* Statically link libprotobuf.a into libtorch.so
* Set ONNX_NAMESPACE on Windows build
* Move core/dispatch up as well
* Add /MD flag for Windows build of _C
* Potential Windows fix for ONNX and protobuf
* Add direct linkage from _C to ONNX on Windows
* Only include protobuf wrapper for PyTorch
* Pass extra_compile_args to _nvrtc ext build
* Remove installation of .a files
* Rebase creates some weird situations, revert them manually
* Remove more weird changes due to rebase
* Need to add thread_name.cc after merge
2018-06-13 20:10:45 +00:00
|
|
|
workspace.RunNetOnce(train_init_net)
|
|
|
|
|
workspace.RunNetOnce(train_net)
|
|
|
|
|
|
2017-05-05 00:19:40 +00:00
|
|
|
@given(
|
|
|
|
|
X=hu.arrays(dims=[5, 2]),
|
|
|
|
|
num_to_collect=st.integers(min_value=1, max_value=10),
|
|
|
|
|
)
|
|
|
|
|
def testLastNWindowCollector(self, X, num_to_collect):
    """Feed one batch through LastNWindowCollector and check what it kept.

    Args:
        X: input array supplied by the hypothesis ``@given`` decorator.
        num_to_collect: window size passed to the collector layer.

    The test asserts that the collected rows equal the trailing slice of
    ``X`` and that the layer reports 5 visited rows.
    """
    # Number of rows the test expects in X (the decorator draws dims=[5, 2]).
    batch_rows = 5

    record = self.new_record(schema.Scalar(np.float32))
    schema.FeedRecord(record, [X])
    collector = self.model.LastNWindowCollector(record, num_to_collect)
    self.run_train_net_forward_only()

    # Only the last `num_to_collect` rows should be kept; when the window
    # is larger than the batch, the whole batch is expected.
    first_kept = max(0, batch_rows - num_to_collect)
    collected = schema.FetchRecord(collector.last_n)
    npt.assert_array_equal(X[first_kept:], collected())

    visited = schema.FetchRecord(collector.num_visited)
    npt.assert_array_equal([batch_rows], visited())
|
2017-05-05 00:19:40 +00:00
|
|
|
|
[Caffe2] Changes done inside Facebook (#6378)
* fix unit test for sqrt op
From the error logging:
[idx, grad, grad_estimate] are:
[[ 146. 0.5 0.45776367]
[ 147. 0.5 0.45776367]
The gradient == 0.5 is correct, which means the SqrtOp and its gradient is doing right job. (Because y = sqrt(x), loss = y^2/2 = x/2, and then d(loss)/dx = 1/2 = 0.5; )
The test failed because of numerical problem of grad_estimate (in unit test). It can be because the step_size is small, and float precision is not high (when there are multiple elements in the tensor, we do sum(y^2) to compute loss)
This diff
- increase the step size, and also move the test cases to be further away from 0 (where sqrt(x) is not well defined) to be safe :)
- also clean up, and merge the test case for inplace Vs. non-inplace
Tested with:
`CAFFE2_HYPOTHESIS_PROFILE=debug ai_bt caffe2/caffe2/python/operator_test:elementwise_ops_test -- "test_sqrt"`
* CompositeReader & CompositeReaderBuilder
A new type of reader gluing multiple readers together.
* Back out "Revert D7394363: [GanH]: Log D Trick for Cross Entropy with Sigmoid"
Original commit changeset: 9325a4356dbe
* [dai][WIP] convert params to int8 on ps before sending to trainer
Add float->uint8 conversion in addition to float->fp16 conversion in model_saver.
* [easy] improve unit test for sparse length sum ops
as desc.
#accept2ship
* Update GitHub upstream to 771fcb3455cbfe69c2abcc4cb3bd7ef92d59af24
* move sparse hash unique ops to OOS and add unit tests
- move the SparseHash version to OOS, since 'sparsehash' is already deps of caffe2 OOS: https://fburl.com/arssw4n1
- The 'SparseHash' engine is also being used in OOS, so the SparseHash version shall be in OOS to reduce confusion: https://fburl.com/o5ea7ah2
- fix the CUDA UniqueOp for the case when batch is empty.
- add unit test
* group_norm_op for caffe2
This is the cuda op for Group Normalization (GN): https://arxiv.org/abs/1803.08494
This code implements GN in one op that computes Y=gamma * (X-mu) / sigma + beta and also its gradients. It is expected to have minimal memory consumption (similar to the BN op), without creating new blobs if GN were implemented as several ops (e.g., reshape, norm_mean/std, affine_channel).
* Resubmit D7405233: disappeared in D7464958
OOS publish causes the op missing -- however, test was still there
* [c2] add sparse hash engine for cuda unique op
The SparseHash version of UniqueOp copy input tensor to CPU, and make use of sparse hash map to get unique output, and then copy back to GPU.
* [dper][gpu] enable unit testing gpu trainer for sparse nn
to debug the GPU trainer using mock data in unit test.
make it easier to develop GPU trainer for new models.
* Reuse Gloo context for Synchronize() calls
Previously we were creating (and leaking) the Gloo context on each call to Synchronize(). Now only run the common world op and create the barrier net once, then run the barrier net on each Synchronize() call. Since timeout is associated with the Gloo context, assert that the timeout is fixed instead of trying to handle the complexity of multiple timeouts (and associated contexts).
* [GanH/WGAN][1/n]: add FC param clipping
as titled
* [mobile] minimizing changes between caffe2_benchmark and speed_benchmark
* [GanH]: enable diagnose within model
avoid finding blob names but to directly enable inside the model
* Add `net_transformer_fun` option to DPM
This callback allows for various transformations to be made to the
model after gradient operators have been added. The immediate motivation for
this is to allow transformations such as "checkpoint-and-recompute" which
allow trading off memory for additional compute.
Adding several callbacks like this has made DPM's API less than ideal at this
stage. However, I could not find any reasonable alternative.
* [DT] [33/n] Compile flow task groups
task groups need to be compiled in order to pickle the object in fblearner. However, I also changed the Job's compile function, as creating a new object is not necessary.
* Initial commit for sparse_normalize vectorization and benchmark
* [GanH]: LB Calibration for JSD
as titled
* Tracing event in async executor
Adding event tracing through TRACE_EVENT macro in async executor
* [Resubmit] D7409751 Reseting book-keeping blobs when the reservoir is reset
D7409751 got lost in D7464958
* Visualizing realtime weights values
we want to visualize the weights values as optimizer is iterating. This diff supports to visual the weights at an assigned index.
Currently, we assume the blob to be 2 dimensional.
* [GanH][Easy]: Fix Homotopy Weighting
Apparently, there was a bug in homotopy weight (alpha, beta) update
* [c2] move sparse hash unique op out of oss
so that oss do not need to depend on google hash map.
* Get rid of std::round as it's not supported on Android
* Revert changes on setup.py
* Skip shaky test on Dataio
* fix
2018-04-11 04:11:43 +00:00
|
|
|
@given(
|
|
|
|
|
X=hu.arrays(dims=[5, 2]),
|
|
|
|
|
num_to_collect=st.integers(min_value=3, max_value=3),
|
|
|
|
|
)
|
2020-08-08 19:10:52 +00:00
|
|
|
@settings(deadline=1000)
|
[Caffe2] Changes done inside Facebook (#6378)
* fix unit test for sqrt op
From the error logging:
[idx, grad, grad_estimate] are:
[[ 146. 0.5 0.45776367]
[ 147. 0.5 0.45776367]
The gradient == 0.5 is correct, which means the SqrtOp and its gradient is doing right job. (Because y = sqrt(x), loss = y^2/2 = x/2, and then d(loss)/dx = 1/2 = 0.5; )
The test failed because of numerical problem of grad_estimate (in unit test). It can be because the step_size is small, and float precision is not high (when there are multiple elements in the tensor, we do sum(y^2) to compute loss)
This diff
- increase the step size, and also move the test cases to be further away from 0 (where sqrt(x) is not well defined) to be safe :)
- also clean up, and merge the test case for inplace Vs. non-inplace
Tested with:
`CAFFE2_HYPOTHESIS_PROFILE=debug ai_bt caffe2/caffe2/python/operator_test:elementwise_ops_test -- "test_sqrt"`
* CompositeReader & CompositeReaderBuilder
A new type of reader gluing multiple readers together.
* Back out "Revert D7394363: [GanH]: Log D Trick for Cross Entropy with Sigmoid"
Original commit changeset: 9325a4356dbe
* [dai][WIP] convert params to int8 on ps before sending to trainer
Add float->uint8 conversion in addition to float->fp16 conversion in model_saver.
* [easy] improve unit test for sparse length sum ops
as desc.
#accept2ship
* Update GitHub upstream to 771fcb3455cbfe69c2abcc4cb3bd7ef92d59af24
* move sparse hash unique ops to OOS and add unit tests
- move the SparseHash version to OOS, since 'sparsehash' is already deps of caffe2 OOS: https://fburl.com/arssw4n1
- The 'SparseHash' engine is also being used in OOS, so the SparseHash version shall be in OOS to reduce confusion: https://fburl.com/o5ea7ah2
- fix the CUDA UniqueOp for the case when batch is empty.
- add unit test
* group_norm_op for caffe2
This is the cuda op for Group Normalization (GN): https://arxiv.org/abs/1803.08494
This code implements GN in one op that computes Y=gamma * (X-mu) / sigma + beta and also its gradients. It is expected to have minimal memory consumption (similar to the BN op), without creating new blobs if GN were implemented as several ops (e.g., reshape, norm_mean/std, affine_channel).
* Resubmit D7405233: disappeared in D7464958
OOS publish causes the op missing -- however, test was still there
* [c2] add sparse hash engine for cuda unique op
The SparseHash version of UniqueOp copy input tensor to CPU, and make use of sparse hash map to get unique output, and then copy back to GPU.
* [dper][gpu] enable unit testing gpu trainer for sparse nn
to debug the GPU trainer using mock data in unit test.
make it easier to develop GPU trainer for new models.
* Reuse Gloo context for Synchronize() calls
Previously we were creating (and leaking) the Gloo context on each call to Synchronize(). Now only run the common world op and create the barrier net once, then run the barrier net on each Synchronize() call. Since timeout is associated with the Gloo context, assert that the timeout is fixed instead of trying to handle the complexity of multiple timeouts (and associated contexts).
* [GanH/WGAN][1/n]: add FC param clipping
as titled
* [mobile] minimizing changes between caffe2_benchmark and speed_benchmark
* [GanH]: enable diagnose within model
avoid finding blob names but to directly enable inside the model
* Add `net_transformer_fun` option to DPM
This callback allows for various transformations to be made to the
model after gradient operators have been added. The immediate motivation for
this is to allow transformations such as "checkpoint-and-recompute" which
allow trading off memory for additional compute.
Adding several callbacks like this has made DPM's API less than ideal at this
stage. However, I could not find any reasonable alternative.
* [DT] [33/n] Compile flow task groups
task groups need to be compiled in order to pickle the object in fblearner. However, I also changed the Job's compile function, as creating a new object is not necessary.
* Initial commit for sparse_normalize vectorization and benchmark
* [GanH]: LB Calibration for JSD
as titled
* Tracing event in async executor
Adding event tracing through TRACE_EVENT macro in async executor
* [Resubmit] D7409751 Reseting book-keeping blobs when the reservoir is reset
D7409751 got lost in D7464958
* Visualizing realtime weights values
we want to visualize the weights values as optimizer is iterating. This diff supports to visual the weights at an assigned index.
Currently, we assume the blob to be 2 dimensional.
* [GanH][Easy]: Fix Homotopy Weighting
Apparently, there was a bug in homotopy weight (alpha, beta) update
* [c2] move sparse hash unique op out of oss
so that oss do not need to depend on google hash map.
* Get rid of std::round as it's not supported on Android
* Revert changes on setup.py
* Skip shaky test on Dataio
* fix
2018-04-11 04:11:43 +00:00
|
|
|
def testReservoirSamplingWithID(self, X, num_to_collect):
    """Exercise ReservoirSampling when each row carries an object id.

    Args:
        X: dense input array supplied by the hypothesis ``@given`` decorator.
        num_to_collect: reservoir size passed to the layer.

    The net is run for two iterations with ids ``[1, 2, 3, 1, 2]`` and
    ``num_visited`` is asserted to be 3. Every model parameter blob is then
    round-tripped through serialize/deserialize, new ids ``[3, 5, 3, 3, 5]``
    are fed, the net is run twice more, and ``num_visited`` is asserted to
    be 2.
    """
    first_ids = np.array([1, 2, 3, 1, 2], dtype=np.int64)

    dense_struct = schema.Struct(
        ('dense', schema.Scalar()),
    )
    input_record = self.new_record(
        schema.Struct(
            ('record', dense_struct),
            ('object_id', schema.Scalar(np.int64)),
        )
    )
    schema.FeedRecord(input_record, [X, first_ids])

    # Pack the dense sub-record before handing it to the sampler.
    packed = self.model.PackRecords(
        input_record.record,
        1,
        fields=input_record.record.field_names(),
    )
    sampler_input = schema.Struct(
        ('data', packed),
        ('object_id', input_record.object_id),
    )
    reservoir = self.model.ReservoirSampling(sampler_input, num_to_collect)
    self.model.output_schema = schema.Struct()

    nets = layer_model_instantiator.generate_training_nets_forward_only(
        self.model
    )
    train_init_net, train_net = nets

    def run_two_iterations():
        # Run the already-created training net twice.
        workspace.RunNet(train_net.Proto().name, num_iter=2)

    def visited_count():
        # Read the current value of the layer's num_visited output.
        return schema.FetchRecord(reservoir.num_visited)()

    workspace.RunNetOnce(train_init_net)
    workspace.CreateNet(train_net)
    run_two_iterations()
    npt.assert_array_equal([3], visited_count())

    # Round-trip every parameter blob through serialization in place.
    for param in self.model.params:
        blob_name = str(param)
        payload = workspace.SerializeBlob(blob_name)
        workspace.DeserializeBlob(blob_name, payload)

    second_ids = np.array([3, 5, 3, 3, 5], dtype=np.int64)
    schema.FeedRecord(input_record.object_id, [second_ids])
    run_two_iterations()
    npt.assert_array_equal([2], visited_count())
|
|
|
|
|
|
2017-03-29 21:33:52 +00:00
|
|
|
def testUniformSampling(self):
    """Check the sample count, leading samples and probabilities of UniformSampling.

    Six int32 ids are fed in and 20 samples out of 100 elements are
    requested. The test asserts that 20 samples come back, that the first
    six samples are the fed ids in order, and that every sampling
    probability equals ``num_samples / num_elements``.
    """
    num_samples = 20
    num_elements = 100
    fed_ids = np.array([3, 10, 11, 15, 20, 99], dtype=np.int32)

    input_record = self.new_record(schema.Scalar(np.int32))
    schema.FeedRecord(input_record, [fed_ids])

    sampling_output = self.model.UniformSampling(
        input_record, num_samples, num_elements
    )
    self.model.loss = sampling_output
    self.run_train_net()

    samples = workspace.FetchBlob(sampling_output.samples())
    sampling_prob = workspace.FetchBlob(sampling_output.sampling_prob())

    self.assertEqual(num_samples, len(samples))
    # The fed ids are expected to lead the sample list unchanged.
    npt.assert_array_equal(fed_ids, samples[:len(fed_ids)])

    expected_prob = np.full(
        num_samples, float(num_samples) / num_elements, dtype=np.float32
    )
    npt.assert_almost_equal(expected_prob, sampling_prob)
|
|
|
|
|
|
2017-09-21 23:29:08 +00:00
|
|
|
def testUniformSamplingWithIncorrectSampleSize(self):
    """Expect an AssertionError when asking for 200 samples out of 100 elements."""
    requested_samples = 200
    population_size = 100
    input_record = self.new_record(schema.Scalar(np.int32))

    # Building the layer itself must raise; no net is run.
    with self.assertRaises(AssertionError):
        self.model.UniformSampling(
            input_record, requested_samples, population_size
        )
|
|
|
|
|
|
2017-04-13 21:45:49 +00:00
|
|
|
def testGatherRecord(self):
    '''Check GatherRecord on dense, list and list-of-list fields.

    Builds a record with a dense scalar, a list, a list of lists and an
    empty struct, gathers rows 1, 3 and 4, and compares every gathered blob
    with the same selection computed directly in NumPy.
    '''
    indices = np.array([1, 3, 4], dtype=np.int32)
    dense = np.arange(20, dtype=np.float32).reshape(10, 2)
    lengths = np.arange(10, dtype=np.int32)
    total_items = lengths.sum()
    items = np.arange(total_items, dtype=np.int64)
    items_lengths = np.arange(total_items, dtype=np.int32)
    items_items = np.arange(items_lengths.sum(), dtype=np.int64)

    sparse_schema = schema.Struct(
        ('list', schema.List(np.int64)),
        ('list_of_list', schema.List(schema.List(np.int64))),
    )
    record = self.new_record(schema.Struct(
        ('dense', schema.Scalar(np.float32)),
        ('sparse', sparse_schema),
        ('empty_struct', schema.Struct()),
    ))
    indices_record = self.new_record(schema.Scalar(np.int32))
    input_record = schema.Struct(
        ('indices', indices_record),
        ('record', record),
    )
    # `lengths` is fed twice: once for 'list' and once for the outer level
    # of 'list_of_list'.
    feed_values = [
        indices, dense, lengths, items, lengths, items_lengths, items_items,
    ]
    schema.FeedRecord(input_record, feed_values)

    gathered_record = self.model.GatherRecord(input_record)
    self.assertTrue(schema.equal_schemas(gathered_record, record))

    self.run_train_net_forward_only()

    # Dense rows and list lengths are selected row by row.
    npt.assert_array_equal(
        dense[indices], workspace.FetchBlob(gathered_record.dense()))
    npt.assert_array_equal(
        lengths[indices],
        workspace.FetchBlob(gathered_record.sparse.list.lengths()))

    # Start offset of every row inside the flattened items arrays.
    offsets = lengths.cumsum() - lengths

    def select_rows(flat_values, starts, sizes):
        # Concatenate the per-row slices of `flat_values` for the gathered rows.
        return np.concatenate([
            flat_values[starts[row]: starts[row] + sizes[row]]
            for row in indices
        ])

    npt.assert_array_equal(
        select_rows(items, offsets, lengths),
        workspace.FetchBlob(gathered_record.sparse.list.items()))
    npt.assert_array_equal(
        select_rows(items_lengths, offsets, lengths),
        workspace.FetchBlob(
            gathered_record.sparse.list_of_list.items.lengths()))

    # For the innermost items, each outer row spans the sum of the inner
    # lengths it owns; accumulate those sums into offsets and lengths.
    nested_offsets = []
    nested_lengths = []
    running_offset = 0
    remaining_inner_lengths = iter(items_lengths)
    for row_length in lengths:
        nested_offsets.append(running_offset)
        row_total = sum(
            next(remaining_inner_lengths) for _ in range(row_length))
        running_offset += row_total
        nested_lengths.append(row_total)

    npt.assert_array_equal(
        select_rows(items_items, nested_offsets, nested_lengths),
        workspace.FetchBlob(
            gathered_record.sparse.list_of_list.items.items()))
|
|
|
|
|
|
2017-04-25 23:00:42 +00:00
|
|
|
def testMapToRange(self):
    '''Check MapToRange index assignment in train, eval and predict nets.

    The expected arrays encode the contract under test: the training net
    hands out indices 1, 2, 3, ... in order of first appearance and keeps
    them across runs, while the eval and predict nets map ids that were
    never seen during training to 0.
    '''
    input_record = self.new_record(schema.Scalar(np.int32))
    indices_blob = self.model.MapToRange(
        input_record, max_index=100).indices
    self.model.output_schema = schema.Struct()

    train_init_net, train_net = self.get_training_nets()

    def feed(raw_ids):
        # Feed one batch of raw int32 ids into the input record.
        schema.FeedRecord(input_record, [np.array(raw_ids, dtype=np.int32)])

    def run_and_expect(net, expected_indices):
        # Run `net` once and compare the mapped indices with the expectation.
        workspace.RunNetOnce(net)
        npt.assert_array_equal(
            np.array(expected_indices, dtype=np.int32),
            workspace.FetchBlob(indices_blob()))

    # First training batch: the init net runs after the feed, as before.
    feed([10, 3, 20, 99, 15, 11, 3, 11])
    workspace.RunNetOnce(train_init_net)
    run_and_expect(train_net, [1, 2, 3, 4, 5, 6, 2, 6])

    # Second training batch: known ids keep their index, new ids continue
    # the numbering.
    feed([10, 3, 23, 35, 60, 15, 10, 15])
    run_and_expect(train_net, [1, 2, 7, 8, 9, 5, 1, 5])

    eval_net = self.get_eval_net()

    feed([10, 3, 23, 35, 60, 15, 200])
    run_and_expect(eval_net, [1, 2, 7, 8, 9, 5, 0])

    feed([10, 3, 23, 15, 101, 115])
    run_and_expect(eval_net, [1, 2, 7, 5, 0, 0])

    predict_net = self.get_predict_net()

    feed([3, 3, 20, 23, 151, 35, 60, 15, 200])
    run_and_expect(predict_net, [2, 2, 3, 7, 0, 8, 9, 5, 0])
|
|
|
|
|
|
2017-05-03 00:22:16 +00:00
|
|
|
def testSelectRecordByContext(self):
    '''Check that SelectRecordByContext picks the record for each context.

    The prediction net must output the raw float features, while the eval
    and forward-only training nets must output their logarithm.
    '''
    float_features = self.model.input_feature_schema.float_features
    float_array = np.array([1.0, 2.0], dtype=np.float32)
    schema.FeedRecord(float_features, [float_array])

    with Tags(Tags.EXCLUDE_FROM_PREDICTION):
        log_float_features = self.model.Log(float_features, 1)

    per_context = schema.Struct(
        (InstantiationContext.PREDICTION, float_features),
        (InstantiationContext.TRAINING, log_float_features),
        # TODO: TRAIN_ONLY layers are also generated in eval
        (InstantiationContext.EVAL, log_float_features),
    )
    joined = self.model.SelectRecordByContext(per_context)

    # model.output_schema has to be a struct
    self.model.output_schema = schema.Struct(('joined', joined))

    def run_and_fetch_joined(net):
        # Run `net` once and return the value of its 'joined' output field.
        workspace.RunNetOnce(net)
        return schema.FetchRecord(net.output_record())['joined']()

    predict_net = layer_model_instantiator.generate_predict_net(self.model)
    npt.assert_array_equal(float_array, run_and_fetch_joined(predict_net))

    eval_net = layer_model_instantiator.generate_eval_net(self.model)
    npt.assert_array_equal(
        np.log(float_array), run_and_fetch_joined(eval_net))

    training_nets = (
        layer_model_instantiator.generate_training_nets_forward_only(
            self.model))
    train_net = training_nets[1]
    npt.assert_array_equal(
        np.log(float_array), run_and_fetch_joined(train_net))
|
|
|
|
|
|
2017-02-28 07:22:06 +00:00
|
|
|
def testFunctionalLayer(self):
    '''Check the ops generated for a user-defined Functional layer plus FC.

    A custom `normalize` function (ReduceFrontMean followed by a broadcast
    Sub) is wrapped with model.Functional and fed into an FC layer. The
    predict net must contain exactly ReduceFrontMean, Sub and FC, wired
    from the float features through to the FC input.

    Uses unittest assertions rather than bare `assert` so the checks still
    run under `python -O` and report the mismatching values on failure.
    '''
    def normalize(net, in_record, out_record):
        # Subtract the front-reduced mean from the input, with broadcasting.
        mean = net.ReduceFrontMean(in_record(), 1)
        net.Sub(
            [in_record(), mean],
            out_record(),
            broadcast=1)

    normalized = self.model.Functional(
        self.model.input_feature_schema.float_features, 1,
        normalize, name="normalizer")

    # Attach metadata to one of the outputs and use it in FC
    normalized.set_type((np.float32, 32))
    self.model.output_schema = self.model.FC(normalized, 2)

    predict_net = layer_model_instantiator.generate_predict_net(
        self.model)
    ops = predict_net.Proto().op
    self.assertEqual(len(ops), 3)
    self.assertEqual(ops[0].type, "ReduceFrontMean")
    self.assertEqual(ops[1].type, "Sub")
    self.assertEqual(ops[2].type, "FC")
    self.assertEqual(len(ops[0].input), 1)
    self.assertEqual(
        ops[0].input[0],
        self.model.input_feature_schema.float_features())
    self.assertEqual(len(ops[1].output), 1)
    # The normalized blob must be consumed by the FC op.
    self.assertIn(ops[1].output[0], ops[2].input)
|
|
|
|
|
|
|
|
|
|
def testFunctionalLayerHelper(self):
    '''Check the ops generated when operators are called as model helpers.

    Builds the same ReduceFrontMean -> Sub -> FC pipeline as
    testFunctionalLayer, but through the model.ReduceFrontMean / model.Sub
    helpers, and verifies the predict net's op types and wiring.

    Uses unittest assertions rather than bare `assert` so the checks still
    run under `python -O` and report the mismatching values on failure.
    '''
    mean = self.model.ReduceFrontMean(
        self.model.input_feature_schema.float_features, 1)
    normalized = self.model.Sub(
        schema.Tuple(
            self.model.input_feature_schema.float_features, mean),
        1, broadcast=1)
    # Attach metadata to one of the outputs and use it in FC
    normalized.set_type((np.float32, (32,)))
    self.model.output_schema = self.model.FC(normalized, 2)

    predict_net = layer_model_instantiator.generate_predict_net(
        self.model)
    ops = predict_net.Proto().op
    self.assertEqual(len(ops), 3)
    self.assertEqual(ops[0].type, "ReduceFrontMean")
    self.assertEqual(ops[1].type, "Sub")
    self.assertEqual(ops[2].type, "FC")
    self.assertEqual(len(ops[0].input), 1)
    self.assertEqual(
        ops[0].input[0],
        self.model.input_feature_schema.float_features())
    self.assertEqual(len(ops[1].output), 1)
    # The normalized blob must be consumed by the FC op.
    self.assertIn(ops[1].output[0], ops[2].input)
|
|
|
|
|
|
|
|
|
|
def testFunctionalLayerHelperAutoInference(self):
    '''Check automatic type/shape inference for a functional helper output.

    Softsign applied to the float features must yield a float32 output of
    shape (32,), and the predict net must consist of Softsign followed by
    an FC op that consumes the Softsign output.

    Uses unittest assertions rather than bare `assert` so the checks still
    run under `python -O` and report the mismatching values on failure.
    '''
    softsign = self.model.Softsign(
        schema.Tuple(self.model.input_feature_schema.float_features),
        1)
    self.assertEqual(softsign.field_type().base, np.float32)
    self.assertEqual(softsign.field_type().shape, (32,))
    self.model.output_schema = self.model.FC(softsign, 2)

    predict_net = layer_model_instantiator.generate_predict_net(
        self.model)
    ops = predict_net.Proto().op
    self.assertEqual(len(ops), 2)
    self.assertEqual(ops[0].type, "Softsign")
    self.assertEqual(ops[1].type, "FC")
    self.assertEqual(len(ops[0].input), 1)
    self.assertEqual(
        ops[0].input[0],
        self.model.input_feature_schema.float_features())
    self.assertEqual(len(ops[0].output), 1)
    # The Softsign blob must be consumed by the FC op.
    self.assertIn(ops[0].output[0], ops[1].input)
|
2017-03-14 22:23:34 +00:00
|
|
|
|
2018-02-08 23:17:32 +00:00
|
|
|
def testHalfToFloatTypeInference(self):
    '''Check inferred dtypes for a FloatToHalf / HalfToFloat round trip.

    A float32 record of shape (32,) must become float16 after FloatToHalf
    and float32 again after HalfToFloat, keeping shape (32,) both times.

    The local record is named `input_record` so it no longer shadows the
    builtin `input`, and unittest assertions replace bare `assert` so the
    checks still run under `python -O`.
    '''
    input_record = self.new_record(schema.Scalar((np.float32, (32,))))

    half = self.model.FloatToHalf(input_record, 1)
    self.assertEqual(half.field_type().base, np.float16)
    self.assertEqual(half.field_type().shape, (32, ))

    restored = self.model.HalfToFloat(half, 1)
    self.assertEqual(restored.field_type().base, np.float32)
    self.assertEqual(restored.field_type().shape, (32, ))
|
|
|
|
|
|
2017-03-14 22:23:34 +00:00
|
|
|
def testFunctionalLayerHelperAutoInferenceScalar(self):
    '''AveragedLoss must infer a single float32 output with an empty shape.'''
    loss = self.model.AveragedLoss(self.model.input_feature_schema, 1)
    inferred_types = loss.field_types()
    self.assertEqual(1, len(inferred_types))
    only_type = loss.field_types()[0]
    self.assertEqual(np.float32, only_type.base)
    self.assertEqual((), only_type.shape)
|
2017-03-23 20:16:44 +00:00
|
|
|
|
2017-04-18 02:22:46 +00:00
|
|
|
def testFunctionalLayerInputCoercion(self):
    '''A plain list of blobs must be accepted as functional-layer input.

    Adds the global constant 'ONE' to itself, passing the operands as a
    Python list, and expects the fetched result to equal [2.0].
    '''
    unit = self.model.global_constants['ONE']
    doubled = self.model.Add([unit, unit], 1)
    self.model.loss = doubled
    self.run_train_net()
    result_blob = doubled.field_blobs()[0]
    npt.assert_array_equal([2.0], workspace.FetchBlob(result_blob))
|
|
|
|
|
|
2017-03-23 20:16:44 +00:00
|
|
|
def testFunctionalLayerWithOutputNames(self):
    '''Named outputs of TopK must get the expected types, shapes and blobs.

    With k=3 and outputs named 'values' and 'indices', the layer must expose
    a float32 field and an int32 field, both of shape (k,), backed by the
    blobs 'TopK/values' and 'TopK/indices'.
    '''
    k = 3
    topk = self.model.TopK(
        self.model.input_feature_schema,
        output_names_or_num=['values', 'indices'],
        k=k,
    )
    self.assertEqual(2, len(topk.field_types()))

    expected_bases = (np.float32, np.int32)
    for position, expected_base in enumerate(expected_bases):
        self.assertEqual(expected_base, topk.field_types()[position].base)
        self.assertEqual((k,), topk.field_types()[position].shape)

    self.assertEqual(['TopK/values', 'TopK/indices'], topk.field_blobs())
|
2017-04-18 23:23:31 +00:00
|
|
|
|
2017-08-18 00:32:15 +00:00
|
|
|
def testFunctionalLayerSameOperatorOutputNames(self):
    '''Two ConstantFill layers must not end up with the same output name.'''
    first_fill = self.model.ConstantFill([], 1, value=1)
    second_fill = self.model.ConstantFill([], 1, value=2)
    first_name, second_name = str(first_fill), str(second_fill)
    self.assertNotEqual(first_name, second_name)
|
|
|
|
|
|
2017-04-18 23:23:31 +00:00
|
|
|
def testFunctionalLayerWithOutputDtypes(self):
    '''An explicit output_dtypes must override the inferred output type.

    AveragedLoss is created with output_dtypes=(np.float32, (1,)) and must
    report exactly one float32 field of shape (1,).
    '''
    requested_dtype = (np.float32, (1,))
    loss = self.model.AveragedLoss(
        self.model.input_feature_schema,
        1,
        output_dtypes=requested_dtype,
    )
    self.assertEqual(1, len(loss.field_types()))
    only_type = loss.field_types()[0]
    self.assertEqual(np.float32, only_type.base)
    self.assertEqual((1,), only_type.shape)
|
2017-05-02 17:44:33 +00:00
|
|
|
|
|
|
|
|
def testPropagateRequestOnly(self):
    '''Concat output is request-only only if every input is request-only.'''
    def float_struct(*named_widths):
        # Build a Struct of float32 scalars from (name, width) pairs.
        return schema.Struct(*[
            (name, schema.Scalar((np.float32, (width, ))))
            for name, width in named_widths
        ])

    # test case when output is request only
    input_record = self.new_record(
        float_struct(('input1', 32), ('input2', 64), ('input3', 16)))
    set_request_only(input_record)
    concat_output = self.model.Concat(input_record)
    self.assertEqual(is_request_only_scalar(concat_output), True)

    # test case when output is not request only
    unmarked_record = self.new_record(float_struct(('input4', 100)))
    input_record2 = unmarked_record + input_record
    concat_output2 = self.model.Concat(input_record2)
    self.assertEqual(is_request_only_scalar(concat_output2), False)
|
|
|
|
|
|
|
|
|
|
def testSetRequestOnly(self):
    '''set_request_only must keep the metadata already attached to a scalar.

    Attaches categorical_limit, expected_value and feature ids to an int64
    scalar, marks it request-only and checks that all three values survive.
    '''
    limit = 100000000
    expected = 99
    feature_ids = [1, 100, 1001]

    input_record = schema.Scalar(np.int64)
    metadata = schema.Metadata(
        categorical_limit=limit,
        expected_value=expected,
        feature_specs=schema.FeatureSpec(feature_ids=feature_ids),
    )
    schema.attach_metadata_to_scalars(input_record, metadata)

    set_request_only(input_record)

    self.assertEqual(input_record.metadata.categorical_limit, 100000000)
    self.assertEqual(input_record.metadata.expected_value, 99)
    self.assertEqual(
        input_record.metadata.feature_specs.feature_ids,
        [1, 100, 1001]
    )
|
2017-06-19 21:43:43 +00:00
|
|
|
|
|
|
|
|
@given(
    X=hu.arrays(dims=[5, 5]),  # Shape of X is irrelevant
    dropout_for_eval=st.booleans(),
)
def testDropout(self, X, dropout_for_eval):
    '''Check the Dropout op emitted for train, eval and predict nets.

    The training net must always contain Dropout with is_test=0. The eval
    and predict nets must use is_test=0 when dropout_for_eval is set and
    is_test=1 otherwise. All three nets are then run once.
    '''
    scalar_type = (np.float32, (1,))
    input_record = self.new_record(schema.Scalar(scalar_type))
    schema.FeedRecord(input_record, [X])
    d_output = self.model.Dropout(
        input_record,
        dropout_for_eval=dropout_for_eval
    )
    self.assertEqual(schema.Scalar((np.float32, (1,))), d_output)
    self.model.output_schema = schema.Struct()

    train_init_net, train_net = self.get_training_nets()

    input_blob = input_record.field_blobs()[0]
    output_blob = d_output.field_blobs()[0]

    def dropout_spec(is_test):
        # Expected Dropout op with the given is_test flag and ratio 0.5.
        return OpSpec(
            "Dropout",
            [input_blob],
            [output_blob, None],
            {'is_test': is_test, 'ratio': 0.5}
        )

    self.assertNetContainOps(train_net, [dropout_spec(0)])

    eval_net = self.get_eval_net()
    predict_net = self.get_predict_net()

    # Dropout stays active at inference time only when requested.
    inference_spec = dropout_spec(0 if dropout_for_eval else 1)
    self.assertNetContainOps(eval_net, [inference_spec])
    self.assertNetContainOps(predict_net, [inference_spec])

    workspace.RunNetOnce(train_init_net)
    workspace.RunNetOnce(train_net)

    for inference_net in (eval_net, predict_net):
        schema.FeedRecord(input_record, [X])
        workspace.RunNetOnce(inference_net)
|
2017-07-05 06:46:00 +00:00
|
|
|
|
2017-08-22 23:54:00 +00:00
|
|
|
@given(
    num_inputs=st.integers(1, 3),
    batch_size=st.integers(5, 10)
)
def testMergeIdListsLayer(self, num_inputs, batch_size):
    '''Check that MergeIdLists outputs a record matching the IdList schema.

    Builds `num_inputs` random id lists (per-row lengths in [0, 5), values
    in [1, 10)), feeds them as a tuple of lists with categorical_limit=20
    and verifies the schema of the merged output, ignoring field names.

    The schema check uses self.assertTrue instead of a bare `assert`, so it
    still runs under `python -O` and matches the other schema checks in
    this test case.
    '''
    inputs = []
    for _ in range(num_inputs):
        lengths = np.random.randint(5, size=batch_size).astype(np.int32)
        size = lengths.sum()
        values = np.random.randint(1, 10, size=size).astype(np.int64)
        # FeedRecord expects the blobs flattened as lengths, values, ...
        inputs.append(lengths)
        inputs.append(values)
    input_schema = schema.Tuple(
        *[schema.List(
            schema.Scalar(dtype=np.int64, metadata=schema.Metadata(
                categorical_limit=20
            ))) for _ in range(num_inputs)]
    )

    input_record = schema.NewRecord(self.model.net, input_schema)
    schema.FeedRecord(input_record, inputs)
    output_schema = self.model.MergeIdLists(input_record)
    self.assertTrue(schema.equal_schemas(
        output_schema, IdList,
        check_field_names=False))
|
|
|
|
|
|
2017-07-05 06:46:00 +00:00
|
|
|
@given(
    batch_size=st.integers(min_value=2, max_value=10),
    input_dims=st.integers(min_value=5, max_value=10),
    output_dims=st.integers(min_value=5, max_value=10),
    bandwidth=st.floats(min_value=0.1, max_value=5),
)
def testRandomFourierFeatures(self, batch_size, input_dims, output_dims, bandwidth):
    '''Check the ops and numeric output of the RandomFourierFeatures layer.

    The init net must contain GaussianFill and UniformFill, and the train,
    eval and predict nets must each contain FC -> Cos -> Scale. After each
    of those three nets is checked, the layer output is compared with
    scale * cos(X W^T + b), where scale = sqrt(2 / output_dims).
    '''

    def _rff_hypothesis_test(rff_output, X, W, b, scale):
        '''
        Runs hypothesis test for Random Fourier Features layer.

        Inputs:
            rff_output -- output of net after running random fourier features layer
            X -- input data
            W -- weight parameter from train_init_net
            b -- bias parameter from train_init_net
            scale -- value by which to scale the output vector
        '''
        output = workspace.FetchBlob(rff_output)
        output_ref = scale * np.cos(np.dot(X, np.transpose(W)) + b)
        npt.assert_allclose(output, output_ref, rtol=1e-3, atol=1e-3)

    X = np.random.random((batch_size, input_dims)).astype(np.float32)
    scale = np.sqrt(2.0 / output_dims)
    input_record = self.new_record(schema.Scalar((np.float32, (input_dims,))))
    schema.FeedRecord(input_record, [X])
    input_blob = input_record.field_blobs()[0]
    rff_output = self.model.RandomFourierFeatures(input_record,
                                                  output_dims,
                                                  bandwidth)
    self.model.output_schema = schema.Struct()

    self.assertEqual(
        schema.Scalar((np.float32, (output_dims, ))),
        rff_output
    )

    train_init_net, train_net = self.get_training_nets()

    # Init net assertions
    init_ops_list = [
        OpSpec("GaussianFill", None, None),
        OpSpec("UniformFill", None, None),
    ]
    init_ops = self._test_net(train_init_net, init_ops_list)
    W = workspace.FetchBlob(self.model.layers[0].w)
    b = workspace.FetchBlob(self.model.layers[0].b)

    # Operation specifications: the FC op must read the blobs produced by
    # the two init ops as its weight and bias inputs.
    fc_spec = OpSpec("FC", [input_blob, init_ops[0].output[0],
                            init_ops[1].output[0]], None)
    cosine_spec = OpSpec("Cos", None, None)
    scale_spec = OpSpec("Scale", None, rff_output.field_blobs(),
                        {'scale': scale})
    ops_list = [
        fc_spec,
        cosine_spec,
        scale_spec
    ]

    # Train net assertions
    self._test_net(train_net, ops_list)
    _rff_hypothesis_test(rff_output(), X, W, b, scale)

    # Eval net assertions
    eval_net = self.get_eval_net()
    self._test_net(eval_net, ops_list)
    _rff_hypothesis_test(rff_output(), X, W, b, scale)

    # Predict net assertions
    predict_net = self.get_predict_net()
    self._test_net(predict_net, ops_list)
    _rff_hypothesis_test(rff_output(), X, W, b, scale)
|
2017-07-10 16:55:12 +00:00
|
|
|
|
|
|
|
|
@given(
    batch_size=st.integers(min_value=2, max_value=10),
    input_dims=st.integers(min_value=5, max_value=10),
    output_dims=st.integers(min_value=5, max_value=10),
    s=st.integers(min_value=0, max_value=3),
    scale=st.floats(min_value=0.1, max_value=5),
    set_weight_as_global_constant=st.booleans()
)
def testArcCosineFeatureMap(self, batch_size, input_dims, output_dims, s, scale,
                            set_weight_as_global_constant):
    '''Check the ops and numeric output of the ArcCosineFeatureMap layer.

    The expected op sequence depends on the degree s: FC -> Softsign -> Relu
    for s == 0, FC -> Relu for s == 1, and FC -> Relu -> Pow -> Mul
    otherwise. After each of the train, eval and predict nets is checked,
    the layer output is compared with a NumPy reference.
    '''

    def _arc_cosine_hypothesis_test(ac_output, X, W, b, s):
        '''
        Runs hypothesis test for Arc Cosine layer.

        Inputs:
            ac_output -- output of net after running arc cosine layer
            X -- input data
            W -- weight parameter from train_init_net
            b -- bias parameter from train_init_net
            s -- degree parameter
        '''
        # Output produced by the net
        fetched = workspace.FetchBlob(ac_output)

        # Reference output computed directly with NumPy
        projected = np.matmul(X, np.transpose(W)) + b
        powered = np.power(projected, s)
        # Gate applied where the projection is positive: a plain step for
        # s > 0, x / (1 + x) for s == 0.
        if s > 0:
            positive_gate = 1
        else:
            positive_gate = lambda x: x / (1 + x)  # noqa: E731
        gate = np.piecewise(projected,
                            [projected <= 0, projected > 0],
                            [0, positive_gate])
        expected = np.multiply(powered, gate)

        # Net output and reference must agree within tolerance
        npt.assert_allclose(fetched, expected, rtol=1e-3, atol=1e-3)

    X = np.random.normal(size=(batch_size, input_dims)).astype(np.float32)
    input_record = self.new_record(schema.Scalar((np.float32, (input_dims,))))
    schema.FeedRecord(input_record, [X])
    input_blob = input_record.field_blobs()[0]

    ac_output = self.model.ArcCosineFeatureMap(
        input_record,
        output_dims,
        s=s,
        scale=scale,
        set_weight_as_global_constant=set_weight_as_global_constant
    )
    self.model.output_schema = schema.Struct()
    self.assertEqual(
        schema.Scalar((np.float32, (output_dims, ))),
        ac_output
    )

    train_init_net, train_net = self.get_training_nets()

    # Run create_init_net to initialize the global constants, and W and b
    workspace.RunNetOnce(train_init_net)
    workspace.RunNetOnce(self.model.create_init_net(name='init_net'))

    if set_weight_as_global_constant:
        constants = self.model.global_constants
        weight_blob = constants['arc_cosine_feature_map_fixed_rand_W']
        bias_blob = constants['arc_cosine_feature_map_fixed_rand_b']
        W = workspace.FetchBlob(weight_blob)
        b = workspace.FetchBlob(bias_blob)
    else:
        W = workspace.FetchBlob(self.model.layers[0].random_w)
        b = workspace.FetchBlob(self.model.layers[0].random_b)

    # Expected operator sequence for the requested degree
    fc_spec = OpSpec("FC", [input_blob, None, None], None)
    if s == 0:
        ops_list = [
            fc_spec,
            OpSpec("Softsign", None, None),
            OpSpec("Relu", None, ac_output.field_blobs()),
        ]
    elif s == 1:
        ops_list = [
            fc_spec,
            OpSpec("Relu", None, ac_output.field_blobs()),
        ]
    else:
        ops_list = [
            fc_spec,
            OpSpec("Relu", None, None),
            OpSpec("Pow", None, None, {'exponent': float(s - 1)}),
            OpSpec("Mul", None, ac_output.field_blobs()),
        ]

    # Train, eval and predict nets are each built only when their turn
    # comes, then checked for the ops and for the numeric output.
    net_builders = (
        lambda: train_net,
        self.get_eval_net,
        self.get_predict_net,
    )
    for build_net in net_builders:
        self._test_net(build_net(), ops_list)
        _arc_cosine_hypothesis_test(ac_output(), X, W, b, s)
|
2017-07-14 20:13:14 +00:00
|
|
|
|
|
|
|
|
@given(
|
|
|
|
|
batch_size=st.integers(min_value=2, max_value=10),
|
|
|
|
|
input_dims=st.integers(min_value=5, max_value=10),
|
|
|
|
|
output_dims=st.integers(min_value=5, max_value=10),
|
|
|
|
|
s=st.integers(min_value=0, max_value=3),
|
2017-07-26 23:33:04 +00:00
|
|
|
scale=st.floats(min_value=0.1, max_value=5),
|
|
|
|
|
set_weight_as_global_constant=st.booleans(),
|
2017-07-27 22:10:38 +00:00
|
|
|
use_struct_input=st.booleans(),
|
2017-07-14 20:13:14 +00:00
|
|
|
)
|
2017-07-26 23:33:04 +00:00
|
|
|
def testSemiRandomFeatures(self, batch_size, input_dims, output_dims, s, scale,
|
2017-07-27 22:10:38 +00:00
|
|
|
set_weight_as_global_constant, use_struct_input):
|
|
|
|
|
|
|
|
|
|
def _semi_random_hypothesis_test(srf_output, X_full, X_random, rand_w,
|
|
|
|
|
rand_b, s):
|
2019-07-23 21:24:21 +00:00
|
|
|
'''
|
2017-07-27 22:10:38 +00:00
|
|
|
Runs hypothesis test for Semi Random Features layer.
|
|
|
|
|
|
|
|
|
|
Inputs:
|
|
|
|
|
srf_output -- output of net after running semi random features layer
|
|
|
|
|
X_full -- full input data
|
|
|
|
|
X_random -- random-output input data
|
|
|
|
|
rand_w -- random-initialized weight parameter from train_init_net
|
|
|
|
|
rand_b -- random-initialized bias parameter from train_init_net
|
|
|
|
|
s -- degree parameter
|
|
|
|
|
|
2019-07-23 21:24:21 +00:00
|
|
|
'''
|
2017-07-27 22:10:38 +00:00
|
|
|
# Get output from net
|
|
|
|
|
net_output = workspace.FetchBlob(srf_output)
|
|
|
|
|
|
|
|
|
|
# Fetch learned parameter blobs
|
|
|
|
|
learned_w = workspace.FetchBlob(self.model.layers[0].learned_w)
|
|
|
|
|
learned_b = workspace.FetchBlob(self.model.layers[0].learned_b)
|
|
|
|
|
|
|
|
|
|
# Computing output directly
|
|
|
|
|
x_rand = np.matmul(X_random, np.transpose(rand_w)) + rand_b
|
|
|
|
|
x_learn = np.matmul(X_full, np.transpose(learned_w)) + learned_b
|
|
|
|
|
x_pow = np.power(x_rand, s)
|
2017-08-04 06:29:09 +00:00
|
|
|
if s > 0:
|
|
|
|
|
h_rand_features = np.piecewise(x_rand,
|
|
|
|
|
[x_rand <= 0, x_rand > 0],
|
|
|
|
|
[0, 1])
|
|
|
|
|
else:
|
|
|
|
|
h_rand_features = np.piecewise(x_rand,
|
|
|
|
|
[x_rand <= 0, x_rand > 0],
|
|
|
|
|
[0, lambda x: x / (1 + x)])
|
2017-07-27 22:10:38 +00:00
|
|
|
output_ref = np.multiply(np.multiply(x_pow, h_rand_features), x_learn)
|
|
|
|
|
|
|
|
|
|
# Comparing net output and computed output
|
2017-09-23 06:24:00 +00:00
|
|
|
npt.assert_allclose(net_output, output_ref, rtol=1e-3, atol=1e-3)
|
2017-07-27 22:10:38 +00:00
|
|
|
|
|
|
|
|
X_full = np.random.normal(size=(batch_size, input_dims)).astype(np.float32)
|
|
|
|
|
if use_struct_input:
|
|
|
|
|
X_random = np.random.normal(size=(batch_size, input_dims)).\
|
|
|
|
|
astype(np.float32)
|
|
|
|
|
input_data = [X_full, X_random]
|
|
|
|
|
input_record = self.new_record(schema.Struct(
|
|
|
|
|
('full', schema.Scalar(
|
|
|
|
|
(np.float32, (input_dims,))
|
|
|
|
|
)),
|
|
|
|
|
('random', schema.Scalar(
|
|
|
|
|
(np.float32, (input_dims,))
|
|
|
|
|
))
|
|
|
|
|
))
|
|
|
|
|
else:
|
|
|
|
|
X_random = X_full
|
|
|
|
|
input_data = [X_full]
|
|
|
|
|
input_record = self.new_record(schema.Scalar(
|
|
|
|
|
(np.float32, (input_dims,))
|
|
|
|
|
))
|
2017-07-14 20:13:14 +00:00
|
|
|
|
2017-07-27 22:10:38 +00:00
|
|
|
schema.FeedRecord(input_record, input_data)
|
2017-07-26 23:33:04 +00:00
|
|
|
srf_output = self.model.SemiRandomFeatures(
|
|
|
|
|
input_record,
|
|
|
|
|
output_dims,
|
|
|
|
|
s=s,
|
2017-08-07 19:45:55 +00:00
|
|
|
scale_random=scale,
|
|
|
|
|
scale_learned=scale,
|
2017-07-26 23:33:04 +00:00
|
|
|
set_weight_as_global_constant=set_weight_as_global_constant
|
|
|
|
|
)
|
2017-07-27 22:10:38 +00:00
|
|
|
|
2017-07-14 20:13:14 +00:00
|
|
|
self.model.output_schema = schema.Struct()
|
2017-07-27 22:10:38 +00:00
|
|
|
|
2017-07-14 20:13:14 +00:00
|
|
|
self.assertEqual(
|
2017-07-27 22:10:38 +00:00
|
|
|
schema.Struct(
|
|
|
|
|
('full', schema.Scalar(
|
|
|
|
|
(np.float32, (output_dims,))
|
|
|
|
|
)),
|
|
|
|
|
('random', schema.Scalar(
|
|
|
|
|
(np.float32, (output_dims,))
|
|
|
|
|
))
|
|
|
|
|
),
|
2017-07-14 20:13:14 +00:00
|
|
|
srf_output
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
init_ops_list = [
|
|
|
|
|
OpSpec("GaussianFill", None, None),
|
|
|
|
|
OpSpec("UniformFill", None, None),
|
|
|
|
|
OpSpec("GaussianFill", None, None),
|
|
|
|
|
OpSpec("UniformFill", None, None),
|
|
|
|
|
]
|
|
|
|
|
train_init_net, train_net = self.get_training_nets()
|
|
|
|
|
|
|
|
|
|
# Need to run to initialize the global constants for layer
|
2017-07-26 23:33:04 +00:00
|
|
|
workspace.RunNetOnce(self.model.create_init_net(name='init_net'))
|
2017-07-14 20:13:14 +00:00
|
|
|
|
2017-07-26 23:33:04 +00:00
|
|
|
if set_weight_as_global_constant:
|
|
|
|
|
# If weight params are global constants, they won't be in train_init_net
|
|
|
|
|
init_ops = self._test_net(train_init_net, init_ops_list[:2])
|
|
|
|
|
rand_w = workspace.FetchBlob(
|
|
|
|
|
self.model.global_constants['semi_random_features_fixed_rand_W']
|
|
|
|
|
)
|
|
|
|
|
rand_b = workspace.FetchBlob(
|
|
|
|
|
self.model.global_constants['semi_random_features_fixed_rand_b']
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Operation specifications
|
2017-07-27 22:10:38 +00:00
|
|
|
fc_random_spec = OpSpec("FC", [None, None, None], None)
|
|
|
|
|
fc_learned_spec = OpSpec("FC", [None, init_ops[0].output[0],
|
2017-07-26 23:33:04 +00:00
|
|
|
init_ops[1].output[0]], None)
|
|
|
|
|
else:
|
|
|
|
|
init_ops = self._test_net(train_init_net, init_ops_list)
|
|
|
|
|
rand_w = workspace.FetchBlob(self.model.layers[0].random_w)
|
|
|
|
|
rand_b = workspace.FetchBlob(self.model.layers[0].random_b)
|
|
|
|
|
|
|
|
|
|
# Operation specifications
|
2017-07-27 22:10:38 +00:00
|
|
|
fc_random_spec = OpSpec("FC", [None, init_ops[0].output[0],
|
2017-07-26 23:33:04 +00:00
|
|
|
init_ops[1].output[0]], None)
|
2017-07-27 22:10:38 +00:00
|
|
|
fc_learned_spec = OpSpec("FC", [None, init_ops[2].output[0],
|
2017-07-26 23:33:04 +00:00
|
|
|
init_ops[3].output[0]], None)
|
2017-07-14 20:13:14 +00:00
|
|
|
|
2017-08-04 06:29:09 +00:00
|
|
|
softsign_spec = OpSpec("Softsign", None, None)
|
2017-07-14 20:13:14 +00:00
|
|
|
relu_spec = OpSpec("Relu", None, None)
|
2017-08-04 06:29:09 +00:00
|
|
|
relu_output_spec = OpSpec("Relu", None, srf_output.random.field_blobs())
|
2017-07-14 20:13:14 +00:00
|
|
|
pow_spec = OpSpec("Pow", None, None, {'exponent': float(s - 1)})
|
2017-08-04 06:29:09 +00:00
|
|
|
mul_interim_spec = OpSpec("Mul", None, srf_output.random.field_blobs())
|
2017-07-27 22:10:38 +00:00
|
|
|
mul_spec = OpSpec("Mul", None, srf_output.full.field_blobs())
|
2017-07-14 20:13:14 +00:00
|
|
|
|
|
|
|
|
if s == 0:
|
|
|
|
|
ops_list = [
|
|
|
|
|
fc_learned_spec,
|
2017-07-27 22:10:38 +00:00
|
|
|
fc_random_spec,
|
2017-08-04 06:29:09 +00:00
|
|
|
softsign_spec,
|
|
|
|
|
relu_output_spec,
|
2017-07-14 20:13:14 +00:00
|
|
|
mul_spec,
|
|
|
|
|
]
|
|
|
|
|
elif s == 1:
|
|
|
|
|
ops_list = [
|
|
|
|
|
fc_learned_spec,
|
2017-07-27 22:10:38 +00:00
|
|
|
fc_random_spec,
|
2017-08-04 06:29:09 +00:00
|
|
|
relu_output_spec,
|
2017-07-14 20:13:14 +00:00
|
|
|
mul_spec,
|
|
|
|
|
]
|
|
|
|
|
else:
|
|
|
|
|
ops_list = [
|
|
|
|
|
fc_learned_spec,
|
2017-07-27 22:10:38 +00:00
|
|
|
fc_random_spec,
|
2017-07-14 20:13:14 +00:00
|
|
|
relu_spec,
|
|
|
|
|
pow_spec,
|
|
|
|
|
mul_interim_spec,
|
|
|
|
|
mul_spec,
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
# Train net assertions
|
|
|
|
|
self._test_net(train_net, ops_list)
|
2017-07-27 22:10:38 +00:00
|
|
|
_semi_random_hypothesis_test(srf_output.full(), X_full, X_random,
|
|
|
|
|
rand_w, rand_b, s)
|
2017-07-14 20:13:14 +00:00
|
|
|
|
|
|
|
|
# Eval net assertions
|
|
|
|
|
eval_net = self.get_eval_net()
|
|
|
|
|
self._test_net(eval_net, ops_list)
|
2017-07-27 22:10:38 +00:00
|
|
|
_semi_random_hypothesis_test(srf_output.full(), X_full, X_random,
|
|
|
|
|
rand_w, rand_b, s)
|
2017-07-14 20:13:14 +00:00
|
|
|
|
|
|
|
|
# Predict net assertions
|
|
|
|
|
predict_net = self.get_predict_net()
|
|
|
|
|
self._test_net(predict_net, ops_list)
|
2017-07-27 22:10:38 +00:00
|
|
|
_semi_random_hypothesis_test(srf_output.full(), X_full, X_random,
|
|
|
|
|
rand_w, rand_b, s)
|
2017-08-08 17:40:27 +00:00
|
|
|
|
|
|
|
|
def testConv(self):
    """Check the Conv layer's schema, fetched output shape and emitted ops.

    A random NHWC batch is fed through ``self.model.Conv``. The test then
    verifies that:

    * the returned record compares equal to a float32 ``Scalar`` declared
      with shape ``(output_dims,)``;
    * after a forward-only training run the fetched output has shape
      ``(batch_size, H, W, output_dims)``;
    * the train-init net contains ``XavierFill`` and ``ConstantFill`` ops,
      whose outputs are wired as the 2nd and 3rd inputs of a single
      ``Conv`` op in the train, predict and eval nets.
    """
    batch_size = 50
    H = 1
    W = 10
    C = 50
    output_dims = 32
    kernel_h = 1
    kernel_w = 3
    stride_h = 1
    stride_w = 1
    pad_t = 0
    pad_b = 0
    # NOTE(review): left/right padding is passed as None; presumably the
    # layer picks its own default for these -- confirm against the layer.
    pad_r = None
    pad_l = None

    input_record = self.new_record(schema.Scalar((np.float32, (H, W, C))))
    X = np.random.random((batch_size, H, W, C)).astype(np.float32)
    schema.FeedRecord(input_record, [X])
    conv = self.model.Conv(
        input_record,
        output_dims,
        kernel_h=kernel_h,
        kernel_w=kernel_w,
        stride_h=stride_h,
        stride_w=stride_w,
        pad_t=pad_t,
        pad_b=pad_b,
        pad_r=pad_r,
        pad_l=pad_l,
        order='NHWC'
    )

    self.assertEqual(
        schema.Scalar((np.float32, (output_dims,))),
        conv
    )

    self.run_train_net_forward_only()
    output_record = schema.FetchRecord(conv)
    # The fetched output is expected to keep the input's H and W while its
    # last (channel) axis becomes output_dims (32 here, versus C == 50 on
    # the input). unittest assertions are used instead of bare ``assert``
    # so the checks survive ``python -O`` and report both values on failure.
    self.assertEqual(
        output_record.field_types()[0].shape, (H, W, output_dims)
    )
    self.assertEqual(
        output_record().shape, (batch_size, H, W, output_dims)
    )

    train_init_net, train_net = self.get_training_nets()
    # Init net assertions
    init_ops = self.assertNetContainOps(
        train_init_net,
        [
            OpSpec("XavierFill", None, None),
            OpSpec("ConstantFill", None, None),
        ]
    )
    # The Conv op must consume the input blob plus the two blobs produced
    # by the fill ops matched above, and write to the layer's output blob.
    conv_spec = OpSpec(
        "Conv",
        [
            input_record.field_blobs()[0],
            init_ops[0].output[0],
            init_ops[1].output[0],
        ],
        conv.field_blobs()
    )

    # Train net assertions
    self.assertNetContainOps(train_net, [conv_spec])

    # Predict net assertions
    predict_net = self.get_predict_net()
    self.assertNetContainOps(predict_net, [conv_spec])

    # Eval net assertions
    eval_net = self.get_eval_net()
    self.assertNetContainOps(eval_net, [conv_spec])
|
2018-03-20 04:42:56 +00:00
|
|
|
|
|
|
|
|
@given(
    num=st.integers(min_value=10, max_value=100),
    feed_weight=st.booleans(),
    use_inv_var_parameterization=st.booleans(),
    use_log_barrier=st.booleans(),
    enable_diagnose=st.booleans(),
    **hu.gcs
)
@settings(deadline=1000)
def testAdaptiveWeight(
    self, num, feed_weight, use_inv_var_parameterization, use_log_barrier,
    enable_diagnose, gc, dc
):
    """Compare the AdaptiveWeight layer's output against a NumPy reference.

    ``num`` random scalars are fed as a ``RawTuple`` record. The layer is
    built with the estimation method (``'inv_var'`` or ``'log_std'``) and
    positivity method (``'log_barrier'`` or ``'pos_grad_proj'``) selected
    by the hypothesis-drawn booleans. After running the train-init and
    train nets once, the fetched output must match
    ``sum(w * data + 0.5 * log(1 / (2 * w)))`` within 1e-4, where ``w`` is
    the fed weight vector, or a uniform ``1 / num`` vector when no weights
    are fed.

    When ``enable_diagnose`` is set, the model must expose exactly ``num``
    ad-hoc plot blobs whose flattened values match ``w``; otherwise it
    must expose none.
    """
    input_record = self.new_record(schema.RawTuple(num))
    data = np.random.random(num)
    schema.FeedRecord(
        input_record, [np.array(x).astype(np.float32) for x in data]
    )
    weights = np.random.random(num) if feed_weight else None
    output_record = self.model.AdaptiveWeight(
        input_record,
        weights=weights,
        estimation_method=(
            'inv_var' if use_inv_var_parameterization else 'log_std'
        ),
        pos_optim_method=(
            'log_barrier' if use_log_barrier else 'pos_grad_proj'
        ),
        enable_diagnose=enable_diagnose
    )
    train_init_net, train_net = self.get_training_nets(True)
    workspace.RunNetOnce(train_init_net)
    workspace.RunNetOnce(train_net)
    result = workspace.FetchBlob(output_record())
    if not feed_weight:
        # The reference uses a uniform 1/num weight vector when the layer
        # was built without explicit weights.
        weights = np.full(num, 1. / num)
    expected = np.sum(weights * data + 0.5 * np.log(1. / 2. / weights))
    npt.assert_allclose(expected, result, atol=1e-4, rtol=1e-4)
    # unittest assertions are used instead of bare ``assert`` so the
    # checks survive ``python -O`` and report both values on failure.
    if enable_diagnose:
        self.assertEqual(len(self.model.ad_hoc_plot_blobs), num)
        reconst_weights_from_ad_hoc = np.array(
            [workspace.FetchBlob(b) for b in self.model.ad_hoc_plot_blobs]
        ).flatten()
        npt.assert_allclose(
            reconst_weights_from_ad_hoc, weights, atol=1e-4, rtol=1e-4
        )
    else:
        self.assertEqual(len(self.model.ad_hoc_plot_blobs), 0)
|
2018-03-20 04:42:56 +00:00
|
|
|
|
|
|
|
|
@given(num=st.integers(min_value=10, max_value=100), **hu.gcs)
def testConstantWeight(self, num, gc, dc):
    """Compare the ConstantWeight layer's output against ``sum(w * data)``.

    ``num`` random scalars are fed as a ``RawTuple`` record and combined by
    ``self.model.ConstantWeight`` with ``num`` random weights. After the
    train-init and train nets have each run once, the fetched output must
    match the NumPy weighted sum within 1e-4 (absolute and relative).
    """
    record = self.new_record(schema.RawTuple(num))
    samples = np.random.random(num)
    # Each tuple field receives one float32 scalar.
    feed_values = [np.array(v).astype(np.float32) for v in samples]
    schema.FeedRecord(record, feed_values)

    coefficients = np.random.random(num)
    weighted = self.model.ConstantWeight(record, weights=coefficients)

    init_net, net = self.get_training_nets(True)
    for current_net in (init_net, net):
        workspace.RunNetOnce(current_net)

    actual = workspace.FetchBlob(weighted())
    reference = np.sum(coefficients * samples)
    npt.assert_allclose(reference, actual, atol=1e-4, rtol=1e-4)
|
|
|
|
|
|
|
|
|
|
@given(**hu.gcs)
|
2020-08-11 22:31:36 +00:00
|
|
|
@settings(deadline=10000)
|
2018-03-20 04:42:56 +00:00
|
|
|
def testHomotopyWeight(self, gc, dc):
|
|
|
|
|
input_record = self.new_record(schema.RawTuple(2))
|
|
|
|
|
data = np.random.random(2)
|
|
|
|
|
schema.FeedRecord(
|
|
|
|
|
input_record, [np.array(x).astype(np.float32) for x in data]
|
|
|
|
|
)
|
2018-03-26 18:17:35 +00:00
|
|
|
# ensure: quad_life > 2 * half_life
|
|
|
|
|
half_life = int(np.random.random() * 1e2 + 1)
|
|
|
|
|
quad_life = int(np.random.random() * 1e3 + 2 * half_life + 1)
|
[Caffe2] Changes done inside Facebook (#6378)
* fix unit test for sqrt op
From the error logging:
[idx, grad, grad_estimate] are:
[[ 146. 0.5 0.45776367]
[ 147. 0.5 0.45776367]
The gradient == 0.5 is correct, which means the SqrtOp and its gradient is doing right job. (Because y = sqrt(x), loss = y^2/2 = x/2, and then d(loss)/dx = 1/2 = 0.5; )
The test failed because of numerical problem of grad_estimate (in unit test). It can be because the step_size is small, and float precision is not high (when there are multiple elements in the tensor, we do sum(y^2) to compute loss)
This diff
- increase the step size, and also move the test cases to be further away from 0 (where sqrt(x) is not well defined) to be safe :)
- also clean up, and merge the test case for inplace Vs. non-inplace
Tested with:
`CAFFE2_HYPOTHESIS_PROFILE=debug ai_bt caffe2/caffe2/python/operator_test:elementwise_ops_test -- "test_sqrt"`
* CompositeReader & CompositeReaderBuilder
A new type of reader gluing multiple readers together.
* Back out "Revert D7394363: [GanH]: Log D Trick for Cross Entropy with Sigmoid"
Original commit changeset: 9325a4356dbe
* [dai][WIP] convert params to int8 on ps before sending to trainer
Add float->uint8 conversion in addition to float->fp16 conversion in model_saver.
* [easy] improve unit test for sparse length sum ops
as desc.
#accept2ship
* Update GitHub upstream to 771fcb3455cbfe69c2abcc4cb3bd7ef92d59af24
* move sparse hash unique ops to OOS and add unit tests
- move the SparseHash version to OOS, since 'sparsehash' is already deps of caffe2 OOS: https://fburl.com/arssw4n1
- The 'SparseHash' engine is also being used in OOS, so the SparseHash version shall be in OOS to reduce confusion: https://fburl.com/o5ea7ah2
- fix the CUDA UniqueOp for the case when batch is empty.
- add unit test
* group_norm_op for caffe2
This is the cuda op for Group Normalization (GN): https://arxiv.org/abs/1803.08494
This code implements GN in one op that computes Y=gamma * (X-mu) / sigma + beta and also its gradients. It is expected to have minimal memory consumption (similar to the BN op), without creating new blobs if GN were implemented as several ops (e.g., reshape, norm_mean/std, affine_channel).
* Resubmit D7405233: disappeared in D7464958
OOS publish causes the op missing -- however, test was still there
* [c2] add sparse hash engine for cuda unique op
The SparseHash version of UniqueOp copy input tensor to CPU, and make use of sparse hash map to get unique output, and then copy back to GPU.
* [dper][gpu] enable unit testing gpu trainer for sparse nn
to debug the GPU trainer using mock data in unit test.
make it easier to develop GPU trainer for new models.
* Reuse Gloo context for Synchronize() calls
Previously we were creating (and leaking) the Gloo context on each call to Synchronize(). Now only run the common world op and create the barrier net once, then run the barrier net on each Synchronize() call. Since timeout is associated with the Gloo context, assert that the timeout is fixed instead of trying to handle the complexity of multiple timeouts (and associated contexts).
* [GanH/WGAN][1/n]: add FC param clipping
as titled
* [mobile] minimizing changes between caffe2_benchmark and speed_benchmark
* [GanH]: enable diagnose within model
avoid finding blob names but to directly enable inside the model
* Add `net_transformer_fun` option to DPM
This callback allows for various transformations to be made to the
model after gradient operators have been added. The immediate motivation for
this is to allow transformations such as "checkpoint-and-recompute" which
allow trading off memory for additional compute.
Adding several callbacks like this has made DPM's API less than ideal at this
stage. However, I could not find any reasonable alternative.
* [DT] [33/n] Compile flow task groups
task groups need to compiled in order to pickle the object in fblearner. However I also changed the Job's compile function as creating new object is not necessary.
* Initial commit for sparse_normalize vectorization and benchmark
* [GanH]: LB Calibration for JSD
as titled
* Tracing event in async executor
Adding event tracing through TRACE_EVENT macro in async executor
* [Resubmit] D7409751 Resetting book-keeping blobs when the reservoir is reset
D7409751 got lost in D7464958
* Visualizing realtime weights values
we want to visualize the weights values as optimizer is iterating. This diff supports to visual the weights at an assigned index.
Currently, we assume the blob to be 2 dimensional.
* [GanH][Easy]: Fix Homotopy Weighting
apparently, there was a bug in homotopy weight (alpha, beta) update
* [c2] move sparse hash unique op out of oss
so that oss do not need to depend on google hash map.
* Get rid of std::round as it's not supported on Android
* Revert changes on setup.py
* Skip shaky test on Dataio
* fix
2018-04-11 04:11:43 +00:00
|
|
|
min_weight = np.random.random()
|
|
|
|
|
max_weight = np.random.random() + min_weight + 1e-5
|
2018-03-20 04:42:56 +00:00
|
|
|
result = self.model.HomotopyWeight(
|
|
|
|
|
input_record,
|
[Caffe2] Changes done inside Facebook (#6378)
* fix unit test for sqrt op
From the error logging:
[idx, grad, grad_estimate] are:
[[ 146. 0.5 0.45776367]
[ 147. 0.5 0.45776367]
The gradient == 0.5 is correct, which means the SqrtOp and its gradient is doing right job. (Because y = sqrt(x), loss = y^2/2 = x/2, and then d(loss)/dx = 1/2 = 0.5; )
The test failed because of numerical problem of grad_estimate (in unit test). It can be because the step_size is small, and float precision is not high (when there are multiple elements in the tensor, we do sum(y^2) to compute loss)
This diff
- increase the step size, and also move the test cases to be further away from 0 (where sqrt(x) is not well defined) to be safe :)
- also clean up, and merge the test case for inplace Vs. non-inplace
Tested with:
`CAFFE2_HYPOTHESIS_PROFILE=debug ai_bt caffe2/caffe2/python/operator_test:elementwise_ops_test -- "test_sqrt"`
* CompositeReader & CompositeReaderBuilder
A new type of reader gluing multiple readers together.
* Back out "Revert D7394363: [GanH]: Log D Trick for Cross Entropy with Sigmoid"
Original commit changeset: 9325a4356dbe
* [dai][WIP] convert params to int8 on ps before sending to trainer
Add float->uint8 conversion in addition to float->fp16 conversion in model_saver.
* [easy] improve unit test for sparse length sum ops
as desc.
#accept2ship
* Update GitHub upstream to 771fcb3455cbfe69c2abcc4cb3bd7ef92d59af24
* move sparse hash unique ops to OOS and add unit tests
- move the SparseHash version to OOS, since 'sparsehash' is already deps of caffe2 OOS: https://fburl.com/arssw4n1
- The 'SparseHash' engine is also being used in OOS, so the SparseHash version shall be in OOS to reduce confusion: https://fburl.com/o5ea7ah2
- fix the CUDA UniqueOp for the case when batch is empty.
- add unit test
* group_norm_op for caffe2
This is the cuda op for Group Normalization (GN): https://arxiv.org/abs/1803.08494
This code implements GN in one op that computes Y=gamma * (X-mu) / sigma + beta and also its gradients. It is expected to have minimal memory consumption (similar to the BN op), without creating new blobs if GN were implemented as several ops (e.g., reshape, norm_mean/std, affine_channel).
* Resubmit D7405233: disappeared in D7464958
OOS publish causes the op missing -- however, test was still there
* [c2] add sparse hash engine for cuda unique op
The SparseHash version of UniqueOp copy input tensor to CPU, and make use of sparse hash map to get unique output, and then copy back to GPU.
* [dper][gpu] enable unit testing gpu trainer for sparse nn
to debug the GPU trainer using mock data in unit test.
make it easier to develop GPU trainer for new models.
* Reuse Gloo context for Synchronize() calls
Previously we were creating (and leaking) the Gloo context on each call to Synchronize(). Now only run the common world op and create the barrier net once, then run the barrier net on each Synchronize() call. Since timeout is associated with the Gloo context, assert that the timeout is fixed instead of trying to handle the complexity of multiple timeouts (and associated contexts).
* [GanH/WGAN][1/n]: add FC param clipping
as titled
* [mobile] minimizing changes between caffe2_benchmark and speed_benchmark
* [GanH]: enable diagnose within model
avoid finding blob names but to directly enable inside the model
* Add `net_transformer_fun` option to DPM
This callback allows for various transformations to be made to the
model after gradient operators have been added. The immediate motivation for
this is to allow transformations such as "checkpoint-and-recompute" which
allow trading off memory for additional compute.
Adding several callbacks like this has made DPM's API less than ideal at this
stage. However, I could not find any reasonable alternative.
* [DT] [33/n] Compile flow task groups
task groups need to compiled in order to pickle the object in fblearner. However I also changed the Job's compile function as creating new object is not necessary.
* Initial commit for sparse_normalize vectorization and benchmark
* [GanH]: LB Calibration for JSD
as titled
* Tracing event in async executor
Adding event tracing through TRACE_EVENT macro in async executor
* [Resubmit] D7409751 Resetting book-keeping blobs when the reservoir is reset
D7409751 got lost in D7464958
* Visualizing realtime weights values
we want to visualize the weights values as optimizer is iterating. This diff supports to visual the weights at an assigned index.
Currently, we assume the blob to be 2 dimensional.
* [GanH][Easy]: Fix Homotopy Weighting
apparently, there was a bug in homotopy weight (alpha, beta) update
* [c2] move sparse hash unique op out of oss
so that oss do not need to depend on google hash map.
* Get rid of std::round as it's not supported on Android
* Revert changes on setup.py
* Skip shaky test on Dataio
* fix
2018-04-11 04:11:43 +00:00
|
|
|
min_weight=min_weight,
|
|
|
|
|
max_weight=max_weight,
|
2018-03-20 04:42:56 +00:00
|
|
|
half_life=half_life,
|
|
|
|
|
quad_life=quad_life,
|
|
|
|
|
)
|
|
|
|
|
train_init_net, train_net = self.get_training_nets(True)
|
|
|
|
|
workspace.RunNetOnce(train_init_net)
|
|
|
|
|
workspace.CreateNet(train_net)
|
|
|
|
|
workspace.RunNet(train_net.Name(), num_iter=half_life)
|
|
|
|
|
half_life_result = workspace.FetchBlob(result())
|
|
|
|
|
workspace.RunNet(train_net.Name(), num_iter=quad_life - half_life)
|
|
|
|
|
quad_life_result = workspace.FetchBlob(result())
|
[Caffe2] Changes done inside Facebook (#6378)
* fix unit test for sqrt op
From the error logging:
[idx, grad, grad_estimate] are:
[[ 146. 0.5 0.45776367]
[ 147. 0.5 0.45776367]
The gradient == 0.5 is correct, which means the SqrtOp and its gradient is doing right job. (Because y = sqrt(x), loss = y^2/2 = x/2, and then d(loss)/dx = 1/2 = 0.5; )
The test failed because of numerical problem of grad_estimate (in unit test). It can be because the step_size is small, and float precision is not high (when there are multiple elements in the tensor, we do sum(y^2) to compute loss)
This diff
- increase the step size, and also move the test cases to be further away from 0 (where sqrt(x) is not well defined) to be safe :)
- also clean up, and merge the test case for inplace Vs. non-inplace
Tested with:
`CAFFE2_HYPOTHESIS_PROFILE=debug ai_bt caffe2/caffe2/python/operator_test:elementwise_ops_test -- "test_sqrt"`
* CompositeReader & CompositeReaderBuilder
A new type of reader gluing multiple readers together.
* Back out "Revert D7394363: [GanH]: Log D Trick for Cross Entropy with Sigmoid"
Original commit changeset: 9325a4356dbe
* [dai][WIP] convert params to int8 on ps before sending to trainer
Add float->uint8 conversion in addition to float->fp16 conversion in model_saver.
* [easy] improve unit test for sparse length sum ops
as desc.
#accept2ship
* Update GitHub upstream to 771fcb3455cbfe69c2abcc4cb3bd7ef92d59af24
* move sparse hash unique ops to OOS and add unit tests
- move the SparseHash version to OOS, since 'sparsehash' is already deps of caffe2 OOS: https://fburl.com/arssw4n1
- The 'SparseHash' engine is also being used in OOS, so the SparseHash version shall be in OOS to reduce confusion: https://fburl.com/o5ea7ah2
- fix the CUDA UniqueOp for the case when batch is empty.
- add unit test
* group_norm_op for caffe2
This is the cuda op for Group Normalization (GN): https://arxiv.org/abs/1803.08494
This code implements GN in one op that computes Y=gamma * (X-mu) / sigma + beta and also its gradients. It is expected to have minimal memory consumption (similar to the BN op), without creating new blobs if GN were implemented as several ops (e.g., reshape, norm_mean/std, affine_channel).
* Resubmit D7405233: disappeared in D7464958
OOS publish causes the op missing -- however, test was still there
* [c2] add sparse hash engine for cuda unique op
The SparseHash version of UniqueOp copy input tensor to CPU, and make use of sparse hash map to get unique output, and then copy back to GPU.
* [dper][gpu] enable unit testing gpu trainer for sparse nn
to debug the GPU trainer using mock data in unit test.
make it easier to develop GPU trainer for new models.
* Reuse Gloo context for Synchronize() calls
Previously we were creating (and leaking) the Gloo context on each call to Synchronize(). Now only run the common world op and create the barrier net once, then run the barrier net on each Synchronize() call. Since timeout is associated with the Gloo context, assert that the timeout is fixed instead of trying to handle the complexity of multiple timeouts (and associated contexts).
* [GanH/WGAN][1/n]: add FC param clipping
as titled
* [mobile] minimizing changes between caffe2_benchmark and speed_benchmark
* [GanH]: enable diagnose within model
avoid finding blob names but to directly enable inside the model
* Add `net_transformer_fun` option to DPM
This callback allows for various transformations to be made to the
model after gradient operators have been added. The immediate motivation for
this is to allow transformations such as "checkpoint-and-recompute" which
allow trading off memory for additional compute.
Adding several callbacks like this has made DPM's API less than ideal at this
stage. However, I could not find any reasonable alternative.
* [DT] [33/n] Compile flow task groups
Task groups need to be compiled in order to pickle the object in fblearner. However, I also changed the Job's compile function, as creating a new object is not necessary.
* Initial commit for sparse_normalize vectorization and benchmark
* [GanH]: LB Calibration for JSD
as titled
* Tracing event in async executor
Adding event tracing through TRACE_EVENT macro in async executor
* [Resubmit] D7409751 Reseting book-keeping blobs when the reservoir is reset
D7409751 got lost in D7464958
* Visualizing realtime weights values
We want to visualize the weight values as the optimizer is iterating. This diff supports visualizing the weights at an assigned index.
Currently, we assume the blob to be 2 dimensional.
* [GanH][Easy]: Fix Homotopy Weighting
apparently, there was a bug in homotopy weight (alpha, beta) update
* [c2] move sparse hash unique op out of oss
so that oss do not need to depend on google hash map.
* Get rid of std::round as it's not supported on Android
* Revert changes on setup.py
* Skip shaky test on Dataio
* fix
2018-04-11 04:11:43 +00:00
|
|
|
|
|
|
|
|
alpha = (min_weight + max_weight) / 2.
|
|
|
|
|
beta = (min_weight + max_weight) / 2.
|
|
|
|
|
expected_half_life_result = alpha * data[0] + beta * data[1]
|
|
|
|
|
alpha = (3 * min_weight + max_weight) / 4.
|
|
|
|
|
beta = (min_weight + 3 * max_weight) / 4.
|
|
|
|
|
expected_quad_life_result = alpha * data[0] + beta * data[1]
|
2018-03-20 04:42:56 +00:00
|
|
|
npt.assert_allclose(
|
|
|
|
|
expected_half_life_result, half_life_result, atol=1e-2, rtol=1e-2
|
|
|
|
|
)
|
|
|
|
|
npt.assert_allclose(
|
|
|
|
|
expected_quad_life_result, quad_life_result, atol=1e-2, rtol=1e-2
|
|
|
|
|
)
|
2018-03-26 18:08:51 +00:00
|
|
|
|
|
|
|
|
def _testLabelSmooth(self, categories, binary_prob_label, bsz):
    """Run the LabelSmooth layer on random labels and verify its output.

    Args:
        categories: exclusive upper bound of the random integer labels; in
            the categorical case also the side length of the square
            smoothing matrix.
        binary_prob_label: if true, a 2-element smoothing vector is used
            and each label selects entry 0 (label == 0.0) or entry 1.
        bsz: number of labels fed into the layer.
    """
    label_record = self.new_record(schema.Scalar((np.float32, (1, ))))
    raw_labels = np.random.randint(categories, size=bsz).astype(np.float32)
    schema.FeedRecord(label_record, [raw_labels])

    # Binary labels only need two smoothed values; categorical labels need
    # one row of smoothed values per category.
    if binary_prob_label:
        matrix_shape = 2
    else:
        matrix_shape = (categories, categories)
    smoothing = np.random.random(matrix_shape)

    layer_output = self.model.LabelSmooth(label_record, smoothing)

    init_net, net = self.get_training_nets(True)
    workspace.RunNetOnce(init_net)
    workspace.RunNetOnce(net)
    actual = workspace.FetchBlob(layer_output())

    # Rebuild the expected result label by label from the smoothing data.
    if binary_prob_label:
        rows = [smoothing[0 if value == 0.0 else 1] for value in raw_labels]
    else:
        rows = [smoothing[int(value)] for value in raw_labels]
    expected = np.array(rows)

    npt.assert_allclose(expected, actual, atol=1e-4, rtol=1e-4)
|
|
|
|
|
|
|
|
|
|
@given(
    categories=st.integers(min_value=2, max_value=10),
    bsz=st.integers(min_value=10, max_value=100),
    **hu.gcs
)
def testLabelSmoothForCategoricalLabel(self, categories, bsz, gc, dc):
    """Exercise LabelSmooth with a (categories x categories) matrix."""
    # gc and dc are accepted but not used by this test
    # (presumably supplied by the hu.gcs strategies).
    self._testLabelSmooth(
        categories=categories,
        binary_prob_label=False,
        bsz=bsz,
    )
|
|
|
|
|
|
|
|
|
|
@given(
    bsz=st.integers(min_value=10, max_value=100),
    **hu.gcs
)
def testLabelSmoothForBinaryProbLabel(self, bsz, gc, dc):
    """Exercise LabelSmooth in binary mode with labels drawn from {0, 1}."""
    # gc and dc are accepted but not used by this test
    # (presumably supplied by the hu.gcs strategies).
    self._testLabelSmooth(
        categories=2,
        binary_prob_label=True,
        bsz=bsz,
    )
|
Update from facebook (#8384)
* [fix] fixup the bias multiplier data access issue
Hotfix for failures in conv_transpose
* [D2][Easy]: lint regularizer
lint with black
* [GanH]: Split mu in adaptive weight for diagnose
* [Dper] Add the ability to split FC weights into multiple smaller ones
* fix SumReduceLikeOp for empty blob
as desc.
* add ctc_greedy_decoder for caffe2
ctc_greedy_decoder same as tf's
* Update event callback handling
Allow multiple callbacks per event
* Add WeightedSum layer
The motivation is to do weighted sum in HoNet/crossnet, in the next diff, I'll replace model.Add with model.WeightedSum in
honet: https://fburl.com/f4rmolg2
crossnet: https://fburl.com/v7awn8se, https://fburl.com/63filbnm
* Replicate DAG's behavior
Some callers expect RunAsync to block, replicate that behavior in case of
explicit 'dag' net type
* [dper] layernorm layer
as title
* Override dag, async_dag, async_polling
Overriding dag, async_dag and async_polling with async_scheduling
* Name the thread pools
Caffe thread pools currently inherit the thread names from the thread that starts them, which can be misleading. Give them an explicit name instead.
* [Caffe2] FilleOp should support int64_t dimensions
Change argument type to int64_t for shape argument of FillerOp (used in ConstantFill, XavierFill, etc)
* Remove caffe2/caffe2/contrib/torch/
It's not used anywhere and depends on old lua torch that conflicts with Aten. Given PT1 it's not relevant any more (though it was nice and clever code!)
#accept2ship
* Fix linearWarmup multiplier check
The multiplier needs to be non-negative, not strictly positive.
* Revert D3314316
This is after 2 years and we do not seem to have a use case for this one, so
for the sake of clean API design we should potentially remove this. This would
allow us to potentially pass in arguments to optionally construct an object,
although it is indeed a little bit unclear how we can reuse existing objects if
constructor arguments are passed in. In any case, we may want to remove this
dangling feature.
* Speedup generate proposals by partial_sort.
Speedup generate proposals by partial_sort.
FACEBOOK:
- Saw speed improvement for training with this op.
- Yanghan benchmarked the op on a small dataset and see consistent 100% improvement on speed (6ms -> 3ms) on 420 input resolution. See next diff for details.
* More parallel processing friendly for CPP version of GenerateProposals.
More parallel processing friendly for CPP version of GenerateProposals.
* [DT] [43/n] Lift stop conditions inside reader code back to flow control
1. Split multi_reader function into local_reader and remote_reader
2. Lifted stop conditions inside Limiter back to flow control
3. Split epoch flow building logic into 3 cases:
- single machine (1 reader, 1 trainer on trainer0 node, no PS)
- (1 reader + 1 trainer) on trainer0 node, has PS
- multiple readers, readers do not share nodes with trainers, might have PS or not
* Resolve conflicts for torch/_thnn/utils.py
* [Caffe2] Handle image decoding errors
Image decoding errors can make the whole training fail. This diff is to handle them
1.Catch imdecode exceptions and check if decoded image has zero columns or rows. This is counted as decoding errors.
2.Replace the image with empty in case of error
3.Count the number of errors and throw runtime exception if the rate reaches given number
The empty image data is kept. It might introduce noise in the training data.
* Update MKL exporter to IDEEP ops
TSIA
* [Caffe2] GlobalInit is thread safe, fixing the comment
With the mutex and lock, GlobalInit is thread safe.
Update the comments.
* Back out "Add support for generating ATen files during fbcode build"
Original commit changeset: 28970ddba353
@override-unit-failures
(Note: this ignores all push blocking failures!)
* [DT]: fix predictor save
similar to D6610058, here we add the fix for distributed online training
* Remove net_singlethread_async_gpu.cc
Closes https://github.com/caffe2/caffe2/pull/2528
This removes net_singlethread_async_gpu.cc as part of our effort to clean
CUDAContext and the net executors.
* Inline DFS task execution
Add a DFS inline task execution mode in executor
* Add c10 folder to fbcode
This adds the c10 folder and its test cases to fbcode. Build flags are mostly taken from aten.
* add dependencies for online trainer
Add some dependencies so that the online model can use DataPipeline and PredictionTransform operators
Relevant post: https://fb.intern.facebook.com/groups/1324375037655677/permalink/1740993462660497/
* Resolve conflicts for tools/jit/gen_jit_dispatch.py
* [Fix] sparse regularization in distributed training
* Support advanced pooling options in sum processor
* support advanced pooling options in sum processor
* remove redundant code
* support attention in sum processor
* Improve shard logging in net tracing code
Make it handle arbitrary shard ids instead of just one digit ids.
* [Caffe2] Call GlobalInit in predictor only in mobile
FACEBOOK:
Calling GlobalInit long after the program starts may not be safe. There are issues if the following happens:
User does not call GlobalInit and initFacebook after program starts
User sets a flag manually: https://fburl.com/mcsumw7d
User calls OSS predictor.
OSS predictor calls GlobalInit
GlobalInit calls initFacebook
initFacebook resets all flags: https://fburl.com/tolszha1
Thus, the user manually set flags are overwritten
This would happen anytime GlobalInit is called long after the program starts.
I suppose the intention of the user in this case is not to call GlobalInit throughout the program,
but use Caffe2 regardless (is that desired?)
But adding GlobalInit in the OSS predictor would automatically call GlobalInit when using Caffe2.
This issue doesn't exist in mobile, since initFacebook is not called on mobile.
For now, guard the GlobalInit in predictor for mobile only.
May want to ensure the GlobalInit is always called at the start of the program. @[3501714:kutta] has seen weird issues when not calling GlobalInit at the start of the program on server side. He has made some progress on this.
* resolve conflicts for caffe2/core/logging_is_google_glog.h and test/test_torch.py
* Add empty fix for SumLikeReduceOp
Add empty fix for SumLikeReduceOp
* Revert D7962948: [caffe2][nomnigraph] Concat elim for sparseNN
This reverts commit f7f434dc5c34ca6058b9765d2ef615453d2276a9
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Remove Declarations.yaml
* Include common.h
* Change std::stoi to caffe2::stoi
* Add thread_name.cc to the CMake file
* No need to subtract 1. Fix test segfaults
* Fix NetTest, ObserverTest
Fix tests
(cherry picked from commit 3767e66c3f365596cba3d46d3e7322c933a0ab41)
* CTCGreedyDecoderOp only has CPU implementation, test should only run on CPU
* Add a variable to avoid conversion resizing issue
* [fix] fixup the bias multiplier data access issue
Hotfix for failures in conv_transpose
* [D2][Easy]: lint regularizer
lint with black
* [GanH]: Split mu in adaptive weight for diagnose
* [Dper] Add the ability to split FC weights into multiple smaller ones
* fix SumReduceLikeOp for empty blob
as desc.
* add ctc_greedy_decoder for caffe2
ctc_greedy_decoder same as tf's
* Update event callback handling
Allow multiple callbacks per event
* Add WeightedSum layer
The motivation is to do weighted sum in HoNet/crossnet, in the next diff, I'll replace model.Add with model.WeightedSum in
honet: https://fburl.com/f4rmolg2
crossnet: https://fburl.com/v7awn8se, https://fburl.com/63filbnm
* Replicate DAG's behavior
Some callers expect RunAsync to block, replicate that behavior in case of
explicit 'dag' net type
* [dper] layernorm layer
as title
* Override dag, async_dag, async_polling
Overriding dag, async_dag and async_polling with async_scheduling
* Name the thread pools
Caffe thread pools currently inherit the thread names from the thread that starts them, which can be misleading. Give them an explicit name instead.
* [Caffe2] FilleOp should support int64_t dimensions
Change argument type to int64_t for shape argument of FillerOp (used in ConstantFill, XavierFill, etc)
* Remove caffe2/caffe2/contrib/torch/
It's not used anywhere and depends on old lua torch that conflicts with Aten. Given PT1 it's not relevant any more (though it was nice and clever code!)
#accept2ship
* Fix linearWarmup multiplier check
The multiplier needs to be non-negative, not strictly positive.
* Revert D3314316
This is after 2 years and we do not seem to have a use case for this one, so
for the sake of clean API design we should potentially remove this. This would
allow us to potentially pass in arguments to optionally construct an object,
although it is indeed a little bit unclear how we can reuse existing objects if
constructor arguments are passed in. In any case, we may want to remove this
dangling feature.
* Speedup generate proposals by partial_sort.
Speedup generate proposals by partial_sort.
FACEBOOK:
- Saw speed improvement for training with this op.
- Yanghan benchmarked the op on a small dataset and see consistent 100% improvement on speed (6ms -> 3ms) on 420 input resolution. See next diff for details.
* More parallel processing friendly for CPP version of GenerateProposals.
More parallel processing friendly for CPP version of GenerateProposals.
* [DT] [43/n] Lift stop conditions inside reader code back to flow control
1. Split multi_reader function into local_reader and remote_reader
2. Lifted stop conditions inside Limiter back to flow control
3. Split epoch flow building logic into 3 cases:
- single machine (1 reader, 1 trainer on trainer0 node, no PS)
- (1 reader + 1 trainer) on trainer0 node, has PS
- multiple readers, readers do not share nodes with trainers, might have PS or not
* Resolve conflicts for torch/_thnn/utils.py
* [Caffe2] Handle image decoding errors
Image decoding errors can make the whole training fail. This diff is to handle them
1.Catch imdecode exceptions and check if decoded image has zero columns or rows. This is counted as decoding errors.
2.Replace the image with empty in case of error
3.Count the number of errors and throw runtime exception if the rate reaches given number
The empty image data is kept. It might introduce noise in the training data.
* Update MKL exporter to IDEEP ops
TSIA
* [Caffe2] GlobalInit is thread safe, fixing the comment
With the mutex and lock, GlobalInit is thread safe.
Update the comments.
* Back out "Add support for generating ATen files during fbcode build"
Original commit changeset: 28970ddba353
@override-unit-failures
(Note: this ignores all push blocking failures!)
* [DT]: fix predictor save
similar to D6610058, here we add the fix for distributed online training
* Remove net_singlethread_async_gpu.cc
Closes https://github.com/caffe2/caffe2/pull/2528
This removes net_singlethread_async_gpu.cc as part of our effort to clean
CUDAContext and the net executors.
* Inline DFS task execution
Add a DFS inline task execution mode in executor
* Add c10 folder to fbcode
This adds the c10 folder and its test cases to fbcode. Build flags are mostly taken from aten.
* add dependencies for online trainer
Add some dependencies so that the online model can use DataPipeline and PredictionTransform operators
Relevant post: https://fb.intern.facebook.com/groups/1324375037655677/permalink/1740993462660497/
* Resolve conflicts for tools/jit/gen_jit_dispatch.py
* [Fix] sparse regularization in distributed training
* Support advanced pooling options in sum processor
* support advanced pooling options in sum processor
* remove redundant code
* support attention in sum processor
* Improve shard logging in net tracing code
Make it handle arbitrary shard ids instead of just one digit ids.
* [Caffe2] Call GlobalInit in predictor only in mobile
FACEBOOK:
Calling GlobalInit long after the program starts may not be safe. There are issues if the following happens:
User does not call GlobalInit and initFacebook after program starts
User sets a flag manually: https://fburl.com/mcsumw7d
User calls OSS predictor.
OSS predictor calls GlobalInit
GlobalInit calls initFacebook
initFacebook resets all flags: https://fburl.com/tolszha1
Thus, the user manually set flags are overwritten
This would happen anytime GlobalInit is called long after the program starts.
I suppose the intention of the user in this case is not to call GlobalInit throughout the program,
but use Caffe2 regardless (is that desired?)
But adding GlobalInit in the OSS predictor would automatically call GlobalInit when using Caffe2.
This issue doesn't exist in mobile, since initFacebook is not called on mobile.
For now, guard the GlobalInit in predictor for mobile only.
May want to ensure the GlobalInit is always called at the start of the program. @[3501714:kutta] has seen weird issues when not calling GlobalInit at the start of the program on server side. He has made some progress on this.
* resolve conflicts for caffe2/core/logging_is_google_glog.h and test/test_torch.py
* Add empty fix for SumLikeReduceOp
Add empty fix for SumLikeReduceOp
* Revert D7962948: [caffe2][nomnigraph] Concat elim for sparseNN
This reverts commit f7f434dc5c34ca6058b9765d2ef615453d2276a9
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Remove Declarations.yaml
* Include common.h
* Change std::stoi to caffe2::stoi
* Add thread_name.cc to the CMake file
* No need to subtract 1. Fix test segfaults
* Fix NetTest, ObserverTest
Fix tests
(cherry picked from commit 3767e66c3f365596cba3d46d3e7322c933a0ab41)
* CTCGreedyDecoderOp only has CPU implementation, test should only run on CPU
* Add a variable to avoid conversion resizing issue
* Remove the code per soumith's comments
* Remove the code per soumith's comments
* Remove blank lines in the end of file
* Resolve conflicts for torch/_thnn/utils.py
* Update MKL exporter to IDEEP ops
TSIA
* Back out "Add support for generating ATen files during fbcode build"
Original commit changeset: 28970ddba353
@override-unit-failures
(Note: this ignores all push blocking failures!)
* add dependencies for online trainer
Add some dependencies so that the online model can use DataPipeline and PredictionTransform operators
Relevant post: https://fb.intern.facebook.com/groups/1324375037655677/permalink/1740993462660497/
* Resolve conflicts for tools/jit/gen_jit_dispatch.py
* Support advanced pooling options in sum processor
* support advanced pooling options in sum processor
* remove redundant code
* support attention in sum processor
* resolve conflicts for caffe2/core/logging_is_google_glog.h and test/test_torch.py
* Revert D7962948: [caffe2][nomnigraph] Concat elim for sparseNN
This reverts commit f7f434dc5c34ca6058b9765d2ef615453d2276a9
@bypass-lint
An infra SEV is better than not reverting this diff.
If you copy this password, see you in SEV Review!
@cause_a_sev_many_files
* Remove Declarations.yaml
* Include common.h
* Change std::stoi to caffe2::stoi
* [caffe2] upgrade IDEEP and hotfix for conv op accuracy issue (#8364)
* [IDEEP] Upgrade IDEEP version
Signed-off-by: Gu, Jinghui <jinghui.gu@intel.com>
* [IDEEP] Fix accuracy issue in conv op
Signed-off-by: Gu, Jinghui <jinghui.gu@intel.com>
* Fix build error due to lack of src in CMakeLists
Signed-off-by: Gu, Jinghui <jinghui.gu@intel.com>
* Remove the code per soumith's comments
* [ONNX] Add an ATen fallback pathway for ONNX export (#8273)
* ATen fallback for ONNX export
* Move to enum
* Fix model test
* Add comment
* Address comments
BC interface
* Remove imaginary file (#8415)
* [Caffe2] Enable AMD/MIOPEN ops for Caffe2 (#8306)
* Add hip support for caffe2 core
* Add MIOPEN header/wrapper to caffe2 core
* Add HIP device into caffe2 PB
* top level makefile change for rocm/hip
* makefile scaffolding for AMD/RocM/HIP
* Makefile scaffolding for AMD/RocM/HIP; add makefile/utility for HIP files
* caffe2 PB update for AMD/ROCM HIP device
* Add AMD/RocM/Thrust dependency
* HIP threadpool update
* Fix makefile macro
* makefile fix: duplicate test/binary name
* makefile clean-up
* makefile clean-up
* add HIP operator registry
* add utilities for hip device
* Add USE_HIP to config summary
* makefile fix for BUILD_TEST
* merge latest
* Fix indentation
* code clean-up
* Guard builds without HIP and use the same cmake script as PyTorch to find HIP
* Setup rocm environment variables in build.sh (ideally should be done in the docker images)
* setup locale
* set HIP_PLATFORM
* Revert "set HIP_PLATFORM"
This reverts commit 8ec58db2b390c9259220c49fa34cd403568300ad.
* continue the build script environment variables mess
* HCC_AMDGPU_TARGET
* Cleanup the mess, has been fixed in the latest docker images
* Assign protobuf field hip_gpu_id a new field number for backward compatibility
* change name to avoid conflict
* Fix duplicated thread pool flag
* Refactor cmake files to not add hip includes and libs globally
* Fix the wrong usage of environment variables detection in cmake
* Add MIOPEN CNN operators
* Revert "Add MIOPEN CNN operators"
This reverts commit 6e89ad4385b5b8967a7854c4adda52c012cee42a.
* Add MIOPEN pooling operator
* Add MIOPEN activation operator
* Add MIOPEN softmax operator
* Add MIOPEN spatial batch norm operator
* Add MIOPEN local response normalization operator
* Add MIOPEN conv operator
* Clean-up LRN ops
* enable fp16 in MIOPEN pool ops
* Enable fp16 for MIOPEN relu op
* Enable fp16 for MIOPEN spatial batch norm op
* code clean-up
* revert float16 support
* Create Caffe2 python binding for AMD/ROCM/HIP
* Add op fallback for HIP operator
* add hip src/test files in cmake
* exclude hip src/test files
* fix python binding for hip backend
* fix MIOPEN pooling op workspace
* hack to compile miopen operators
* fix include path for MIOPEN ops
* Fix include path
* Add HIP math utilities
* Fix path for HIP math utils
* cmake fix
* Cmake fix / hipcc for hip files
* suppress hipcc warning
* cmake fix / replace USE_HIP with USE_ROCM
* revert LoadHIP.cmake change
* fix include for thrust/cub-hip
* include path fix for conversion.h
* Updated with latest upstream changes
* clang format fixes
* Context_hip updates
* Fixed typo in rocblas handle get function
* Updated hipified math utils
* Updated math hip test util
* Updated context hip test
* Updated common_hip
* Updated net async dag for HIP
* Added MIOPEN in operator hip test
* fix
* C2 dependencies clean-up
* fix include path for building custom protobuf
* Decouple miopen pool op and conv_pool_op base
* cmake refactor
* fix operator_hip_test
* move all hip/miopen ops files into caffe2/operators/hip
* sanitize cmake
* permission issue
* remove extra parenthesis
* remove artifact from resolving merge conflict
* cont. sanitize cmake files
* fix syntax error
* sanitize conversion.h
* .
* Revert "."
This reverts commit 56020cb0e996a31ae27bf1f8f491955ed0b121b9.
* clang-format
* Enable some reduce operators' ONNX backend tests (#8418)
* fix old comment to point to the right file (#8416)
* Stop pinning nccl version. (#8421)
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* Expose logsumexp docs and mark log_sum_exp in distributions for internal use (#8428)
* Enable some of the ONNX backend test on broadcasting (#8423)
* Enable some of the ONNX backend test on broadcasting
* enable gemm broadcast
* Expose proto utils and ONNX (#8073)
* Expose proto utils and ONNX from PyTorch libcaffe2.so
* Try to use protobuf from _C.so
* Fix ONNX proto header include
* Adjust order of imports for ONNX until nanopb goes away
* Set and use ONNX_NAMESPACE for PyTorch builds
* Show protobuf summary for all builds
* Add ONNX_NAMESPACE for cpp_build
* Statically link libprotobuf.a into libtorch.so
* Set ONNX_NAMESPACE on Windows build
* Move core/dispatch up as well
* Add /MD flag for Windows build of _C
* Potential Windows fix for ONNX and protobuf
* Add direct linkage from _C to ONNX on Windows
* Only include protobuf wrapper for PyTorch
* Pass extra_compile_args to _nvrtc ext build
* Remove installation of .a files
* Rebase creates some weird situations, revert them manually
* Remove more weird changes due to rebase
* Need to add thread_name.cc after merge
2018-06-13 20:10:45 +00:00
|
|
|
|
|
|
|
|
@given(
    num_inputs=st.integers(min_value=2, max_value=10),
    batch_size=st.integers(min_value=2, max_value=10),
    input_dim=st.integers(min_value=5, max_value=10),
    seed=st.integers(1, 10),
)
def testBlobWeightedSum(self, num_inputs, batch_size, input_dim, seed):
    """Check the BlobWeightedSum layer's schema and its numeric output.

    The layer output is compared with a NumPy reference (sum over inputs
    of input * weight) after running the train net, the forward-only
    train net, the eval net and the predict net.

    Args:
        num_inputs: number of input blobs to combine.
        batch_size: number of rows in each random input.
        input_dim: number of columns in each random input.
        seed: seed for NumPy's global random generator.
    """

    def get_blob_weighted_sum():
        # Reference result, computed from the weight blobs currently in
        # the workspace. `input_data` is bound later in the enclosing
        # scope, before this helper is first called.
        weights = []
        for i in range(num_inputs):
            w_blob_name = 'blob_weighted_sum/w_{0}'.format(i)
            # unittest assertions are used instead of bare `assert` so
            # the check still runs under `python -O`.
            self.assertTrue(
                workspace.HasBlob(w_blob_name),
                "cannot find blob {}".format(w_blob_name),
            )
            weights.append(workspace.FetchBlob(w_blob_name))

        return np.sum(
            [data * weight for data, weight in zip(input_data, weights)],
            axis=0,
        )

    def check_output():
        # Fetch the layer output and compare it with the reference.
        output = workspace.FetchBlob(ws_output())
        npt.assert_almost_equal(get_blob_weighted_sum(), output, decimal=5)

    np.random.seed(seed)
    expected_output_schema = schema.Scalar((np.float32, (input_dim,)))
    input_schema = schema.Tuple(
        *[expected_output_schema for _ in range(num_inputs)]
    )
    input_data = [
        np.random.random((batch_size, input_dim)).astype(np.float32)
        for _ in range(num_inputs)
    ]
    input_record = self.new_record(input_schema)
    schema.FeedRecord(input_record, input_data)

    # test output schema
    ws_output = self.model.BlobWeightedSum(input_record)
    self.assertEqual(len(self.model.layers), 1)
    self.assertTrue(
        schema.equal_schemas(ws_output, expected_output_schema)
    )

    # test train net
    train_init_net, train_net = self.get_training_nets()
    workspace.RunNetOnce(train_init_net)
    workspace.RunNetOnce(train_net)
    check_output()

    self.run_train_net_forward_only()
    check_output()

    # test eval net
    eval_net = self.get_eval_net()
    workspace.RunNetOnce(eval_net)
    check_output()

    # test pred net
    pred_net = self.get_predict_net()
    workspace.RunNetOnce(pred_net)
    check_output()
|
2019-07-22 21:56:16 +00:00
|
|
|
|
|
|
|
|
def testFeatureSparseToDenseGetAccessedFeatures(self):
    """Check get_accessed_features() on a FeatureSparseToDense layer.

    Three input columns (float, id-list and id-score-list features) are
    declared, each with its own feature ids. The single layer created is
    expected to report, per column, one AccessedFeatures entry holding the
    column's feature type and the set of its feature ids.
    """
    feature_names = ["a", "b", "c"]

    # One row per input column:
    # (column name, feature type, feature ids, value schema of the map).
    columns = [
        ("float_features", "FLOAT", [1, 2, 3], np.float32),
        (
            "id_list_features",
            "ID_LIST",
            [4, 5, 6],
            schema.List(np.int64),
        ),
        (
            "id_score_list_features",
            "ID_SCORE_LIST",
            [7, 8, 9],
            schema.Map(np.int64, np.float32),
        ),
    ]

    record_fields = [
        (column, schema.Map(np.int32, value_schema))
        for column, _, _, value_schema in columns
    ]
    input_record = self.new_record(schema.Struct(*record_fields))

    input_specs = [
        (
            column,
            schema.FeatureSpec(
                feature_type=feature_type,
                feature_ids=feature_ids,
                feature_names=feature_names,
            ),
        )
        for column, feature_type, feature_ids, _ in columns
    ]

    self.model.FeatureSparseToDense(input_record, input_specs)

    expected_accessed_features = {
        column: [AccessedFeatures(feature_type, set(feature_ids))]
        for column, feature_type, feature_ids, _ in columns
    }

    self.assertEqual(len(self.model.layers), 1)
    only_layer = self.model.layers[0]
    self.assertEqual(
        only_layer.get_accessed_features(),
        expected_accessed_features
    )
|
2019-08-28 06:22:23 +00:00
|
|
|
|
|
|
|
|
def test_get_key(self):
    """Check that get_key() agrees with the earlier sparse-key selection.

    The reference logic picks items() for records matching IdList and
    keys() for records matching IdScoreList (field types ignored). Both
    record kinds are created and compared against get_key().
    """

    def legacy_sparse_key(record):
        # Reference implementation that get_key() is compared against.
        if almost_equal_schemas(record, IdList):
            return record.items()
        if almost_equal_schemas(record,
                                IdScoreList,
                                check_field_types=False):
            return record.keys()
        raise NotImplementedError()

    def categorical_ids():
        # int64 id scalar carrying a categorical limit of 1000.
        return schema.Scalar(
            np.int64,
            metadata=schema.Metadata(categorical_limit=1000),
        )

    # Id-score-list case: map from categorical ids to float scores.
    score_record = schema.NewRecord(
        self.model.net,
        schema.Map(categorical_ids(), np.float32),
    )
    self.assertEqual(
        get_key(score_record)(),
        legacy_sparse_key(score_record)
    )

    # Id-list case: list of categorical ids.
    list_record = schema.NewRecord(
        self.model.net,
        schema.List(categorical_ids()),
    )
    self.assertEqual(
        get_key(list_record)(),
        legacy_sparse_key(list_record)
    )
|
2019-10-09 03:20:10 +00:00
|
|
|
|
|
|
|
|
def testSparseLookupWithAttentionWeightOnIdScoreList(self):
    # Builds a SparseLookup layer ("Sum" reducer, use_external_weights=True)
    # over a Map record of int64 ids to float32 values, then checks the
    # output schema and that both the training and the predict net contain
    # the expected SparseLengthsWeightedSum op.
    id_schema = schema.Scalar(
        np.int64,
        metadata=schema.Metadata(categorical_limit=1000),
    )
    record = schema.NewRecord(
        self.model.net,
        schema.Map(id_schema, np.float32),
    )

    embedding_dim = 64
    pooled = self.model.SparseLookup(
        record, [embedding_dim], "Sum", use_external_weights=True
    )
    self.model.output_schema = schema.Struct()

    # The layer output must be a float32 vector of length embedding_dim.
    expected_output = schema.Scalar((np.float32, (embedding_dim,)))
    self.assertEqual(expected_output, pooled)

    train_init_net, train_net = self.get_training_nets()

    # The init net must hold a UniformFill followed by a ConstantFill; the
    # matched ops are returned so their outputs can be referenced below.
    expected_init_ops = [
        OpSpec("UniformFill", None, None),
        OpSpec("ConstantFill", None, None),
    ]
    init_ops = self.assertNetContainOps(train_init_net, expected_init_ops)

    # Expected lookup inputs, in order: first output of the matched
    # UniformFill op, then the record's values, keys and lengths blobs.
    lookup_inputs = [
        init_ops[0].output[0],
        record.values(),
        record.keys(),
        record.lengths(),
    ]
    lookup_spec = OpSpec(
        "SparseLengthsWeightedSum",
        lookup_inputs,
        [pooled()],
    )
    self.assertNetContainOps(train_net, [lookup_spec])

    # The same op spec is expected in the predict net.
    predict_net = self.get_predict_net()
    self.assertNetContainOps(predict_net, [lookup_spec])
|
2021-06-05 02:56:10 +00:00
|
|
|
|
|
|
|
|
def testSparseItemwiseDropoutWithReplacement(self):
    # Feeds ten single-id rows through SparseItemwiseDropoutWithReplacement
    # and checks that the training net leaves ids and lengths untouched
    # while the predict net yields -1 for every id with lengths unchanged.
    input_record = schema.NewRecord(self.model.net, IdList)
    self.model.output_schema = schema.Struct()

    num_rows = 10
    lengths_blob, values_blob = input_record.field_blobs()[:2]
    lengths = np.ones(num_rows, dtype=np.int32)
    values = np.arange(1, num_rows + 1, dtype=np.int64)
    workspace.FeedBlob(lengths_blob, lengths)
    workspace.FeedBlob(values_blob, values)

    # NOTE(review): the positional arguments 0.0, 0.5, 1.0, -1 are
    # presumably the train/eval/predict dropout ratios followed by the
    # replacement value -- confirm against the layer's signature.
    out = self.model.SparseItemwiseDropoutWithReplacement(
        input_record, 0.0, 0.5, 1.0, -1, output_names_or_num=1
    )
    expected_schema = schema.List(schema.Scalar(np.int64,))
    self.assertEqual(expected_schema, out)

    train_init_net, train_net = self.get_training_nets()
    eval_net = self.get_eval_net()
    predict_net = self.get_predict_net()

    # Training pass: output ids and lengths must equal the inputs.
    workspace.RunNetOnce(train_init_net)
    workspace.RunNetOnce(train_net)
    trained_values = workspace.FetchBlob(out.items())
    trained_lengths = workspace.FetchBlob(out.lengths())
    self.assertBlobsEqual(trained_values, values)
    self.assertBlobsEqual(trained_lengths, lengths)

    # Eval pass: only executed; none of its outputs are inspected.
    workspace.RunNetOnce(eval_net)

    # Predict pass: every id is expected to be -1, lengths unchanged.
    workspace.RunNetOnce(predict_net)
    predicted_values = workspace.FetchBlob("values_auto_0")
    predicted_lengths = workspace.FetchBlob("lengths_auto_0")
    all_replaced = np.full(num_rows, -1, dtype=np.int64)
    self.assertBlobsEqual(predicted_values, all_replaced)
    self.assertBlobsEqual(predicted_lengths, lengths)
|