pytorch/caffe2/python/operator_test/mkl_speed_test.py
Orion Reblitz-Richardson 1d5780d42c Remove Apache headers from source.
* LICENSE file contains details, so removing from individual source files.
2018-03-27 13:10:18 -07:00

82 lines
3.3 KiB
Python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import unittest
import numpy as np
from caffe2.proto import caffe2_pb2
from caffe2.python import core, workspace, test_util
@unittest.skipIf(not workspace.C.has_mkldnn, "Skipping as we do not have mkldnn.")
class TestMKLBasic(test_util.TestCase):
    """Cross-checks MKL-DNN operator output against the CPU path and
    reports relative runtimes via BenchmarkNet."""

    def testReLUSpeed(self):
        """Run Relu on both CPU and MKL-DNN devices, verify the outputs
        agree to tight tolerance, then print benchmark timings."""
        input_data = np.random.randn(128, 4096).astype(np.float32)
        mkl_device = core.DeviceOption(caffe2_pb2.MKLDNN)

        # Feeding the same tensor must succeed on both device options.
        workspace.FeedBlob("X", input_data)
        workspace.FeedBlob("X_mkl", input_data, device_option=mkl_device)

        # Build one net containing a CPU Relu and an MKL-DNN Relu.
        test_net = core.Net("test")
        test_net.Relu("X", "Y")
        test_net.Relu("X_mkl", "Y_mkl", device_option=mkl_device)
        workspace.CreateNet(test_net)
        workspace.RunNet(test_net)

        # Both backends should produce (numerically) identical results.
        cpu_output = workspace.FetchBlob("Y")
        mkl_output = workspace.FetchBlob("Y_mkl")
        np.testing.assert_allclose(cpu_output, mkl_output, atol=1e-10, rtol=1e-10)

        # BenchmarkNet returns per-entry times: [whole_net, cpu_op, mkl_op].
        runtime = workspace.BenchmarkNet(test_net.Proto().name, 1, 100, True)
        # One might expect the MKL op to beat the CPU op, but as noted by
        # Yangqing, in optimized builds the Eigen-vectorized CPU version can
        # actually be faster, so the assertion below stays disabled.
        #self.assertTrue(runtime[1] >= runtime[2])
        print("Relu CPU runtime {}, MKL runtime {}.".format(runtime[1], runtime[2]))

    # Note(Zhicheng): Disable the test below until we implement the use of
    # RegisterTensorInfoFunction to register for Tensor<MKLContext>.
    # def testConvSpeed(self):
    #     # We randomly select a shape to test the speed. Intentionally we
    #     # test a batch size of 1 since this may be the most frequent use
    #     # case for MKL during deployment time.
    #     X = np.random.rand(1, 256, 27, 27).astype(np.float32) - 0.5
    #     W = np.random.rand(192, 256, 3, 3).astype(np.float32) - 0.5
    #     b = np.random.rand(192).astype(np.float32) - 0.5
    #     mkl_do = core.DeviceOption(caffe2_pb2.MKLDNN)
    #     # Makes sure that feed works.
    #     workspace.FeedBlob("X", X)
    #     workspace.FeedBlob("W", W)
    #     workspace.FeedBlob("b", b)
    #     workspace.FeedBlob("X_mkl", X, device_option=mkl_do)
    #     workspace.FeedBlob("W_mkl", W, device_option=mkl_do)
    #     workspace.FeedBlob("b_mkl", b, device_option=mkl_do)
    #     net = core.Net("test")
    #     # Makes sure that we can run conv.
    #     net.Conv(["X", "W", "b"], "Y", pad=1, stride=1, kernel=3)
    #     net.Conv(["X_mkl", "W_mkl", "b_mkl"], "Y_mkl",
    #              pad=1, stride=1, kernel=3, device_option=mkl_do)
    #     workspace.CreateNet(net)
    #     workspace.RunNet(net)
    #     # makes sure that the results are good.
    #     np.testing.assert_allclose(
    #         workspace.FetchBlob("Y"),
    #         workspace.FetchBlob("Y_mkl"),
    #         atol=1e-2,
    #         rtol=1e-2)
    #     runtime = workspace.BenchmarkNet(net.Proto().name, 1, 100, True)
    #
    #     print("Conv CPU runtime {}, MKL runtime {}.".format(runtime[1], runtime[2]))
# Run the test suite when executed directly as a script.
if __name__ == "__main__":
    unittest.main()