pytorch/caffe2/python/operator_test/copy_ops_test.py
Aapo Kyrola 69f42e3f70 make CopyGPUToCPU/CPUToGPU handle sparse gradients
Summary:
CopyGPUToCPU and CopyCPUToGPU need to handle gradients that arrive in sparse form. Added a unit test and fixed the gradient makers to create copies for both the values and the indices.

This becomes less important once the GPU sparse parameter update ops land, but it is nevertheless good to fix.

Reviewed By: dzhulgakov

Differential Revision: D4882327

fbshipit-source-id: aafd2df46b3e1bcb30b52b1edf40fad8271f1f88
2017-04-13 17:16:26 -07:00

188 lines
7.2 KiB
Python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import numpy as np
import unittest
from caffe2.proto import caffe2_pb2
from caffe2.python import workspace, core, cnn
class CopyOpsTest(unittest.TestCase):
    """Tests for the Copy, CopyCPUToGPU and CopyGPUToCPU operators.

    Each test builds a small ``CNNModelHelper`` model containing copy ops,
    adds gradient operators, and checks the resulting gradient map and/or
    the blobs produced by running the net.
    """

    def tearDown(self):
        """Reset the workspace so blobs do not leak between tests."""
        # Reset workspace after each test
        # Otherwise, the multi-GPU test will use previously created tensors,
        # which may have been placed on the wrong device
        workspace.ResetWorkspace()

    def run_test_copy_gradient(self, device_opt):
        """Build and run ``x -> Copy -> AveragedLoss`` under ``device_opt``.

        Asserts that the copy output equals its input and that the gradient
        blobs mapped to ``x`` and ``y`` are equal.

        Args:
            device_opt: device option used as the ``DeviceScope`` for
                building the model, feeding the input and running the nets.
        """
        model = cnn.CNNModelHelper(name="copy_test")
        with core.DeviceScope(device_opt):
            x = model.net.AddExternalInputs("x")
            y = model.Copy(x, "y")
            loss = model.AveragedLoss(y, "loss")
            gradient_map = model.AddGradientOperators([loss])

            workspace.FeedBlob(x, np.random.rand(32).astype(np.float32))
            workspace.RunNetOnce(model.param_init_net)
            workspace.RunNetOnce(model.net)

            self.assertTrue(np.array_equal(
                workspace.FetchBlob(x),
                workspace.FetchBlob(y),
            ))
            self.assertTrue(np.array_equal(
                workspace.FetchBlob(gradient_map[x]),
                workspace.FetchBlob(gradient_map[y]),
            ))

    def test_copy_gradient_cpu(self):
        """Run the copy-gradient check with a CPU device option."""
        self.run_test_copy_gradient(core.DeviceOption(caffe2_pb2.CPU, 0))

    @unittest.skipIf(workspace.NumCudaDevices() < 1, "Need at least 1 GPU.")
    def test_copy_gradient_gpu(self):
        """Run the copy-gradient check with a CUDA device option."""
        self.run_test_copy_gradient(core.DeviceOption(caffe2_pb2.CUDA, 0))

    @unittest.skipIf(workspace.NumCudaDevices() < 2, "Need at least 2 GPU.")
    def test_copy_gradient_multiple_gpus(self):
        """Copy CPU -> CUDA 0 -> CUDA 1 and check values, grads and devices."""
        model = cnn.CNNModelHelper(name="copy_test")

        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)):
            x_cpu = model.net.AddExternalInputs("x_cpu")

        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
            x_gpu_1 = model.CopyCPUToGPU(x_cpu, "x_gpu_1")

        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 1)):
            x_gpu_2 = model.Copy(x_gpu_1, "x_gpu_2")
            loss = model.AveragedLoss(x_gpu_2, "loss")
            gradient_map = model.AddGradientOperators([loss])

        workspace.FeedBlob("x_cpu", np.random.rand(32).astype(np.float32))
        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(model.net)

        self.assertTrue(np.array_equal(
            workspace.FetchBlob("x_gpu_1"),
            workspace.FetchBlob("x_gpu_2"),
        ))
        self.assertTrue(np.array_equal(
            workspace.FetchBlob(gradient_map["x_gpu_1"]),
            workspace.FetchBlob(gradient_map["x_gpu_2"]),
        ))

        def get_op_with_output(model, output_blob_name):
            """Return the first op whose only output is the named blob."""
            for op in model.net.Proto().op:
                if len(op.output) == 1 and op.output[0] == output_blob_name:
                    return op
            return None

        expected_devices = (
            ("x_gpu_2_grad", core.DeviceOption(caffe2_pb2.CUDA, 1)),
            ("x_cpu_grad", core.DeviceOption(caffe2_pb2.CUDA, 0)),
        )
        for blob_name, expected_opt in expected_devices:
            op = get_op_with_output(model, blob_name)
            # Fail with a readable message instead of an AttributeError on
            # None when no op produces the expected gradient blob.
            self.assertIsNotNone(
                op, "No op produces blob {}".format(blob_name)
            )
            self.assertEqual(op.device_option, expected_opt)

    @unittest.skipIf(workspace.NumCudaDevices() < 1, "Need at least 1 GPU.")
    def test_cpu2gpu_gpu2cpu_sparse_gradients(self):
        """Check that a sparse gradient survives the GPU<->CPU copy ops.

        Only the graph is built here (no net is run); the test asserts that
        the gradient mapped to ``v`` is a ``core.GradientSlice``.
        """
        model = cnn.CNNModelHelper(name="copy_test")
        v = model.param_init_net.UniformFill([], ["v"], shape=[16, 4])
        # The indices need their own blob: writing them to "v" as well would
        # make Gather use the data blob as its own indices.
        # NOTE(review): Gather presumably needs integer indices at run time;
        # this test never runs the net, so the fill type is left unchanged.
        indices = model.param_init_net.UniformFill(
            [], ["indices"], shape=[16, 4]
        )
        cpu_opt = core.DeviceOption(caffe2_pb2.CPU, 0)
        gpu_opt = core.DeviceOption(caffe2_pb2.CUDA, 0)

        with core.DeviceScope(gpu_opt):
            vcpu = model.CopyGPUToCPU(v, "vcpu")

        with core.DeviceScope(cpu_opt):
            g = model.Gather([vcpu, indices], "g")

        with core.DeviceScope(gpu_opt):
            ggpu = model.CopyCPUToGPU(g, "ggpu")
            f = model.FC(ggpu, "out", dim_in=4, dim_out=6)
            (softmax, loss) = model.SoftmaxWithLoss(
                [f, "label"],
                ["softmax", "loss"],
            )

        gradient_map = model.AddGradientOperators([loss])
        self.assertIn("v", gradient_map)
        self.assertIsInstance(gradient_map['v'], core.GradientSlice)

    @unittest.skipIf(workspace.NumCudaDevices() < 1, "Need at least 1 GPU.")
    def test_cpu2gpu_gpu2cpu_gradients(self):
        """Run a CPU -> GPU -> CPU model once and verify parameter updates.

        Every parameter is updated with ``WeightedSum`` as
        ``param * ONE + grad * LR`` where ``LR`` is -2.0; after one run each
        parameter must equal ``initial - 2.0 * grad``.
        """
        model = cnn.CNNModelHelper(name="copy_test")

        batch = 32
        cpu_opt = core.DeviceOption(caffe2_pb2.CPU, 0)
        gpu_opt = core.DeviceOption(caffe2_pb2.CUDA, 0)

        with core.NameScope("cpu"):
            with core.DeviceScope(cpu_opt):
                x_cpu = model.FC('data', 'x_cpu', 16, 8)

        with core.NameScope("gpu_0"):
            with core.DeviceScope(gpu_opt):
                x_gpu = model.CopyCPUToGPU(x_cpu, "x_gpu")
                pred_gpu = model.FC(x_gpu, "pred_gpu", 8, 4)
                pred_cpu = model.CopyGPUToCPU(pred_gpu, "pred_cpu")

        with core.DeviceScope(cpu_opt):
            with core.NameScope("cpu"):
                (softmax, loss) = model.SoftmaxWithLoss(
                    [pred_cpu, "label"],
                    ["softmax", "loss"],
                )

        gradient_map = model.AddGradientOperators([loss])

        # Add param updates (for cpu and gpu)
        init_net = model.param_init_net
        for scope_name, device_opt in (("cpu", cpu_opt), ("gpu_0", gpu_opt)):
            with core.NameScope(scope_name), core.DeviceScope(device_opt):
                ONE = init_net.ConstantFill([], "ONE", shape=[1], value=1.)
                LR = init_net.ConstantFill([], "LR", shape=[1], value=-2.0)
                for param in model.GetParams():
                    model.WeightedSum(
                        [param, ONE, gradient_map[param], LR],
                        param,
                    )

        with core.DeviceScope(cpu_opt):
            workspace.FeedBlob(
                'cpu/data',
                np.random.rand(batch, 16).astype(np.float32),
            )
            workspace.FeedBlob(
                'cpu/label',
                np.random.randint(4, size=batch).astype(np.int32),
            )

        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net)

        # Snapshot the parameters before and after a single run of the net.
        initial_params = {p: workspace.FetchBlob(p) for p in model.GetParams()}
        workspace.RunNet(model.net.Proto().name)
        updated_params = {p: workspace.FetchBlob(p) for p in model.GetParams()}

        for p in model.GetParams():
            g = gradient_map[p]
            expected = initial_params[p] - 2.0 * workspace.FetchBlob(g)
            actual = updated_params[p]
            self.assertTrue(
                np.array_equal(expected, actual),
                "Mismatch: {}: {}, {}".format(p, expected, actual),
            )