// pytorch/test/cpp/api/rnn.cpp — C++ frontend tests for RNN/LSTM/GRU modules.

#include <gtest/gtest.h>
#include <torch/torch.h>
#include <test/cpp/api/support.h>
using namespace torch::nn;
using namespace torch::test;
template <typename R, typename Func>
bool test_RNN_xor(Func&& model_maker, bool cuda = false) {
torch::manual_seed(0);
auto nhid = 32;
auto model = std::make_shared<SimpleContainer>();
auto l1 = model->add(Linear(1, nhid), "l1");
auto rnn = model->add(model_maker(nhid), "rnn");
auto lo = model->add(Linear(nhid, 1), "lo");
torch::optim::Adam optimizer(model->parameters(), 1e-2);
auto forward_op = [&](torch::Tensor x) {
auto T = x.size(0);
auto B = x.size(1);
x = x.view({T * B, 1});
x = l1->forward(x).view({T, B, nhid}).tanh_();
x = rnn->forward(x).output[T - 1];
x = lo->forward(x);
return x;
};
if (cuda) {
model->to(torch::kCUDA);
}
float running_loss = 1;
int epoch = 0;
auto max_epoch = 1500;
while (running_loss > 1e-2) {
auto bs = 16U;
auto nlen = 5U;
const auto backend = cuda ? torch::kCUDA : torch::kCPU;
auto inputs =
torch::rand({nlen, bs, 1}, backend).round().to(torch::kFloat32);
auto labels = inputs.sum(0).detach();
inputs.set_requires_grad(true);
auto outputs = forward_op(inputs);
torch::Tensor loss = torch::mse_loss(outputs, labels);
optimizer.zero_grad();
loss.backward();
optimizer.step();
Remove caffe2::Tensor::capacity_nbytes, at::Tensor::to##name##Data, (#11876) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/11876 Modern C++ api instead of macros, item() is aligned with Python frontend. caffe2::Tensor::capacity_nbytes is effecitvely unused and confusing w.r.t. caffe2::Tensor::nbytes(). codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCByte "item<uint8_t>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCLong "item<int64_t>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCInt "item<int32_t>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCDouble "item<double>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toByteData "data<uint8_t>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toLongData "data<int64_t>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toIntData "data<int32_t>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toDoubleData "data<double>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toFloatData "data<float>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCByte "item<uint8_t>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCLong "item<int64_t>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCInt "item<int32_t>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCDouble "item<double>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toByteData "data<uint8_t>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toLongData "data<int64_t>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toIntData "data<int32_t>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toDoubleData "data<double>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toFloatData "data<float>" codemod -d caffe2 --extensions 
cc,cpp,cu,cuh,h,py,hpp,mm toCComplexDouble "item<std::complex<double>>" codemod -d tc --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>" Reviewed By: ezyang Differential Revision: D9948572 fbshipit-source-id: 70c9f5390d92b82c85fdd5f8a5aebca338ab413c
2018-09-24 17:39:10 +00:00
running_loss = running_loss * 0.99 + loss.item<float>() * 0.01;
if (epoch > max_epoch) {
return false;
}
epoch++;
}
return true;
};
// Validates the tensor shapes produced by the LSTM used in the size tests:
// 3 layers, 64 hidden units, 10 time steps, batch 16. `output.state` packs
// (hx, cx) along dim 0.
void check_lstm_sizes(RNNOutput output) {
  // Expect the LSTM to have 64 outputs and 3 layers, with an input of batch
  // 10 and 16 time steps (10 x 16 x n)
  ASSERT_EQ(output.output.ndimension(), 3);
  ASSERT_EQ(output.output.size(0), 10);
  ASSERT_EQ(output.output.size(1), 16);
  ASSERT_EQ(output.output.size(2), 64);

  ASSERT_EQ(output.state.ndimension(), 4);
  ASSERT_EQ(output.state.size(0), 2); // (hx, cx)
  ASSERT_EQ(output.state.size(1), 3); // layers
  ASSERT_EQ(output.state.size(2), 16); // Batchsize
  ASSERT_EQ(output.state.size(3), 64); // 64 hidden dims

  // Something is in the hiddens
  ASSERT_GT(output.state.norm().item<float>(), 0);
}
// Test fixture that reseeds RNGs before each test (via SeedingFixture) so
// runs are reproducible.
struct RNNTest : torch::test::SeedingFixture {};
// Forward pass of a 3-layer LSTM must produce correctly-shaped output and
// state, both on the first call and when the returned state is fed back in.
TEST_F(RNNTest, CheckOutputSizes) {
  LSTM model(LSTMOptions(128, 64).layers(3).dropout(0.2));
  // Input size is: sequence length, batch size, input size
  auto x = torch::randn({10, 16, 128}, torch::requires_grad());
  auto output = model->forward(x);
  auto y = x.mean();
  y.backward();
  check_lstm_sizes(output);

  // Feeding the previous state back in keeps the shapes intact.
  auto next = model->forward(x, output.state);
  check_lstm_sizes(next);

  torch::Tensor diff = next.state - output.state;
  // Hiddens changed
  ASSERT_GT(diff.abs().sum().item<float>(), 1e-3);
}
TEST_F(RNNTest, CheckOutputValuesMatchPyTorch) {
torch::manual_seed(0);
// Make sure the outputs match pytorch outputs
LSTM model(2, 2);
for (auto& v : model->parameters()) {
Replace cursors with OrderedDict (#13427) Summary: This is a pre-cursor diff to Python <-> C++ frontend integration -- I have a follow-up PR coming for that. This PR changes the C++ frontend module interface to replace the custom "cursor"s I introduced some time ago with `OrderedDict`. I introduced cursors at the time as a convenient way of applying functions and query operations on a modules' parameters, buffers and modules, allowing things like `module.parameters().map(my_func)`. However, I noticed that (1) this functionality is easily implement-able on top of a regular data structure and (2) more importantly, using OrderedDicts is much, much easier for Python integration. This is especially true given that ScriptModule today also uses OrderedDict. Since C++ frontend modules and ScriptModules will soon too share as many implementation details as possible, it is overall the best move to ditch the custom cursor datastructure and pervasively use OrderedDict everywhere. For this I did: 1. Changed the C++ frontend module interface to more closely match the Python one by providing `parameters()`, `named_parameters()` and other methods Python provides. This is very important for the following diff which binds these into Python for inter-op with Python modules. 2. In lieu of the `Cursor::apply()` method I added `nn::Module::apply`. This again is one more unifying step between Python and C++, since Python modules have an apply function too. 3. Deleted all uses of Cursor. 4. Tidied and beefed up the `OrderedDict` class. In particular, I made `OrderedDict::Item` store an `std::pair` under the hood, because that is trivial to bind into Python and saved me a lot of headaches. `key` and `value` become methods instead of fields, which they should have been from the very start anyway because it allows exactly these kinds of changes, as per usual good software engineering principle of encapsulation. 5. Added many tests for the OrderedDict use in `nn::Module`. 
ebetica ezyang Pull Request resolved: https://github.com/pytorch/pytorch/pull/13427 Differential Revision: D12894092 Pulled By: goldsborough fbshipit-source-id: 715770c95a9643753a1db26d7f9da9a78619a15d
2018-11-07 18:53:07 +00:00
float size = v.numel();
auto p = static_cast<float*>(v.storage().data());
for (size_t i = 0; i < size; i++) {
p[i] = i / size;
}
}
auto x = torch::empty({3, 4, 2}, torch::requires_grad());
float size = x.numel();
auto p = static_cast<float*>(x.storage().data());
for (size_t i = 0; i < size; i++) {
p[i] = (size - i) / size;
}
auto out = model->forward(x);
ASSERT_EQ(out.output.ndimension(), 3);
ASSERT_EQ(out.output.size(0), 3);
ASSERT_EQ(out.output.size(1), 4);
ASSERT_EQ(out.output.size(2), 2);
auto flat = out.output.view(3 * 4 * 2);
float c_out[] = {0.4391, 0.5402, 0.4330, 0.5324, 0.4261, 0.5239,
0.4183, 0.5147, 0.6822, 0.8064, 0.6726, 0.7968,
0.6620, 0.7860, 0.6501, 0.7741, 0.7889, 0.9003,
0.7769, 0.8905, 0.7635, 0.8794, 0.7484, 0.8666};
for (size_t i = 0; i < 3 * 4 * 2; i++) {
Remove caffe2::Tensor::capacity_nbytes, at::Tensor::to##name##Data, (#11876) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/11876 Modern C++ api instead of macros, item() is aligned with Python frontend. caffe2::Tensor::capacity_nbytes is effecitvely unused and confusing w.r.t. caffe2::Tensor::nbytes(). codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCByte "item<uint8_t>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCLong "item<int64_t>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCInt "item<int32_t>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCDouble "item<double>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toByteData "data<uint8_t>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toLongData "data<int64_t>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toIntData "data<int32_t>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toDoubleData "data<double>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toFloatData "data<float>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCByte "item<uint8_t>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCLong "item<int64_t>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCInt "item<int32_t>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCDouble "item<double>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toByteData "data<uint8_t>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toLongData "data<int64_t>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toIntData "data<int32_t>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toDoubleData "data<double>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toFloatData "data<float>" codemod -d caffe2 --extensions 
cc,cpp,cu,cuh,h,py,hpp,mm toCComplexDouble "item<std::complex<double>>" codemod -d tc --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>" Reviewed By: ezyang Differential Revision: D9948572 fbshipit-source-id: 70c9f5390d92b82c85fdd5f8a5aebca338ab413c
2018-09-24 17:39:10 +00:00
ASSERT_LT(std::abs(flat[i].item<float>() - c_out[i]), 1e-3);
}
ASSERT_EQ(out.state.ndimension(), 4); // (hx, cx) x layers x B x 2
ASSERT_EQ(out.state.size(0), 2);
ASSERT_EQ(out.state.size(1), 1);
ASSERT_EQ(out.state.size(2), 4);
ASSERT_EQ(out.state.size(3), 2);
flat = out.state.view(16);
float h_out[] = {0.7889,
0.9003,
0.7769,
0.8905,
0.7635,
0.8794,
0.7484,
0.8666,
1.1647,
1.6106,
1.1425,
1.5726,
1.1187,
1.5329,
1.0931,
1.4911};
for (size_t i = 0; i < 16; i++) {
Remove caffe2::Tensor::capacity_nbytes, at::Tensor::to##name##Data, (#11876) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/11876 Modern C++ api instead of macros, item() is aligned with Python frontend. caffe2::Tensor::capacity_nbytes is effecitvely unused and confusing w.r.t. caffe2::Tensor::nbytes(). codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCByte "item<uint8_t>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCLong "item<int64_t>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCInt "item<int32_t>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCDouble "item<double>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toByteData "data<uint8_t>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toLongData "data<int64_t>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toIntData "data<int32_t>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toDoubleData "data<double>" codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toFloatData "data<float>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCByte "item<uint8_t>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCLong "item<int64_t>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCInt "item<int32_t>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCDouble "item<double>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toByteData "data<uint8_t>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toLongData "data<int64_t>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toIntData "data<int32_t>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toDoubleData "data<double>" codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toFloatData "data<float>" codemod -d caffe2 --extensions 
cc,cpp,cu,cuh,h,py,hpp,mm toCComplexDouble "item<std::complex<double>>" codemod -d tc --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>" Reviewed By: ezyang Differential Revision: D9948572 fbshipit-source-id: 70c9f5390d92b82c85fdd5f8a5aebca338ab413c
2018-09-24 17:39:10 +00:00
ASSERT_LT(std::abs(flat[i].item<float>() - h_out[i]), 1e-3);
}
}
// Train a 2-layer LSTM on the sequence-sum task (CPU).
TEST_F(RNNTest, EndToEndLSTM) {
  const auto make_lstm = [](int s) {
    return LSTM(LSTMOptions(s, s).layers(2));
  };
  ASSERT_TRUE(test_RNN_xor<LSTM>(make_lstm));
}
// Train a 2-layer GRU on the sequence-sum task (CPU).
TEST_F(RNNTest, EndToEndGRU) {
  const auto make_gru = [](int s) {
    return GRU(GRUOptions(s, s).layers(2));
  };
  ASSERT_TRUE(test_RNN_xor<GRU>(make_gru));
}
// Train a 2-layer ReLU RNN on the sequence-sum task (CPU).
TEST_F(RNNTest, EndToEndRNNRelu) {
  const auto make_rnn = [](int s) {
    return RNN(RNNOptions(s, s).relu().layers(2));
  };
  ASSERT_TRUE(test_RNN_xor<RNN>(make_rnn));
}
// Train a 2-layer tanh RNN on the sequence-sum task (CPU).
TEST_F(RNNTest, EndToEndRNNTanh) {
  const auto make_rnn = [](int s) {
    return RNN(RNNOptions(s, s).tanh().layers(2));
  };
  ASSERT_TRUE(test_RNN_xor<RNN>(make_rnn));
}
2018-05-04 18:00:30 +00:00
// Same shape checks as CheckOutputSizes, but with the model and inputs on
// the GPU.
TEST_F(RNNTest, Sizes_CUDA) {
  torch::manual_seed(0);
  LSTM model(LSTMOptions(128, 64).layers(3).dropout(0.2));
  model->to(torch::kCUDA);
  auto x =
      torch::randn({10, 16, 128}, torch::requires_grad().device(torch::kCUDA));
  auto output = model->forward(x);
  auto y = x.mean();
  y.backward();
  check_lstm_sizes(output);

  // Feeding the previous state back in keeps the shapes intact.
  auto next = model->forward(x, output.state);
  check_lstm_sizes(next);

  torch::Tensor diff = next.state - output.state;
  // Hiddens changed
  ASSERT_GT(diff.abs().sum().item<float>(), 1e-3);
}
2018-05-04 18:00:30 +00:00
// Train a 2-layer LSTM on the sequence-sum task, on the GPU.
TEST_F(RNNTest, EndToEndLSTM_CUDA) {
  const auto make_lstm = [](int s) {
    return LSTM(LSTMOptions(s, s).layers(2));
  };
  ASSERT_TRUE(test_RNN_xor<LSTM>(make_lstm, /*cuda=*/true));
}
// Train a 2-layer GRU on the sequence-sum task, on the GPU.
TEST_F(RNNTest, EndToEndGRU_CUDA) {
  const auto make_gru = [](int s) {
    return GRU(GRUOptions(s, s).layers(2));
  };
  ASSERT_TRUE(test_RNN_xor<GRU>(make_gru, /*cuda=*/true));
}
// Train a 2-layer ReLU RNN on the sequence-sum task, on the GPU.
TEST_F(RNNTest, EndToEndRNNRelu_CUDA) {
  const auto make_rnn = [](int s) {
    return RNN(RNNOptions(s, s).relu().layers(2));
  };
  ASSERT_TRUE(test_RNN_xor<RNN>(make_rnn, /*cuda=*/true));
}
// Train a 2-layer tanh RNN on the sequence-sum task, on the GPU.
TEST_F(RNNTest, EndToEndRNNTanh_CUDA) {
  const auto make_rnn = [](int s) {
    return RNN(RNNOptions(s, s).tanh().layers(2));
  };
  ASSERT_TRUE(test_RNN_xor<RNN>(make_rnn, /*cuda=*/true));
}
// The pretty-printed representation of each RNN type must list its
// configured options.
TEST_F(RNNTest, PrettyPrintRNNs) {
  const auto lstm = LSTM(LSTMOptions(128, 64).layers(3).dropout(0.2));
  ASSERT_EQ(
      c10::str(lstm),
      "torch::nn::LSTM(input_size=128, hidden_size=64, layers=3, dropout=0.2)");
  const auto gru = GRU(GRUOptions(128, 64).layers(3).dropout(0.5));
  ASSERT_EQ(
      c10::str(gru),
      "torch::nn::GRU(input_size=128, hidden_size=64, layers=3, dropout=0.5)");
  const auto rnn = RNN(RNNOptions(128, 64).layers(3).dropout(0.2).tanh());
  ASSERT_EQ(
      c10::str(rnn),
      "torch::nn::RNN(input_size=128, hidden_size=64, layers=3, dropout=0.2, activation=tanh)");
}
// This test assures that flatten_parameters does not crash,
// when bidirectional is set to true
// https://github.com/pytorch/pytorch/issues/19545
// Regression test: flatten_parameters must not crash on a bidirectional GRU.
TEST_F(RNNTest, BidirectionalFlattenParameters) {
  GRU bidirectional_gru{GRUOptions(100, 256).layers(2).bidirectional(true)};
  bidirectional_gru->flatten_parameters();
}
// Copies the recurrent weights and biases of layer `s_i` in `source` into
// layer `t_i` of `target`, without recording autograd history.
template <typename Impl>
void copyParameters(
    torch::nn::ModuleHolder<Impl>& target,
    size_t t_i,
    const torch::nn::ModuleHolder<Impl>& source,
    size_t s_i) {
  at::NoGradGuard guard;
  // The four copies are independent of one another.
  target->b_hh[t_i].copy_(source->b_hh[s_i]);
  target->b_ih[t_i].copy_(source->b_ih[s_i]);
  target->w_hh[t_i].copy_(source->w_hh[s_i]);
  target->w_ih[t_i].copy_(source->w_ih[s_i]);
}
// This test is a port of python code introduced here:
// https://towardsdatascience.com/understanding-bidirectional-rnn-in-pytorch-5bd25a5dd66
// Reverse forward of bidrectional GRU should act
// as regular forward of unidirectional GRU
// Checks that the reverse direction of a bidirectional GRU matches a
// unidirectional GRU (with the same weights) run on the reversed input.
void BidirectionalGRUReverseForward(bool cuda) {
  // Inputs: an increasing sequence and its reverse, shaped (seq, batch, in).
  auto opt = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false)
      .device(cuda ? torch::kCUDA : torch::kCPU);
  auto input = torch::tensor({1, 2, 3, 4, 5}, opt).reshape({5, 1, 1});
  auto input_reversed = torch::tensor({5, 4, 3, 2, 1}, opt).reshape({5, 1, 1});
  auto gru_options = GRUOptions(1, 1).layers(1).batch_first(false);
  GRU bi_grus {gru_options.bidirectional(true)};
  GRU reverse_gru {gru_options.bidirectional(false)};
  if (cuda) {
    bi_grus->to(torch::kCUDA);
    reverse_gru->to(torch::kCUDA);
  }
  // Now make sure the weights of the reverse gru layer match
  // ones of the (reversed) bidirectional's:
  copyParameters(reverse_gru, 0, bi_grus, 1);
  auto bi_output = bi_grus->forward(input);
  auto reverse_output = reverse_gru->forward(input_reversed);
  // Compare on the CPU so indexing/item() below behaves the same either way.
  if (cuda) {
    bi_output.output = bi_output.output.to(torch::kCPU);
    bi_output.state = bi_output.state.to(torch::kCPU);
    reverse_output.output = reverse_output.output.to(torch::kCPU);
    reverse_output.state = reverse_output.state.to(torch::kCPU);
  }
  ASSERT_EQ(bi_output.output.size(0), reverse_output.output.size(0));
  auto size = bi_output.output.size(0);
  // Feature index 1 of the bidirectional output holds the reverse-direction
  // result; it must equal the unidirectional output at the mirrored step.
  for (int i = 0; i < size; i++) {
    ASSERT_EQ(bi_output.output[i][0][1].item<float>(),
              reverse_output.output[size - 1 - i][0][0].item<float>());
  }
  // The hidden states of the reversed GRUs sits
  // in the odd indices in the first dimension.
  ASSERT_EQ(bi_output.state[1][0][0].item<float>(),
            reverse_output.state[0][0][0].item<float>());
}
// CPU run of the bidirectional-GRU reverse-direction equivalence check.
TEST_F(RNNTest, BidirectionalGRUReverseForward) {
  constexpr bool use_cuda = false;
  BidirectionalGRUReverseForward(use_cuda);
}
// GPU run of the bidirectional-GRU reverse-direction equivalence check.
TEST_F(RNNTest, BidirectionalGRUReverseForward_CUDA) {
  constexpr bool use_cuda = true;
  BidirectionalGRUReverseForward(use_cuda);
}
// Reverse forward of bidrectional LSTM should act
// as regular forward of unidirectional LSTM
void BidirectionalLSTMReverseForwardTest(bool cuda) {
auto opt = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false)
.device(cuda ? torch::kCUDA : torch::kCPU);
auto input = torch::tensor({1, 2, 3, 4, 5}, opt).reshape({5, 1, 1});
auto input_reversed = torch::tensor({5, 4, 3, 2, 1}, opt).reshape({5, 1, 1});
auto lstm_opt = GRUOptions(1, 1).layers(1).batch_first(false);
LSTM bi_lstm {lstm_opt.bidirectional(true)};
LSTM reverse_lstm {lstm_opt.bidirectional(false)};
if (cuda) {
bi_lstm->to(torch::kCUDA);
reverse_lstm->to(torch::kCUDA);
}
// Now make sure the weights of the reverse lstm layer match
// ones of the (reversed) bidirectional's:
copyParameters(reverse_lstm, 0, bi_lstm, 1);
auto bi_output = bi_lstm->forward(input);
auto reverse_output = reverse_lstm->forward(input_reversed);
if (cuda) {
bi_output.output = bi_output.output.to(torch::kCPU);
bi_output.state = bi_output.state.to(torch::kCPU);
reverse_output.output = reverse_output.output.to(torch::kCPU);
reverse_output.state = reverse_output.state.to(torch::kCPU);
}
ASSERT_EQ(bi_output.output.size(0), reverse_output.output.size(0));
auto size = bi_output.output.size(0);
for (int i = 0; i < size; i++) {
ASSERT_EQ(bi_output.output[i][0][1].item<float>(),
reverse_output.output[size - 1 - i][0][0].item<float>());
}
// The hidden states of the reversed LSTM sits
// in the odd indices in the first dimension.
ASSERT_EQ(bi_output.state[0][1][0][0].item<float>(),
reverse_output.state[0][0][0][0].item<float>());
ASSERT_EQ(bi_output.state[1][1][0][0].item<float>(),
reverse_output.state[1][0][0][0].item<float>());
}
// CPU run of the bidirectional-LSTM reverse-direction equivalence check.
TEST_F(RNNTest, BidirectionalLSTMReverseForward) {
  constexpr bool use_cuda = false;
  BidirectionalLSTMReverseForwardTest(use_cuda);
}
// GPU run of the bidirectional-LSTM reverse-direction equivalence check.
TEST_F(RNNTest, BidirectionalLSTMReverseForward_CUDA) {
  constexpr bool use_cuda = true;
  BidirectionalLSTMReverseForwardTest(use_cuda);
}
// A bidirectional multilayer GRU with identical weights and inputs must
// produce (near-)identical outputs on CPU and CUDA.
TEST_F(RNNTest, BidirectionalMultilayerGRU_CPU_vs_CUDA) {
  // Create two GRUs with the same options
  auto opt = GRUOptions(2, 4).layers(3).batch_first(false).bidirectional(true);
  GRU gru_cpu {opt};
  GRU gru_cuda {opt};
  // Copy weights and biases from CPU GRU to CUDA GRU
  {
    at::NoGradGuard guard;
    const auto num_directions = gru_cpu->options.bidirectional() ? 2 : 1;
    for (int64_t layer = 0; layer < gru_cpu->options.layers(); layer++) {
      for (auto direction = 0; direction < num_directions; direction++) {
        // Flat parameter index: layer-major, direction-minor.
        const auto layer_idx = (layer * num_directions) + direction;
        copyParameters(gru_cuda, layer_idx, gru_cpu, layer_idx);
      }
    }
  }
  gru_cpu->flatten_parameters();
  gru_cuda->flatten_parameters();
  // Move GRU to CUDA
  gru_cuda->to(torch::kCUDA);
  // Create the same inputs
  auto input_opt = torch::TensorOptions()
      .dtype(torch::kFloat32).requires_grad(false);
  auto input_cpu = torch::tensor({1, 2, 3, 4, 5, 6}, input_opt)
      .reshape({3, 1, 2});
  auto input_cuda = torch::tensor({1, 2, 3, 4, 5, 6}, input_opt)
      .reshape({3, 1, 2}).to(torch::kCUDA);
  // Call forward on both GRUs
  auto output_cpu = gru_cpu->forward(input_cpu);
  auto output_cuda = gru_cuda->forward(input_cuda);
  // NOTE(review): output_cpu already lives on the CPU, so these .to(kCPU)
  // calls look like no-ops; the CUDA tensors are read below via
  // item<float>(), which copies each scalar to the host — confirm intent.
  output_cpu.output = output_cpu.output.to(torch::kCPU);
  output_cpu.state = output_cpu.state.to(torch::kCPU);
  // Assert that the output and state are equal on CPU and CUDA
  ASSERT_EQ(output_cpu.output.dim(), output_cuda.output.dim());
  for (int i = 0; i < output_cpu.output.dim(); i++) {
    ASSERT_EQ(output_cpu.output.size(i), output_cuda.output.size(i));
  }
  // Element-wise comparison with a small tolerance for backend differences.
  for (int i = 0; i < output_cpu.output.size(0); i++) {
    for (int j = 0; j < output_cpu.output.size(1); j++) {
      for (int k = 0; k < output_cpu.output.size(2); k++) {
        ASSERT_NEAR(
            output_cpu.output[i][j][k].item<float>(),
            output_cuda.output[i][j][k].item<float>(), 1e-5);
      }
    }
  }
}
// A bidirectional multilayer LSTM with identical weights and inputs must
// produce (near-)identical outputs on CPU and CUDA.
TEST_F(RNNTest, BidirectionalMultilayerLSTM_CPU_vs_CUDA) {
  // Create two LSTMs with the same options
  auto opt = LSTMOptions(2, 4).layers(3).batch_first(false).bidirectional(true);
  LSTM lstm_cpu {opt};
  LSTM lstm_cuda {opt};
  // Copy weights and biases from CPU LSTM to CUDA LSTM
  {
    at::NoGradGuard guard;
    const auto num_directions = lstm_cpu->options.bidirectional() ? 2 : 1;
    for (int64_t layer = 0; layer < lstm_cpu->options.layers(); layer++) {
      for (auto direction = 0; direction < num_directions; direction++) {
        // Flat parameter index: layer-major, direction-minor.
        const auto layer_idx = (layer * num_directions) + direction;
        copyParameters(lstm_cuda, layer_idx, lstm_cpu, layer_idx);
      }
    }
  }
  lstm_cpu->flatten_parameters();
  lstm_cuda->flatten_parameters();
  // Move LSTM to CUDA
  lstm_cuda->to(torch::kCUDA);
  auto options = torch::TensorOptions()
      .dtype(torch::kFloat32).requires_grad(false);
  auto input_cpu = torch::tensor({1, 2, 3, 4, 5, 6}, options)
      .reshape({3, 1, 2});
  auto input_cuda = torch::tensor({1, 2, 3, 4, 5, 6}, options)
      .reshape({3, 1, 2}).to(torch::kCUDA);
  // Call forward on both LSTMs
  auto output_cpu = lstm_cpu->forward(input_cpu);
  auto output_cuda = lstm_cuda->forward(input_cuda);
  // NOTE(review): output_cpu already lives on the CPU, so these .to(kCPU)
  // calls look like no-ops; the CUDA tensors are read below via
  // item<float>(), which copies each scalar to the host — confirm intent.
  output_cpu.output = output_cpu.output.to(torch::kCPU);
  output_cpu.state = output_cpu.state.to(torch::kCPU);
  // Assert that the output and state are equal on CPU and CUDA
  ASSERT_EQ(output_cpu.output.dim(), output_cuda.output.dim());
  for (int i = 0; i < output_cpu.output.dim(); i++) {
    ASSERT_EQ(output_cpu.output.size(i), output_cuda.output.size(i));
  }
  // Element-wise comparison with a small tolerance for backend differences.
  for (int i = 0; i < output_cpu.output.size(0); i++) {
    for (int j = 0; j < output_cpu.output.size(1); j++) {
      for (int k = 0; k < output_cpu.output.size(2); k++) {
        ASSERT_NEAR(
            output_cpu.output[i][j][k].item<float>(),
            output_cuda.output[i][j][k].item<float>(), 1e-5);
      }
    }
  }
}