2018-09-22 04:12:37 +00:00
|
|
|
#include <gtest/gtest.h>
|
2018-05-01 01:36:35 +00:00
|
|
|
|
2019-10-10 16:44:55 +00:00
|
|
|
#include <torch/torch.h>
|
2018-05-01 01:36:35 +00:00
|
|
|
|
2018-09-22 04:12:37 +00:00
|
|
|
#include <test/cpp/api/support.h>
|
2018-08-23 23:01:03 +00:00
|
|
|
|
2018-05-07 21:45:00 +00:00
|
|
|
using namespace torch::nn;
|
2018-07-17 04:43:40 +00:00
|
|
|
using namespace torch::test;
|
2018-05-01 01:36:35 +00:00
|
|
|
|
|
|
|
|
template <typename R, typename Func>
|
|
|
|
|
bool test_RNN_xor(Func&& model_maker, bool cuda = false) {
|
2018-06-28 03:00:53 +00:00
|
|
|
torch::manual_seed(0);
|
|
|
|
|
|
2018-05-01 01:36:35 +00:00
|
|
|
auto nhid = 32;
|
2018-07-17 04:43:40 +00:00
|
|
|
auto model = std::make_shared<SimpleContainer>();
|
2018-06-19 02:45:53 +00:00
|
|
|
auto l1 = model->add(Linear(1, nhid), "l1");
|
Adding support for CuDNN-based LSTM with projections (#47725)
Summary:
Fixes https://github.com/pytorch/pytorch/issues/46213
I didn't yet update the documentation, will add those change soon. A few other things that I didn't do, but want to clarify if I maybe should.
1. I didn't expose projections in c++ API: torch/csrc/api/src/nn/modules/rnn.cpp. Let me know if this is desirable and I will add those changes.
2. I didn't expose projections in "lstm_cell" function and "_thnn_differentiable_lstm_cell_backward" functions from aten/src/ATen/native/RNN.cpp. As far as I understand, they are not needed for nn.LSTM CPU execution. For lstm_cell, projections don't bring any real benefit, since if cell is used separately, it can be easily added in Python. For "_thnn_differentiable_lstm_cell_backward", I'm actually not sure where exactly that function is used, so I also disabled projections there for now. Please let me know if I should change that.
3. I added check that projections are not supported for quantized LSTMs to quantized_lstm_<data/input> functions. But I didn't add any checks to LSTMCell code. It seems that since I disabled projections in "lstm_cell" function, they should also not be available for quantized models through any other API than quantized_lstm_<data/input>. Please let me know if I'm not correct and I will add checks to other places.
4. Projections are not supported for CuDNN versions < 7.1.2. Should I add the check for CuDNN version and disable projections in that case? If so, what will be the best way to do that?
5. Currently I added projection weight as the last weight, so the layout is "w_ih, w_hh, b_ih, b_hh, w_hr". This breaks the assumption that biases come after weights and thus I had to add additional if-s in various places. Alternative way would be to have "w_ih, w_hh, w_hr, b_ih, b_hh" layout, in which case the assumption will be true. But in that case I will need to split the loop in get_parameters function from aten/src/ATen/native/cudnn/RNN.cpp. And in some cases, I will still need to add an "undefined" tensor in the 3rd position, because we get all 5 weights from CuDNN most of the time. So I'm not sure which way is better. Let me know if you think I should change to the weights-then-biases layout.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/47725
Reviewed By: zou3519
Differential Revision: D25449794
Pulled By: ngimel
fbshipit-source-id: fe6ce59e481d1f5fd861a8ff7fa13d1affcedb0c
2020-12-16 19:19:30 +00:00
|
|
|
auto rnn_model = model_maker(nhid);
|
|
|
|
|
auto rnn = model->add(rnn_model, "rnn");
|
|
|
|
|
auto nout = nhid;
|
|
|
|
|
if (rnn_model.get()->options_base.proj_size() > 0) {
|
|
|
|
|
nout = rnn_model.get()->options_base.proj_size();
|
|
|
|
|
}
|
|
|
|
|
auto lo = model->add(Linear(nout, 1), "lo");
|
2018-05-01 01:36:35 +00:00
|
|
|
|
2018-06-26 17:13:14 +00:00
|
|
|
torch::optim::Adam optimizer(model->parameters(), 1e-2);
|
2018-06-25 02:03:39 +00:00
|
|
|
auto forward_op = [&](torch::Tensor x) {
|
2018-05-01 01:36:35 +00:00
|
|
|
auto T = x.size(0);
|
|
|
|
|
auto B = x.size(1);
|
|
|
|
|
x = x.view({T * B, 1});
|
2018-06-26 20:23:16 +00:00
|
|
|
x = l1->forward(x).view({T, B, nhid}).tanh_();
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
x = std::get<0>(rnn->forward(x))[T - 1];
|
2018-06-26 20:23:16 +00:00
|
|
|
x = lo->forward(x);
|
2018-05-01 01:36:35 +00:00
|
|
|
return x;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if (cuda) {
|
2018-06-30 00:13:34 +00:00
|
|
|
model->to(torch::kCUDA);
|
2018-05-01 01:36:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
float running_loss = 1;
|
|
|
|
|
int epoch = 0;
|
|
|
|
|
auto max_epoch = 1500;
|
|
|
|
|
while (running_loss > 1e-2) {
|
|
|
|
|
auto bs = 16U;
|
|
|
|
|
auto nlen = 5U;
|
|
|
|
|
|
2018-06-27 21:34:06 +00:00
|
|
|
const auto backend = cuda ? torch::kCUDA : torch::kCPU;
|
|
|
|
|
auto inputs =
|
2018-11-09 17:52:59 +00:00
|
|
|
torch::rand({nlen, bs, 1}, backend).round().to(torch::kFloat32);
|
2018-06-27 21:34:06 +00:00
|
|
|
auto labels = inputs.sum(0).detach();
|
|
|
|
|
inputs.set_requires_grad(true);
|
|
|
|
|
auto outputs = forward_op(inputs);
|
|
|
|
|
torch::Tensor loss = torch::mse_loss(outputs, labels);
|
2018-05-01 01:36:35 +00:00
|
|
|
|
2018-06-26 17:13:14 +00:00
|
|
|
optimizer.zero_grad();
|
2018-05-25 00:31:41 +00:00
|
|
|
loss.backward();
|
2018-06-26 17:13:14 +00:00
|
|
|
optimizer.step();
|
2018-05-01 01:36:35 +00:00
|
|
|
|
Make PyTorch code-base clang-tidy compliant (#56892)
Summary:
This is an automatic change generated by the following script:
```
#!/usr/bin/env python3
from subprocess import check_output, check_call
import os
def get_compiled_files_list():
import json
with open("build/compile_commands.json") as f:
data = json.load(f)
files = [os.path.relpath(node['file']) for node in data]
for idx, fname in enumerate(files):
if fname.startswith('build/') and fname.endswith('.DEFAULT.cpp'):
files[idx] = fname[len('build/'):-len('.DEFAULT.cpp')]
return files
def run_clang_tidy(fname):
check_call(["python3", "tools/clang_tidy.py", "-c", "build", "-x", fname,"-s"])
changes = check_output(["git", "ls-files", "-m"])
if len(changes) == 0:
return
check_call(["git", "commit","--all", "-m", f"NOLINT stubs for {fname}"])
def main():
git_files = check_output(["git", "ls-files"]).decode("ascii").split("\n")
compiled_files = get_compiled_files_list()
for idx, fname in enumerate(git_files):
if fname not in compiled_files:
continue
if fname.startswith("caffe2/contrib/aten/"):
continue
print(f"[{idx}/{len(git_files)}] Processing {fname}")
run_clang_tidy(fname)
if __name__ == "__main__":
main()
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56892
Reviewed By: H-Huang
Differential Revision: D27991944
Pulled By: malfet
fbshipit-source-id: 5415e1eb2c1b34319a4f03024bfaa087007d7179
2021-04-28 21:09:06 +00:00
|
|
|
// NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,cppcoreguidelines-avoid-magic-numbers,bugprone-narrowing-conversions)
|
Remove caffe2::Tensor::capacity_nbytes, at::Tensor::to##name##Data, (#11876)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/11876
Modern C++ api instead of macros, item() is aligned with Python frontend. caffe2::Tensor::capacity_nbytes is effecitvely unused and confusing w.r.t. caffe2::Tensor::nbytes().
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCByte "item<uint8_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCLong "item<int64_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCInt "item<int32_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCDouble "item<double>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toByteData "data<uint8_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toLongData "data<int64_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toIntData "data<int32_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toDoubleData "data<double>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toFloatData "data<float>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCByte "item<uint8_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCLong "item<int64_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCInt "item<int32_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCDouble "item<double>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toByteData "data<uint8_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toLongData "data<int64_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toIntData "data<int32_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toDoubleData "data<double>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toFloatData "data<float>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCComplexDouble "item<std::complex<double>>"
codemod -d tc --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>"
Reviewed By: ezyang
Differential Revision: D9948572
fbshipit-source-id: 70c9f5390d92b82c85fdd5f8a5aebca338ab413c
2018-09-24 17:39:10 +00:00
|
|
|
running_loss = running_loss * 0.99 + loss.item<float>() * 0.01;
|
2018-05-01 01:36:35 +00:00
|
|
|
if (epoch > max_epoch) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
epoch++;
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
};
|
|
|
|
|
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflect the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
void check_lstm_sizes(std::tuple<torch::Tensor, std::tuple<torch::Tensor, torch::Tensor>> lstm_output) {
|
2018-05-04 18:00:30 +00:00
|
|
|
// Expect the LSTM to have 64 outputs and 3 layers, with an input of batch
|
|
|
|
|
// 10 and 16 time steps (10 x 16 x n)
|
|
|
|
|
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
torch::Tensor output = std::get<0>(lstm_output);
|
|
|
|
|
std::tuple<torch::Tensor, torch::Tensor> state = std::get<1>(lstm_output);
|
|
|
|
|
torch::Tensor hx = std::get<0>(state);
|
|
|
|
|
torch::Tensor cx = std::get<1>(state);
|
2018-05-04 18:00:30 +00:00
|
|
|
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
ASSERT_EQ(output.ndimension(), 3);
|
|
|
|
|
ASSERT_EQ(output.size(0), 10);
|
|
|
|
|
ASSERT_EQ(output.size(1), 16);
|
|
|
|
|
ASSERT_EQ(output.size(2), 64);
|
|
|
|
|
|
|
|
|
|
ASSERT_EQ(hx.ndimension(), 3);
|
|
|
|
|
ASSERT_EQ(hx.size(0), 3); // layers
|
|
|
|
|
ASSERT_EQ(hx.size(1), 16); // Batchsize
|
|
|
|
|
ASSERT_EQ(hx.size(2), 64); // 64 hidden dims
|
|
|
|
|
|
|
|
|
|
ASSERT_EQ(cx.ndimension(), 3);
|
|
|
|
|
ASSERT_EQ(cx.size(0), 3); // layers
|
|
|
|
|
ASSERT_EQ(cx.size(1), 16); // Batchsize
|
|
|
|
|
ASSERT_EQ(cx.size(2), 64); // 64 hidden dims
|
2018-05-04 18:00:30 +00:00
|
|
|
|
|
|
|
|
// Something is in the hiddens
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
ASSERT_GT(hx.norm().item<float>(), 0);
|
|
|
|
|
ASSERT_GT(cx.norm().item<float>(), 0);
|
2018-05-04 18:00:30 +00:00
|
|
|
}
|
|
|
|
|
|
Adding support for CuDNN-based LSTM with projections (#47725)
Summary:
Fixes https://github.com/pytorch/pytorch/issues/46213
I haven't updated the documentation yet; I will add those changes soon. There are a few other things that I didn't do, and I'd like to clarify whether I should.
1. I didn't expose projections in c++ API: torch/csrc/api/src/nn/modules/rnn.cpp. Let me know if this is desirable and I will add those changes.
2. I didn't expose projections in "lstm_cell" function and "_thnn_differentiable_lstm_cell_backward" functions from aten/src/ATen/native/RNN.cpp. As far as I understand, they are not needed for nn.LSTM CPU execution. For lstm_cell, projections don't bring any real benefit, since if cell is used separately, it can be easily added in Python. For "_thnn_differentiable_lstm_cell_backward", I'm actually not sure where exactly that function is used, so I also disabled projections there for now. Please let me know if I should change that.
3. I added check that projections are not supported for quantized LSTMs to quantized_lstm_<data/input> functions. But I didn't add any checks to LSTMCell code. It seems that since I disabled projections in "lstm_cell" function, they should also not be available for quantized models through any other API than quantized_lstm_<data/input>. Please let me know if I'm not correct and I will add checks to other places.
4. Projections are not supported for CuDNN versions < 7.1.2. Should I add the check for CuDNN version and disable projections in that case? If so, what will be the best way to do that?
5. Currently I added projection weight as the last weight, so the layout is "w_ih, w_hh, b_ih, b_hh, w_hr". This breaks the assumption that biases come after weights and thus I had to add additional if-s in various places. Alternative way would be to have "w_ih, w_hh, w_hr, b_ih, b_hh" layout, in which case the assumption will be true. But in that case I will need to split the loop in get_parameters function from aten/src/ATen/native/cudnn/RNN.cpp. And in some cases, I will still need to add an "undefined" tensor in the 3rd position, because we get all 5 weights from CuDNN most of the time. So I'm not sure which way is better. Let me know if you think I should change to the weights-then-biases layout.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/47725
Reviewed By: zou3519
Differential Revision: D25449794
Pulled By: ngimel
fbshipit-source-id: fe6ce59e481d1f5fd861a8ff7fa13d1affcedb0c
2020-12-16 19:19:30 +00:00
|
|
|
// Asserts the shapes of a projected-LSTM forward result given as
// (output, (hx, cx)).
//
// Expected layout:
//   output: 10 x 16 x 32
//   hx:     3 (layers) x 16 (batch size) x 32 (hidden dims)
//   cx:     3 (layers) x 16 (batch size) x 64 (cell dims)
// Additionally requires both state tensors to have a non-zero norm.
//
// Uses fatal gtest assertions, so the function returns at the first failure.
void check_lstm_sizes_proj(std::tuple<torch::Tensor, std::tuple<torch::Tensor, torch::Tensor>> lstm_output) {
  // Bind to the tuple elements directly rather than copying them out.
  const torch::Tensor& output = std::get<0>(lstm_output);
  const torch::Tensor& hx = std::get<0>(std::get<1>(lstm_output));
  const torch::Tensor& cx = std::get<1>(std::get<1>(lstm_output));

  // Output sequence (projected width).
  ASSERT_EQ(output.ndimension(), 3);
  ASSERT_EQ(output.size(0), 10);
  ASSERT_EQ(output.size(1), 16);
  ASSERT_EQ(output.size(2), 32);

  // Hidden state (projected width).
  ASSERT_EQ(hx.ndimension(), 3);
  ASSERT_EQ(hx.size(0), 3); // layers
  ASSERT_EQ(hx.size(1), 16); // Batchsize
  ASSERT_EQ(hx.size(2), 32); // 32 hidden dims

  // Cell state (width 64).
  ASSERT_EQ(cx.ndimension(), 3);
  ASSERT_EQ(cx.size(0), 3); // layers
  ASSERT_EQ(cx.size(1), 16); // Batchsize
  ASSERT_EQ(cx.size(2), 64); // 64 cell dims

  // Neither state tensor may be all zeros.
  ASSERT_GT(hx.norm().item<float>(), 0);
  ASSERT_GT(cx.norm().item<float>(), 0);
}
|
|
|
|
|
|
2018-09-22 04:12:37 +00:00
|
|
|
// Fixture type used by the TEST_F cases in this file. It derives from
// torch::test::SeedingFixture and adds no members of its own.
// NOTE(review): presumably the base fixture seeds the RNG before each test --
// confirm in test/cpp/api/support.h.
struct RNNTest : torch::test::SeedingFixture {};
|
|
|
|
|
|
|
|
|
|
TEST_F(RNNTest, CheckOutputSizes) {
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflect the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
LSTM model(LSTMOptions(128, 64).num_layers(3).dropout(0.2));
|
2018-08-23 23:01:03 +00:00
|
|
|
// Input size is: sequence length, batch size, input size
|
|
|
|
|
auto x = torch::randn({10, 16, 128}, torch::requires_grad());
|
|
|
|
|
auto output = model->forward(x);
|
|
|
|
|
auto y = x.mean();
|
2018-05-01 01:36:35 +00:00
|
|
|
|
2018-08-23 23:01:03 +00:00
|
|
|
y.backward();
|
|
|
|
|
check_lstm_sizes(output);
|
2018-05-01 01:36:35 +00:00
|
|
|
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflect the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
auto next = model->forward(x, std::get<1>(output));
|
2018-05-01 01:36:35 +00:00
|
|
|
|
2018-08-23 23:01:03 +00:00
|
|
|
check_lstm_sizes(next);
|
2018-05-01 01:36:35 +00:00
|
|
|
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflect the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
auto output_hx = std::get<0>(std::get<1>(output));
|
|
|
|
|
auto output_cx = std::get<1>(std::get<1>(output));
|
|
|
|
|
|
|
|
|
|
auto next_hx = std::get<0>(std::get<1>(next));
|
|
|
|
|
auto next_cx = std::get<1>(std::get<1>(next));
|
|
|
|
|
|
|
|
|
|
torch::Tensor diff = torch::cat({next_hx, next_cx}, 0) - torch::cat({output_hx, output_cx}, 0);
|
2018-05-01 01:36:35 +00:00
|
|
|
|
2018-08-23 23:01:03 +00:00
|
|
|
// Hiddens changed
|
Remove caffe2::Tensor::capacity_nbytes, at::Tensor::to##name##Data, (#11876)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/11876
Modern C++ api instead of macros, item() is aligned with Python frontend. caffe2::Tensor::capacity_nbytes is effectively unused and confusing w.r.t. caffe2::Tensor::nbytes().
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCByte "item<uint8_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCLong "item<int64_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCInt "item<int32_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCDouble "item<double>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toByteData "data<uint8_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toLongData "data<int64_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toIntData "data<int32_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toDoubleData "data<double>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toFloatData "data<float>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCByte "item<uint8_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCLong "item<int64_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCInt "item<int32_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCDouble "item<double>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toByteData "data<uint8_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toLongData "data<int64_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toIntData "data<int32_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toDoubleData "data<double>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toFloatData "data<float>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCComplexDouble "item<std::complex<double>>"
codemod -d tc --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>"
Reviewed By: ezyang
Differential Revision: D9948572
fbshipit-source-id: 70c9f5390d92b82c85fdd5f8a5aebca338ab413c
2018-09-24 17:39:10 +00:00
|
|
|
ASSERT_GT(diff.abs().sum().item<float>(), 1e-3);
|
2018-08-23 23:01:03 +00:00
|
|
|
}
|
2018-05-01 01:36:35 +00:00
|
|
|
|
Adding support for CuDNN-based LSTM with projections (#47725)
Summary:
Fixes https://github.com/pytorch/pytorch/issues/46213
I didn't yet update the documentation, will add those changes soon. A few other things that I didn't do, but want to clarify if I maybe should.
1. I didn't expose projections in c++ API: torch/csrc/api/src/nn/modules/rnn.cpp. Let me know if this is desirable and I will add those changes.
2. I didn't expose projections in "lstm_cell" function and "_thnn_differentiable_lstm_cell_backward" functions from aten/src/ATen/native/RNN.cpp. As far as I understand, they are not needed for nn.LSTM CPU execution. For lstm_cell, projections don't bring any real benefit, since if cell is used separately, it can be easily added in Python. For "_thnn_differentiable_lstm_cell_backward", I'm actually not sure where exactly that function is used, so I also disabled projections there for now. Please let me know if I should change that.
3. I added check that projections are not supported for quantized LSTMs to quantized_lstm_<data/input> functions. But I didn't add any checks to LSTMCell code. It seems that since I disabled projections in "lstm_cell" function, they should also not be available for quantized models through any other API than quantized_lstm_<data/input>. Please let me know if I'm not correct and I will add checks to other places.
4. Projections are not supported for CuDNN versions < 7.1.2. Should I add the check for CuDNN version and disable projections in that case? If so, what will be the best way to do that?
5. Currently I added projection weight as the last weight, so the layout is "w_ih, w_hh, b_ih, b_hh, w_hr". This breaks the assumption that biases come after weights and thus I had to add additional if-s in various places. Alternative way would be to have "w_ih, w_hh, w_hr, b_ih, b_hh" layout, in which case the assumption will be true. But in that case I will need to split the loop in get_parameters function from aten/src/ATen/native/cudnn/RNN.cpp. And in some cases, I will still need to add an "undefined" tensor in the 3rd position, because we get all 5 weights from CuDNN most of the time. So I'm not sure which way is better. Let me know if you think I should change to the weights-then-biases layout.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/47725
Reviewed By: zou3519
Differential Revision: D25449794
Pulled By: ngimel
fbshipit-source-id: fe6ce59e481d1f5fd861a8ff7fa13d1affcedb0c
2020-12-16 19:19:30 +00:00
|
|
|
// Runs a projected LSTM twice on the same input (the second pass is seeded
// with the state returned by the first), validates both results with
// check_lstm_sizes_proj, and asserts that both halves of the returned state
// differ between the two passes.
TEST_F(RNNTest, CheckOutputSizesProj) {
  LSTM lstm(LSTMOptions(128, 64).num_layers(3).dropout(0.2).proj_size(32));
  // Input layout: (sequence length, batch size, input size)
  auto input = torch::randn({10, 16, 128}, torch::requires_grad());
  auto first_pass = lstm->forward(input);
  // NOTE(review): backward is run on the mean of the input, not of the LSTM
  // output -- confirm this is intended.
  auto input_mean = input.mean();
  input_mean.backward();
  check_lstm_sizes_proj(first_pass);

  // Second pass starts from the (hx, cx) state produced by the first one.
  auto second_pass = lstm->forward(input, std::get<1>(first_pass));
  check_lstm_sizes_proj(second_pass);

  const auto& first_state = std::get<1>(first_pass);
  const auto& second_state = std::get<1>(second_pass);
  auto first_hx = std::get<0>(first_state);
  auto first_cx = std::get<1>(first_state);
  auto second_hx = std::get<0>(second_state);
  auto second_cx = std::get<1>(second_state);

  // Hiddens changed
  torch::Tensor hidden_delta = second_hx - first_hx;
  ASSERT_GT(hidden_delta.abs().sum().item<float>(), 1e-3);

  // Cell states changed as well
  torch::Tensor cell_delta = second_cx - first_cx;
  ASSERT_GT(cell_delta.abs().sum().item<float>(), 1e-3);
}
|
|
|
|
|
|
2018-09-22 04:12:37 +00:00
|
|
|
// Regression test against fixed reference numbers: an LSTM(2, 2) whose
// parameters are each overwritten with the ramp i / numel is run on a
// {3, 4, 2} input filled with (numel - i) / numel. The flattened output
// sequence must match c_out, and cat({hx, cx}, 0) flattened must match h_out
// (first 8 entries are hx, last 8 are cx), each within 1e-3.
// NOTE(review): the fill loops write through storage().data() cast to float*,
// which presumably relies on contiguous float32 tensors -- confirm.
TEST_F(RNNTest, CheckOutputValuesMatchPyTorch) {
|
2018-08-23 23:01:03 +00:00
|
|
|
torch::manual_seed(0);
|
|
|
|
|
// Make sure the outputs match pytorch outputs
|
|
|
|
|
LSTM model(2, 2);
|
|
|
|
|
// Deterministic weights: element i of every parameter becomes i / numel.
for (auto& v : model->parameters()) {
|
Replace cursors with OrderedDict (#13427)
Summary:
This is a pre-cursor diff to Python <-> C++ frontend integration -- I have a follow-up PR coming for that. This PR changes the C++ frontend module interface to replace the custom "cursor"s I introduced some time ago with `OrderedDict`. I introduced cursors at the time as a convenient way of applying functions and query operations on a modules' parameters, buffers and modules, allowing things like `module.parameters().map(my_func)`. However, I noticed that (1) this functionality is easily implement-able on top of a regular data structure and (2) more importantly, using OrderedDicts is much, much easier for Python integration. This is especially true given that ScriptModule today also uses OrderedDict. Since C++ frontend modules and ScriptModules will soon too share as many implementation details as possible, it is overall the best move to ditch the custom cursor datastructure and pervasively use OrderedDict everywhere.
For this I did:
1. Changed the C++ frontend module interface to more closely match the Python one by providing `parameters()`, `named_parameters()` and other methods Python provides. This is very important for the following diff which binds these into Python for inter-op with Python modules.
2. In lieu of the `Cursor::apply()` method I added `nn::Module::apply`. This again is one more unifying step between Python and C++, since Python modules have an apply function too.
3. Deleted all uses of Cursor.
4. Tidied and beefed up the `OrderedDict` class. In particular, I made `OrderedDict::Item` store an `std::pair` under the hood, because that is trivial to bind into Python and saved me a lot of headaches. `key` and `value` become methods instead of fields, which they should have been from the very start anyway because it allows exactly these kinds of changes, as per usual good software engineering principle of encapsulation.
5. Added many tests for the OrderedDict use in `nn::Module`.
ebetica ezyang
Pull Request resolved: https://github.com/pytorch/pytorch/pull/13427
Differential Revision: D12894092
Pulled By: goldsborough
fbshipit-source-id: 715770c95a9643753a1db26d7f9da9a78619a15d
2018-11-07 18:53:07 +00:00
|
|
|
float size = v.numel();
|
|
|
|
|
auto p = static_cast<float*>(v.storage().data());
|
2018-06-28 03:00:53 +00:00
|
|
|
for (size_t i = 0; i < size; i++) {
|
2018-08-23 23:01:03 +00:00
|
|
|
p[i] = i / size;
|
2018-06-28 03:00:53 +00:00
|
|
|
}
|
2018-08-23 23:01:03 +00:00
|
|
|
}
|
2018-05-01 01:36:35 +00:00
|
|
|
|
2018-08-23 23:01:03 +00:00
|
|
|
auto x = torch::empty({3, 4, 2}, torch::requires_grad());
|
|
|
|
|
float size = x.numel();
|
|
|
|
|
auto p = static_cast<float*>(x.storage().data());
|
|
|
|
|
for (size_t i = 0; i < size; i++) {
|
|
|
|
|
p[i] = (size - i) / size;
|
|
|
|
|
}
|
2018-06-28 03:00:53 +00:00
|
|
|
|
2018-08-23 23:01:03 +00:00
|
|
|
auto out = model->forward(x);
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
ASSERT_EQ(std::get<0>(out).ndimension(), 3);
|
|
|
|
|
ASSERT_EQ(std::get<0>(out).size(0), 3);
|
|
|
|
|
ASSERT_EQ(std::get<0>(out).size(1), 4);
|
|
|
|
|
ASSERT_EQ(std::get<0>(out).size(2), 2);
|
2018-08-23 23:01:03 +00:00
|
|
|
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
auto flat = std::get<0>(out).view(3 * 4 * 2);
|
Make PyTorch code-base clang-tidy compliant (#56892)
Summary:
This is an automatic change generated by the following script:
```
#!/usr/bin/env python3
from subprocess import check_output, check_call
import os
def get_compiled_files_list():
import json
with open("build/compile_commands.json") as f:
data = json.load(f)
files = [os.path.relpath(node['file']) for node in data]
for idx, fname in enumerate(files):
if fname.startswith('build/') and fname.endswith('.DEFAULT.cpp'):
files[idx] = fname[len('build/'):-len('.DEFAULT.cpp')]
return files
def run_clang_tidy(fname):
check_call(["python3", "tools/clang_tidy.py", "-c", "build", "-x", fname,"-s"])
changes = check_output(["git", "ls-files", "-m"])
if len(changes) == 0:
return
check_call(["git", "commit","--all", "-m", f"NOLINT stubs for {fname}"])
def main():
git_files = check_output(["git", "ls-files"]).decode("ascii").split("\n")
compiled_files = get_compiled_files_list()
for idx, fname in enumerate(git_files):
if fname not in compiled_files:
continue
if fname.startswith("caffe2/contrib/aten/"):
continue
print(f"[{idx}/{len(git_files)}] Processing {fname}")
run_clang_tidy(fname)
if __name__ == "__main__":
main()
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56892
Reviewed By: H-Huang
Differential Revision: D27991944
Pulled By: malfet
fbshipit-source-id: 5415e1eb2c1b34319a4f03024bfaa087007d7179
2021-04-28 21:09:06 +00:00
|
|
|
// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
|
2018-08-23 23:01:03 +00:00
|
|
|
float c_out[] = {0.4391, 0.5402, 0.4330, 0.5324, 0.4261, 0.5239,
|
|
|
|
|
0.4183, 0.5147, 0.6822, 0.8064, 0.6726, 0.7968,
|
|
|
|
|
0.6620, 0.7860, 0.6501, 0.7741, 0.7889, 0.9003,
|
|
|
|
|
0.7769, 0.8905, 0.7635, 0.8794, 0.7484, 0.8666};
|
|
|
|
|
for (size_t i = 0; i < 3 * 4 * 2; i++) {
|
Remove caffe2::Tensor::capacity_nbytes, at::Tensor::to##name##Data, (#11876)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/11876
Modern C++ api instead of macros, item() is aligned with Python frontend. caffe2::Tensor::capacity_nbytes is effecitvely unused and confusing w.r.t. caffe2::Tensor::nbytes().
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCByte "item<uint8_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCLong "item<int64_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCInt "item<int32_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCDouble "item<double>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toByteData "data<uint8_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toLongData "data<int64_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toIntData "data<int32_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toDoubleData "data<double>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toFloatData "data<float>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCByte "item<uint8_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCLong "item<int64_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCInt "item<int32_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCDouble "item<double>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toByteData "data<uint8_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toLongData "data<int64_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toIntData "data<int32_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toDoubleData "data<double>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toFloatData "data<float>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCComplexDouble "item<std::complex<double>>"
codemod -d tc --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>"
Reviewed By: ezyang
Differential Revision: D9948572
fbshipit-source-id: 70c9f5390d92b82c85fdd5f8a5aebca338ab413c
2018-09-24 17:39:10 +00:00
|
|
|
ASSERT_LT(std::abs(flat[i].item<float>() - c_out[i]), 1e-3);
|
2018-08-23 23:01:03 +00:00
|
|
|
}
|
|
|
|
|
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
auto hx = std::get<0>(std::get<1>(out));
|
|
|
|
|
auto cx = std::get<1>(std::get<1>(out));
|
|
|
|
|
|
|
|
|
|
ASSERT_EQ(hx.ndimension(), 3); // layers x B x 2
|
|
|
|
|
ASSERT_EQ(hx.size(0), 1);
|
|
|
|
|
ASSERT_EQ(hx.size(1), 4);
|
|
|
|
|
ASSERT_EQ(hx.size(2), 2);
|
|
|
|
|
|
|
|
|
|
ASSERT_EQ(cx.ndimension(), 3); // layers x B x 2
|
|
|
|
|
ASSERT_EQ(cx.size(0), 1);
|
|
|
|
|
ASSERT_EQ(cx.size(1), 4);
|
|
|
|
|
ASSERT_EQ(cx.size(2), 2);
|
|
|
|
|
|
|
|
|
|
flat = torch::cat({hx, cx}, 0).view(16);
|
Make PyTorch code-base clang-tidy compliant (#56892)
Summary:
This is an automatic change generated by the following script:
```
#!/usr/bin/env python3
from subprocess import check_output, check_call
import os
def get_compiled_files_list():
import json
with open("build/compile_commands.json") as f:
data = json.load(f)
files = [os.path.relpath(node['file']) for node in data]
for idx, fname in enumerate(files):
if fname.startswith('build/') and fname.endswith('.DEFAULT.cpp'):
files[idx] = fname[len('build/'):-len('.DEFAULT.cpp')]
return files
def run_clang_tidy(fname):
check_call(["python3", "tools/clang_tidy.py", "-c", "build", "-x", fname,"-s"])
changes = check_output(["git", "ls-files", "-m"])
if len(changes) == 0:
return
check_call(["git", "commit","--all", "-m", f"NOLINT stubs for {fname}"])
def main():
git_files = check_output(["git", "ls-files"]).decode("ascii").split("\n")
compiled_files = get_compiled_files_list()
for idx, fname in enumerate(git_files):
if fname not in compiled_files:
continue
if fname.startswith("caffe2/contrib/aten/"):
continue
print(f"[{idx}/{len(git_files)}] Processing {fname}")
run_clang_tidy(fname)
if __name__ == "__main__":
main()
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56892
Reviewed By: H-Huang
Differential Revision: D27991944
Pulled By: malfet
fbshipit-source-id: 5415e1eb2c1b34319a4f03024bfaa087007d7179
2021-04-28 21:09:06 +00:00
|
|
|
// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
|
2018-08-23 23:01:03 +00:00
|
|
|
float h_out[] = {0.7889,
|
|
|
|
|
0.9003,
|
|
|
|
|
0.7769,
|
|
|
|
|
0.8905,
|
|
|
|
|
0.7635,
|
|
|
|
|
0.8794,
|
|
|
|
|
0.7484,
|
|
|
|
|
0.8666,
|
|
|
|
|
1.1647,
|
|
|
|
|
1.6106,
|
|
|
|
|
1.1425,
|
|
|
|
|
1.5726,
|
|
|
|
|
1.1187,
|
|
|
|
|
1.5329,
|
|
|
|
|
1.0931,
|
|
|
|
|
1.4911};
|
|
|
|
|
for (size_t i = 0; i < 16; i++) {
|
Remove caffe2::Tensor::capacity_nbytes, at::Tensor::to##name##Data, (#11876)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/11876
Modern C++ api instead of macros, item() is aligned with Python frontend. caffe2::Tensor::capacity_nbytes is effecitvely unused and confusing w.r.t. caffe2::Tensor::nbytes().
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCByte "item<uint8_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCLong "item<int64_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCInt "item<int32_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCDouble "item<double>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toByteData "data<uint8_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toLongData "data<int64_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toIntData "data<int32_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toDoubleData "data<double>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toFloatData "data<float>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCByte "item<uint8_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCLong "item<int64_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCInt "item<int32_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCDouble "item<double>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toByteData "data<uint8_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toLongData "data<int64_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toIntData "data<int32_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toDoubleData "data<double>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toFloatData "data<float>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCComplexDouble "item<std::complex<double>>"
codemod -d tc --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>"
Reviewed By: ezyang
Differential Revision: D9948572
fbshipit-source-id: 70c9f5390d92b82c85fdd5f8a5aebca338ab413c
2018-09-24 17:39:10 +00:00
|
|
|
ASSERT_LT(std::abs(flat[i].item<float>() - h_out[i]), 1e-3);
|
2018-05-17 21:10:15 +00:00
|
|
|
}
|
|
|
|
|
}
|
2018-05-01 01:36:35 +00:00
|
|
|
|
2018-09-22 04:12:37 +00:00
|
|
|
// End-to-end check: test_RNN_xor<LSTM> must return true when given a factory
// that builds LSTM(LSTMOptions(s, s).num_layers(2)) for the size s it is
// called with.
TEST_F(RNNTest, EndToEndLSTM) {
|
|
|
|
|
ASSERT_TRUE(test_RNN_xor<LSTM>(
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
[](int s) { return LSTM(LSTMOptions(s, s).num_layers(2)); }));
|
2018-05-17 21:10:15 +00:00
|
|
|
}
|
2018-05-01 01:36:35 +00:00
|
|
|
|
Adding support for CuDNN-based LSTM with projections (#47725)
Summary:
Fixes https://github.com/pytorch/pytorch/issues/46213
I didn't yet update the documentation, will add those changes soon. A few other things that I didn't do, but want to clarify if I maybe should.
1. I didn't expose projections in c++ API: torch/csrc/api/src/nn/modules/rnn.cpp. Let me know if this is desirable and I will add those changes.
2. I didn't expose projections in "lstm_cell" function and "_thnn_differentiable_lstm_cell_backward" functions from aten/src/ATen/native/RNN.cpp. As far as I understand, they are not needed for nn.LSTM CPU execution. For lstm_cell, projections don't bring any real benefit, since if cell is used separately, it can be easily added in Python. For "_thnn_differentiable_lstm_cell_backward", I'm actually not sure where exactly that function is used, so I also disabled projections there for now. Please let me know if I should change that.
3. I added check that projections are not supported for quantized LSTMs to quantized_lstm_<data/input> functions. But I didn't add any checks to LSTMCell code. It seems that since I disabled projections in "lstm_cell" function, they should also not be available for quantized models through any other API than quantized_lstm_<data/input>. Please let me know if I'm not correct and I will add checks to other places.
4. Projections are not supported for CuDNN versions < 7.1.2. Should I add the check for CuDNN version and disable projections in that case? If so, what will be the best way to do that?
5. Currently I added projection weight as the last weight, so the layout is "w_ih, w_hh, b_ih, b_hh, w_hr". This breaks the assumption that biases come after weights and thus I had to add additional if-s in various places. Alternative way would be to have "w_ih, w_hh, w_hr, b_ih, b_hh" layout, in which case the assumption will be true. But in that case I will need to split the loop in get_parameters function from aten/src/ATen/native/cudnn/RNN.cpp. And in some cases, I will still need to add an "undefined" tensor in the 3rd position, because we get all 5 weights from CuDNN most of the time. So I'm not sure which way is better. Let me know if you think I should change to the weights-then-biases layout.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/47725
Reviewed By: zou3519
Differential Revision: D25449794
Pulled By: ngimel
fbshipit-source-id: fe6ce59e481d1f5fd861a8ff7fa13d1affcedb0c
2020-12-16 19:19:30 +00:00
|
|
|
// End-to-end check for an LSTM with projections: test_RNN_xor<LSTM> must
// return true for a two-layer LSTM whose proj_size is half of the size it is
// built with.
TEST_F(RNNTest, EndToEndLSTMProj) {
  auto make_projected_lstm = [](int size) {
    return LSTM(LSTMOptions(size, size).num_layers(2).proj_size(size / 2));
  };
  ASSERT_TRUE(test_RNN_xor<LSTM>(make_projected_lstm));
}
|
|
|
|
|
|
2018-09-22 04:12:37 +00:00
|
|
|
// End-to-end check: test_RNN_xor<GRU> must return true when given a factory
// that builds GRU(GRUOptions(s, s).num_layers(2)) for the size s it is called
// with.
TEST_F(RNNTest, EndToEndGRU) {
|
|
|
|
|
ASSERT_TRUE(
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
test_RNN_xor<GRU>([](int s) { return GRU(GRUOptions(s, s).num_layers(2)); }));
|
2018-05-17 21:10:15 +00:00
|
|
|
}
|
2018-05-15 01:24:58 +00:00
|
|
|
|
2018-09-22 04:12:37 +00:00
|
|
|
// End-to-end check: test_RNN_xor<RNN> must return true when given a factory
// that builds a two-layer RNN with the torch::kReLU nonlinearity for the size
// s it is called with.
TEST_F(RNNTest, EndToEndRNNRelu) {
|
|
|
|
|
ASSERT_TRUE(test_RNN_xor<RNN>(
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
[](int s) { return RNN(RNNOptions(s, s).nonlinearity(torch::kReLU).num_layers(2)); }));
|
2018-05-01 01:36:35 +00:00
|
|
|
}
|
|
|
|
|
|
2018-09-22 04:12:37 +00:00
|
|
|
TEST_F(RNNTest, EndToEndRNNTanh) {
|
|
|
|
|
ASSERT_TRUE(test_RNN_xor<RNN>(
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
[](int s) { return RNN(RNNOptions(s, s).nonlinearity(torch::kTanh).num_layers(2)); }));
|
2018-09-22 04:12:37 +00:00
|
|
|
}
|
2018-05-04 18:00:30 +00:00
|
|
|
|
2018-09-22 04:12:37 +00:00
|
|
|
TEST_F(RNNTest, Sizes_CUDA) {
|
|
|
|
|
torch::manual_seed(0);
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
LSTM model(LSTMOptions(128, 64).num_layers(3).dropout(0.2));
|
2018-09-22 04:12:37 +00:00
|
|
|
model->to(torch::kCUDA);
|
|
|
|
|
auto x =
|
|
|
|
|
torch::randn({10, 16, 128}, torch::requires_grad().device(torch::kCUDA));
|
|
|
|
|
auto output = model->forward(x);
|
|
|
|
|
auto y = x.mean();
|
|
|
|
|
|
|
|
|
|
y.backward();
|
|
|
|
|
check_lstm_sizes(output);
|
2018-05-04 18:00:30 +00:00
|
|
|
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
auto next = model->forward(x, std::get<1>(output));
|
2018-05-04 18:00:30 +00:00
|
|
|
|
2018-09-22 04:12:37 +00:00
|
|
|
check_lstm_sizes(next);
|
2018-05-04 18:00:30 +00:00
|
|
|
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
auto output_hx = std::get<0>(std::get<1>(output));
|
|
|
|
|
auto output_cx = std::get<1>(std::get<1>(output));
|
|
|
|
|
|
|
|
|
|
auto next_hx = std::get<0>(std::get<1>(next));
|
|
|
|
|
auto next_cx = std::get<1>(std::get<1>(next));
|
|
|
|
|
|
|
|
|
|
torch::Tensor diff = torch::cat({next_hx, next_cx}, 0) - torch::cat({output_hx, output_cx}, 0);
|
2018-05-04 18:00:30 +00:00
|
|
|
|
2018-09-22 04:12:37 +00:00
|
|
|
// Hiddens changed
|
Remove caffe2::Tensor::capacity_nbytes, at::Tensor::to##name##Data, (#11876)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/11876
Modern C++ api instead of macros, item() is aligned with Python frontend. caffe2::Tensor::capacity_nbytes is effecitvely unused and confusing w.r.t. caffe2::Tensor::nbytes().
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCByte "item<uint8_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCLong "item<int64_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCInt "item<int32_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCDouble "item<double>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toByteData "data<uint8_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toLongData "data<int64_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toIntData "data<int32_t>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toDoubleData "data<double>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toFloatData "data<float>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCByte "item<uint8_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCLong "item<int64_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCInt "item<int32_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCDouble "item<double>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toByteData "data<uint8_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toLongData "data<int64_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toIntData "data<int32_t>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toDoubleData "data<double>"
codemod -d hphp --extensions cc,cpp,cu,cuh,h,py,hpp,mm toFloatData "data<float>"
codemod -d caffe2 --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCComplexDouble "item<std::complex<double>>"
codemod -d tc --extensions cc,cpp,cu,cuh,h,py,hpp,mm toCFloat "item<float>"
Reviewed By: ezyang
Differential Revision: D9948572
fbshipit-source-id: 70c9f5390d92b82c85fdd5f8a5aebca338ab413c
2018-09-24 17:39:10 +00:00
|
|
|
ASSERT_GT(diff.abs().sum().item<float>(), 1e-3);
|
2018-09-22 04:12:37 +00:00
|
|
|
}
|
2018-05-04 18:00:30 +00:00
|
|
|
|
Adding support for CuDNN-based LSTM with projections (#47725)
Summary:
Fixes https://github.com/pytorch/pytorch/issues/46213
I haven't updated the documentation yet; I will add those changes soon. There are a few other things that I didn't do, and I'd like to clarify whether I should.
1. I didn't expose projections in c++ API: torch/csrc/api/src/nn/modules/rnn.cpp. Let me know if this is desirable and I will add those changes.
2. I didn't expose projections in "lstm_cell" function and "_thnn_differentiable_lstm_cell_backward" functions from aten/src/ATen/native/RNN.cpp. As far as I understand, they are not needed for nn.LSTM CPU execution. For lstm_cell, projections don't bring any real benefit, since if cell is used separately, it can be easily added in Python. For "_thnn_differentiable_lstm_cell_backward", I'm actually not sure where exactly that function is used, so I also disabled projections there for now. Please let me know if I should change that.
3. I added check that projections are not supported for quantized LSTMs to quantized_lstm_<data/input> functions. But I didn't add any checks to LSTMCell code. It seems that since I disabled projections in "lstm_cell" function, they should also not be available for quantized models through any other API than quantized_lstm_<data/input>. Please let me know if I'm not correct and I will add checks to other places.
4. Projections are not supported for CuDNN versions < 7.1.2. Should I add the check for CuDNN version and disable projections in that case? If so, what will be the best way to do that?
5. Currently I added projection weight as the last weight, so the layout is "w_ih, w_hh, b_ih, b_hh, w_hr". This breaks the assumption that biases come after weights and thus I had to add additional if-s in various places. Alternative way would be to have "w_ih, w_hh, w_hr, b_ih, b_hh" layout, in which case the assumption will be true. But in that case I will need to split the loop in get_parameters function from aten/src/ATen/native/cudnn/RNN.cpp. And in some cases, I will still need to add an "undefined" tensor in the 3rd position, because we get all 5 weights from CuDNN most of the time. So I'm not sure which way is better. Let me know if you think I should change to the weights-then-biases layout.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/47725
Reviewed By: zou3519
Differential Revision: D25449794
Pulled By: ngimel
fbshipit-source-id: fe6ce59e481d1f5fd861a8ff7fa13d1affcedb0c
2020-12-16 19:19:30 +00:00
|
|
|
// Same flow as the plain CUDA size test, but for an LSTM built with
// proj_size(32): two forward passes (the second seeded with the first pass's
// state), both checked by check_lstm_sizes_proj (defined elsewhere in this
// file), followed by separate "state changed" checks for h and c.
TEST_F(RNNTest, SizesProj_CUDA) {
  torch::manual_seed(0);

  LSTM model(LSTMOptions(128, 64).num_layers(3).dropout(0.2).proj_size(32));
  model->to(torch::kCUDA);

  const auto input_options = torch::requires_grad().device(torch::kCUDA);
  auto x = torch::randn({10, 16, 128}, input_options);

  // First pass: no initial hidden/cell state supplied.
  auto output = model->forward(x);

  // NOTE(review): backward() is run on x.mean(), which does not depend on
  // `output`, so this does not appear to exercise the LSTM backward pass.
  // Confirm whether that is intentional before changing it.
  x.mean().backward();
  check_lstm_sizes_proj(output);

  // Second pass: reuse the (h, c) state produced by the first pass.
  const auto& first_state = std::get<1>(output);
  auto next = model->forward(x, first_state);
  check_lstm_sizes_proj(next);

  const auto& second_state = std::get<1>(next);
  const auto h_before = std::get<0>(first_state);
  const auto c_before = std::get<1>(first_state);
  const auto h_after = std::get<0>(second_state);
  const auto c_after = std::get<1>(second_state);

  // Hiddens changed
  // h and c are compared separately here (presumably because their sizes
  // differ once a projection is configured -- TODO confirm).
  const torch::Tensor h_delta = h_after - h_before;
  ASSERT_GT(h_delta.abs().sum().item<float>(), 1e-3);

  const torch::Tensor c_delta = c_after - c_before;
  ASSERT_GT(c_delta.abs().sum().item<float>(), 1e-3);
}
|
|
|
|
|
|
2018-09-22 04:12:37 +00:00
|
|
|
TEST_F(RNNTest, EndToEndLSTM_CUDA) {
|
|
|
|
|
ASSERT_TRUE(test_RNN_xor<LSTM>(
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
[](int s) { return LSTM(LSTMOptions(s, s).num_layers(2)); }, true));
|
2018-09-22 04:12:37 +00:00
|
|
|
}
|
2018-05-01 01:36:35 +00:00
|
|
|
|
Adding support for CuDNN-based LSTM with projections (#47725)
Summary:
Fixes https://github.com/pytorch/pytorch/issues/46213
I haven't updated the documentation yet; I will add those changes soon. There are a few other things that I didn't do, and I'd like to clarify whether I should.
1. I didn't expose projections in c++ API: torch/csrc/api/src/nn/modules/rnn.cpp. Let me know if this is desirable and I will add those changes.
2. I didn't expose projections in "lstm_cell" function and "_thnn_differentiable_lstm_cell_backward" functions from aten/src/ATen/native/RNN.cpp. As far as I understand, they are not needed for nn.LSTM CPU execution. For lstm_cell, projections don't bring any real benefit, since if cell is used separately, it can be easily added in Python. For "_thnn_differentiable_lstm_cell_backward", I'm actually not sure where exactly that function is used, so I also disabled projections there for now. Please let me know if I should change that.
3. I added check that projections are not supported for quantized LSTMs to quantized_lstm_<data/input> functions. But I didn't add any checks to LSTMCell code. It seems that since I disabled projections in "lstm_cell" function, they should also not be available for quantized models through any other API than quantized_lstm_<data/input>. Please let me know if I'm not correct and I will add checks to other places.
4. Projections are not supported for CuDNN versions < 7.1.2. Should I add the check for CuDNN version and disable projections in that case? If so, what will be the best way to do that?
5. Currently I added projection weight as the last weight, so the layout is "w_ih, w_hh, b_ih, b_hh, w_hr". This breaks the assumption that biases come after weights and thus I had to add additional if-s in various places. Alternative way would be to have "w_ih, w_hh, w_hr, b_ih, b_hh" layout, in which case the assumption will be true. But in that case I will need to split the loop in get_parameters function from aten/src/ATen/native/cudnn/RNN.cpp. And in some cases, I will still need to add an "undefined" tensor in the 3rd position, because we get all 5 weights from CuDNN most of the time. So I'm not sure which way is better. Let me know if you think I should change to the weights-then-biases layout.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/47725
Reviewed By: zou3519
Differential Revision: D25449794
Pulled By: ngimel
fbshipit-source-id: fe6ce59e481d1f5fd861a8ff7fa13d1affcedb0c
2020-12-16 19:19:30 +00:00
|
|
|
// Runs the shared test_RNN_xor driver (defined earlier in this file) with a
// two-layer LSTM whose proj_size is half of the size it is given, with the
// driver's `cuda` flag set, and requires the driver to report success.
TEST_F(RNNTest, EndToEndLSTMProj_CUDA) {
  // Factory handed to the driver; `size` is used for both LSTMOptions
  // constructor arguments, and size / 2 (integer division) for proj_size.
  const auto make_model = [](int size) {
    return LSTM(LSTMOptions(size, size).num_layers(2).proj_size(size / 2));
  };
  ASSERT_TRUE(test_RNN_xor<LSTM>(make_model, /*cuda=*/true));
}
|
|
|
|
|
|
2018-09-22 04:12:37 +00:00
|
|
|
TEST_F(RNNTest, EndToEndGRU_CUDA) {
|
|
|
|
|
ASSERT_TRUE(test_RNN_xor<GRU>(
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
[](int s) { return GRU(GRUOptions(s, s).num_layers(2)); }, true));
|
2018-09-22 04:12:37 +00:00
|
|
|
}
|
2018-05-01 01:36:35 +00:00
|
|
|
|
2018-09-22 04:12:37 +00:00
|
|
|
TEST_F(RNNTest, EndToEndRNNRelu_CUDA) {
|
|
|
|
|
ASSERT_TRUE(test_RNN_xor<RNN>(
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
[](int s) { return RNN(RNNOptions(s, s).nonlinearity(torch::kReLU).num_layers(2)); }, true));
|
2018-09-22 04:12:37 +00:00
|
|
|
}
|
|
|
|
|
TEST_F(RNNTest, EndToEndRNNTanh_CUDA) {
|
|
|
|
|
ASSERT_TRUE(test_RNN_xor<RNN>(
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
[](int s) { return RNN(RNNOptions(s, s).nonlinearity(torch::kTanh).num_layers(2)); }, true));
|
2018-05-01 01:36:35 +00:00
|
|
|
}
|
Pretty printing of C++ modules (#15326)
Summary:
A long outstanding nicety: pretty printing of C++ modules. E.g.
```
Sequential sequential(
Linear(10, 3),
Conv2d(1, 2, 3),
Dropout(0.5),
BatchNorm(5),
Embedding(4, 10),
LSTM(4, 5));
std::cout << sequential;
```
prints
```
torch::nn::Sequential(
(0): torch::nn::Linear(in=10, out=3, with_bias=true)
(1): torch::nn::Conv2d(input_channels=1, output_channels=2, kernel_size=[3, 3], stride=[1, 1])
(2): torch::nn::Dropout(rate=0.5)
(3): torch::nn::BatchNorm(features=5, eps=1e-05, momentum=0.1, affine=true, stateful=true)
(4): torch::nn::Embedding(count=4, dimension=10)
(5): torch::nn::LSTM(input_size=4, hidden_size=5, layers=1, dropout=0)
)
```
apaszke ebetica ezyang
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15326
Differential Revision: D13518986
Pulled By: goldsborough
fbshipit-source-id: 63bf753672f0e348951de3645208f263581de5fb
2018-12-20 05:38:00 +00:00
|
|
|
|
|
|
|
|
TEST_F(RNNTest, PrettyPrintRNNs) {
|
|
|
|
|
ASSERT_EQ(
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflect the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
c10::str(LSTM(LSTMOptions(128, 64).num_layers(3).dropout(0.2))),
|
|
|
|
|
"torch::nn::LSTM(input_size=128, hidden_size=64, num_layers=3, bias=true, batch_first=false, dropout=0.2, bidirectional=false)");
|
Adding support for CuDNN-based LSTM with projections (#47725)
Summary:
Fixes https://github.com/pytorch/pytorch/issues/46213
I haven't updated the documentation yet; I will add those changes soon. There are a few other things that I didn't do, and I'd like to clarify whether I should.
1. I didn't expose projections in c++ API: torch/csrc/api/src/nn/modules/rnn.cpp. Let me know if this is desirable and I will add those changes.
2. I didn't expose projections in "lstm_cell" function and "_thnn_differentiable_lstm_cell_backward" functions from aten/src/ATen/native/RNN.cpp. As far as I understand, they are not needed for nn.LSTM CPU execution. For lstm_cell, projections don't bring any real benefit, since if cell is used separately, it can be easily added in Python. For "_thnn_differentiable_lstm_cell_backward", I'm actually not sure where exactly that function is used, so I also disabled projections there for now. Please let me know if I should change that.
3. I added check that projections are not supported for quantized LSTMs to quantized_lstm_<data/input> functions. But I didn't add any checks to LSTMCell code. It seems that since I disabled projections in "lstm_cell" function, they should also not be available for quantized models through any other API than quantized_lstm_<data/input>. Please let me know if I'm not correct and I will add checks to other places.
4. Projections are not supported for CuDNN versions < 7.1.2. Should I add the check for CuDNN version and disable projections in that case? If so, what will be the best way to do that?
5. Currently I added projection weight as the last weight, so the layout is "w_ih, w_hh, b_ih, b_hh, w_hr". This breaks the assumption that biases come after weights and thus I had to add additional if-s in various places. Alternative way would be to have "w_ih, w_hh, w_hr, b_ih, b_hh" layout, in which case the assumption will be true. But in that case I will need to split the loop in get_parameters function from aten/src/ATen/native/cudnn/RNN.cpp. And in some cases, I will still need to add an "undefined" tensor in the 3rd position, because we get all 5 weights from CuDNN most of the time. So I'm not sure which way is better. Let me know if you think I should change to the weights-then-biases layout.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/47725
Reviewed By: zou3519
Differential Revision: D25449794
Pulled By: ngimel
fbshipit-source-id: fe6ce59e481d1f5fd861a8ff7fa13d1affcedb0c
2020-12-16 19:19:30 +00:00
|
|
|
ASSERT_EQ(
|
|
|
|
|
c10::str(LSTM(LSTMOptions(128, 64).num_layers(3).dropout(0.2).proj_size(32))),
|
|
|
|
|
"torch::nn::LSTM(input_size=128, hidden_size=64, num_layers=3, bias=true, batch_first=false, dropout=0.2, bidirectional=false, proj_size=32)");
|
Pretty printing of C++ modules (#15326)
Summary:
A long outstanding nicety: pretty printing of C++ modules. E.g.
```
Sequential sequential(
Linear(10, 3),
Conv2d(1, 2, 3),
Dropout(0.5),
BatchNorm(5),
Embedding(4, 10),
LSTM(4, 5));
std::cout << sequential;
```
prints
```
torch::nn::Sequential(
(0): torch::nn::Linear(in=10, out=3, with_bias=true)
(1): torch::nn::Conv2d(input_channels=1, output_channels=2, kernel_size=[3, 3], stride=[1, 1])
(2): torch::nn::Dropout(rate=0.5)
(3): torch::nn::BatchNorm(features=5, eps=1e-05, momentum=0.1, affine=true, stateful=true)
(4): torch::nn::Embedding(count=4, dimension=10)
(5): torch::nn::LSTM(input_size=4, hidden_size=5, layers=1, dropout=0)
)
```
apaszke ebetica ezyang
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15326
Differential Revision: D13518986
Pulled By: goldsborough
fbshipit-source-id: 63bf753672f0e348951de3645208f263581de5fb
2018-12-20 05:38:00 +00:00
|
|
|
ASSERT_EQ(
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
c10::str(GRU(GRUOptions(128, 64).num_layers(3).dropout(0.5))),
|
|
|
|
|
"torch::nn::GRU(input_size=128, hidden_size=64, num_layers=3, bias=true, batch_first=false, dropout=0.5, bidirectional=false)");
|
Pretty printing of C++ modules (#15326)
Summary:
A long outstanding nicety: pretty printing of C++ modules. E.g.
```
Sequential sequential(
Linear(10, 3),
Conv2d(1, 2, 3),
Dropout(0.5),
BatchNorm(5),
Embedding(4, 10),
LSTM(4, 5));
std::cout << sequential;
```
prints
```
torch::nn::Sequential(
(0): torch::nn::Linear(in=10, out=3, with_bias=true)
(1): torch::nn::Conv2d(input_channels=1, output_channels=2, kernel_size=[3, 3], stride=[1, 1])
(2): torch::nn::Dropout(rate=0.5)
(3): torch::nn::BatchNorm(features=5, eps=1e-05, momentum=0.1, affine=true, stateful=true)
(4): torch::nn::Embedding(count=4, dimension=10)
(5): torch::nn::LSTM(input_size=4, hidden_size=5, layers=1, dropout=0)
)
```
apaszke ebetica ezyang
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15326
Differential Revision: D13518986
Pulled By: goldsborough
fbshipit-source-id: 63bf753672f0e348951de3645208f263581de5fb
2018-12-20 05:38:00 +00:00
|
|
|
ASSERT_EQ(
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
c10::str(RNN(RNNOptions(128, 64).num_layers(3).dropout(0.2).nonlinearity(torch::kTanh))),
|
|
|
|
|
"torch::nn::RNN(input_size=128, hidden_size=64, num_layers=3, bias=true, batch_first=false, dropout=0.2, bidirectional=false)");
|
Pretty printing of C++ modules (#15326)
Summary:
A long outstanding nicety: pretty printing of C++ modules. E.g.
```
Sequential sequential(
Linear(10, 3),
Conv2d(1, 2, 3),
Dropout(0.5),
BatchNorm(5),
Embedding(4, 10),
LSTM(4, 5));
std::cout << sequential;
```
prints
```
torch::nn::Sequential(
(0): torch::nn::Linear(in=10, out=3, with_bias=true)
(1): torch::nn::Conv2d(input_channels=1, output_channels=2, kernel_size=[3, 3], stride=[1, 1])
(2): torch::nn::Dropout(rate=0.5)
(3): torch::nn::BatchNorm(features=5, eps=1e-05, momentum=0.1, affine=true, stateful=true)
(4): torch::nn::Embedding(count=4, dimension=10)
(5): torch::nn::LSTM(input_size=4, hidden_size=5, layers=1, dropout=0)
)
```
apaszke ebetica ezyang
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15326
Differential Revision: D13518986
Pulled By: goldsborough
fbshipit-source-id: 63bf753672f0e348951de3645208f263581de5fb
2018-12-20 05:38:00 +00:00
|
|
|
}
|
2019-07-08 17:16:40 +00:00
|
|
|
|
|
|
|
|
// This test assures that flatten_parameters does not crash,
|
|
|
|
|
// when bidirectional is set to true
|
|
|
|
|
// https://github.com/pytorch/pytorch/issues/19545
|
|
|
|
|
TEST_F(RNNTest, BidirectionalFlattenParameters) {
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
GRU gru(GRUOptions(100, 256).num_layers(2).bidirectional(true));
|
2019-07-08 17:16:40 +00:00
|
|
|
gru->flatten_parameters();
|
|
|
|
|
}
|
2019-07-22 19:52:11 +00:00
|
|
|
|
|
|
|
|
template <typename Impl>
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
void copyParameters(torch::nn::ModuleHolder<Impl>& target, std::string t_suffix,
|
|
|
|
|
const torch::nn::ModuleHolder<Impl>& source, std::string s_suffix) {
|
2019-07-22 19:52:11 +00:00
|
|
|
at::NoGradGuard guard;
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
target->named_parameters()["weight_ih_l" + t_suffix].copy_(source->named_parameters()["weight_ih_l" + s_suffix]);
|
|
|
|
|
target->named_parameters()["weight_hh_l" + t_suffix].copy_(source->named_parameters()["weight_hh_l" + s_suffix]);
|
|
|
|
|
target->named_parameters()["bias_ih_l" + t_suffix].copy_(source->named_parameters()["bias_ih_l" + s_suffix]);
|
|
|
|
|
target->named_parameters()["bias_hh_l" + t_suffix].copy_(source->named_parameters()["bias_hh_l" + s_suffix]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::tuple<torch::Tensor, torch::Tensor> gru_output_to_device(
|
|
|
|
|
std::tuple<torch::Tensor, torch::Tensor> gru_output, torch::Device device) {
|
|
|
|
|
return std::make_tuple(
|
|
|
|
|
std::get<0>(gru_output).to(device),
|
|
|
|
|
std::get<1>(gru_output).to(device));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::tuple<torch::Tensor, std::tuple<torch::Tensor, torch::Tensor>> lstm_output_to_device(
|
|
|
|
|
std::tuple<torch::Tensor, std::tuple<torch::Tensor, torch::Tensor>> lstm_output, torch::Device device) {
|
|
|
|
|
auto hidden_states = std::get<1>(lstm_output);
|
|
|
|
|
return std::make_tuple(
|
|
|
|
|
std::get<0>(lstm_output).to(device),
|
|
|
|
|
std::make_tuple(
|
|
|
|
|
std::get<0>(hidden_states).to(device),
|
|
|
|
|
std::get<1>(hidden_states).to(device)));
|
2019-07-22 19:52:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// This test is a port of python code introduced here:
|
|
|
|
|
// https://towardsdatascience.com/understanding-bidirectional-rnn-in-pytorch-5bd25a5dd66
|
2020-01-18 00:01:29 +00:00
|
|
|
// Reverse forward of bidirectional GRU should act
|
2019-07-22 19:52:11 +00:00
|
|
|
// as regular forward of unidirectional GRU
|
|
|
|
|
void BidirectionalGRUReverseForward(bool cuda) {
|
|
|
|
|
auto opt = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false)
|
|
|
|
|
.device(cuda ? torch::kCUDA : torch::kCPU);
|
|
|
|
|
auto input = torch::tensor({1, 2, 3, 4, 5}, opt).reshape({5, 1, 1});
|
|
|
|
|
auto input_reversed = torch::tensor({5, 4, 3, 2, 1}, opt).reshape({5, 1, 1});
|
|
|
|
|
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
auto gru_options = GRUOptions(1, 1).num_layers(1).batch_first(false);
|
2019-07-22 19:52:11 +00:00
|
|
|
GRU bi_grus {gru_options.bidirectional(true)};
|
|
|
|
|
GRU reverse_gru {gru_options.bidirectional(false)};
|
|
|
|
|
|
|
|
|
|
if (cuda) {
|
|
|
|
|
bi_grus->to(torch::kCUDA);
|
|
|
|
|
reverse_gru->to(torch::kCUDA);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Now make sure the weights of the reverse gru layer match
|
|
|
|
|
// ones of the (reversed) bidirectional's:
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
copyParameters(reverse_gru, "0", bi_grus, "0_reverse");
|
2019-07-22 19:52:11 +00:00
|
|
|
|
|
|
|
|
auto bi_output = bi_grus->forward(input);
|
|
|
|
|
auto reverse_output = reverse_gru->forward(input_reversed);
|
|
|
|
|
|
|
|
|
|
if (cuda) {
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
bi_output = gru_output_to_device(bi_output, torch::kCPU);
|
|
|
|
|
reverse_output = gru_output_to_device(reverse_output, torch::kCPU);
|
2019-07-22 19:52:11 +00:00
|
|
|
}
|
|
|
|
|
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
ASSERT_EQ(std::get<0>(bi_output).size(0), std::get<0>(reverse_output).size(0));
|
|
|
|
|
auto size = std::get<0>(bi_output).size(0);
|
2019-07-22 19:52:11 +00:00
|
|
|
for (int i = 0; i < size; i++) {
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
ASSERT_EQ(std::get<0>(bi_output)[i][0][1].item<float>(),
|
|
|
|
|
std::get<0>(reverse_output)[size - 1 - i][0][0].item<float>());
|
2019-07-22 19:52:11 +00:00
|
|
|
}
|
|
|
|
|
// The hidden states of the reversed GRUs sits
|
|
|
|
|
// in the odd indices in the first dimension.
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
ASSERT_EQ(std::get<1>(bi_output)[1][0][0].item<float>(),
|
|
|
|
|
std::get<1>(reverse_output)[0][0][0].item<float>());
|
2019-07-22 19:52:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Runs the bidirectional-GRU reverse-direction check with `cuda` disabled.
TEST_F(RNNTest, BidirectionalGRUReverseForward) {
  const bool use_cuda = false;
  BidirectionalGRUReverseForward(use_cuda);
}
|
|
|
|
|
|
|
|
|
|
// Runs the bidirectional-GRU reverse-direction check with `cuda` enabled.
TEST_F(RNNTest, BidirectionalGRUReverseForward_CUDA) {
  const bool use_cuda = true;
  BidirectionalGRUReverseForward(use_cuda);
}
|
|
|
|
|
|
2020-01-18 00:01:29 +00:00
|
|
|
// Reverse forward of bidirectional LSTM should act
|
2019-07-22 19:52:11 +00:00
|
|
|
// as regular forward of unidirectional LSTM
|
|
|
|
|
void BidirectionalLSTMReverseForwardTest(bool cuda) {
|
|
|
|
|
auto opt = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false)
|
|
|
|
|
.device(cuda ? torch::kCUDA : torch::kCPU);
|
|
|
|
|
auto input = torch::tensor({1, 2, 3, 4, 5}, opt).reshape({5, 1, 1});
|
|
|
|
|
auto input_reversed = torch::tensor({5, 4, 3, 2, 1}, opt).reshape({5, 1, 1});
|
|
|
|
|
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
auto lstm_opt = LSTMOptions(1, 1).num_layers(1).batch_first(false);
|
2019-07-22 19:52:11 +00:00
|
|
|
|
|
|
|
|
LSTM bi_lstm {lstm_opt.bidirectional(true)};
|
|
|
|
|
LSTM reverse_lstm {lstm_opt.bidirectional(false)};
|
|
|
|
|
|
|
|
|
|
if (cuda) {
|
|
|
|
|
bi_lstm->to(torch::kCUDA);
|
|
|
|
|
reverse_lstm->to(torch::kCUDA);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Now make sure the weights of the reverse lstm layer match
|
|
|
|
|
// ones of the (reversed) bidirectional's:
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
copyParameters(reverse_lstm, "0", bi_lstm, "0_reverse");
|
2019-07-22 19:52:11 +00:00
|
|
|
|
|
|
|
|
auto bi_output = bi_lstm->forward(input);
|
|
|
|
|
auto reverse_output = reverse_lstm->forward(input_reversed);
|
|
|
|
|
|
|
|
|
|
if (cuda) {
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
bi_output = lstm_output_to_device(bi_output, torch::kCPU);
|
|
|
|
|
reverse_output = lstm_output_to_device(reverse_output, torch::kCPU);
|
2019-07-22 19:52:11 +00:00
|
|
|
}
|
|
|
|
|
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
ASSERT_EQ(std::get<0>(bi_output).size(0), std::get<0>(reverse_output).size(0));
|
|
|
|
|
auto size = std::get<0>(bi_output).size(0);
|
2019-07-22 19:52:11 +00:00
|
|
|
for (int i = 0; i < size; i++) {
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
ASSERT_EQ(std::get<0>(bi_output)[i][0][1].item<float>(),
|
|
|
|
|
std::get<0>(reverse_output)[size - 1 - i][0][0].item<float>());
|
2019-07-22 19:52:11 +00:00
|
|
|
}
|
|
|
|
|
// The hidden states of the reversed LSTM sit
|
|
|
|
|
// in the odd indices in the first dimension.
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
ASSERT_EQ(std::get<0>(std::get<1>(bi_output))[1][0][0].item<float>(),
|
|
|
|
|
std::get<0>(std::get<1>(reverse_output))[0][0][0].item<float>());
|
|
|
|
|
ASSERT_EQ(std::get<1>(std::get<1>(bi_output))[1][0][0].item<float>(),
|
|
|
|
|
std::get<1>(std::get<1>(reverse_output))[0][0][0].item<float>());
|
2019-07-22 19:52:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TEST_F(RNNTest, BidirectionalLSTMReverseForward) {
  // Run the shared bidirectional-vs-reversed LSTM check with CUDA disabled.
  BidirectionalLSTMReverseForwardTest(/*cuda=*/false);
}
|
|
|
|
|
|
|
|
|
|
TEST_F(RNNTest, BidirectionalLSTMReverseForward_CUDA) {
  // Run the shared bidirectional-vs-reversed LSTM check with CUDA enabled.
  BidirectionalLSTMReverseForwardTest(/*cuda=*/true);
}
|
|
|
|
|
|
|
|
|
|
TEST_F(RNNTest, BidirectionalMultilayerGRU_CPU_vs_CUDA) {
|
|
|
|
|
// Create two GRUs with the same options
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
auto opt = GRUOptions(2, 4).num_layers(3).batch_first(false).bidirectional(true);
|
2019-07-22 19:52:11 +00:00
|
|
|
GRU gru_cpu {opt};
|
|
|
|
|
GRU gru_cuda {opt};
|
|
|
|
|
|
|
|
|
|
// Copy weights and biases from CPU GRU to CUDA GRU
|
|
|
|
|
{
|
|
|
|
|
at::NoGradGuard guard;
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
for (const auto& param : gru_cpu->named_parameters(/*recurse=*/false)) {
|
|
|
|
|
gru_cuda->named_parameters()[param.key()].copy_(gru_cpu->named_parameters()[param.key()]);
|
2019-07-22 19:52:11 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
gru_cpu->flatten_parameters();
|
|
|
|
|
gru_cuda->flatten_parameters();
|
|
|
|
|
|
|
|
|
|
// Move GRU to CUDA
|
|
|
|
|
gru_cuda->to(torch::kCUDA);
|
|
|
|
|
|
|
|
|
|
// Create the same inputs
|
|
|
|
|
auto input_opt = torch::TensorOptions()
|
|
|
|
|
.dtype(torch::kFloat32).requires_grad(false);
|
|
|
|
|
auto input_cpu = torch::tensor({1, 2, 3, 4, 5, 6}, input_opt)
|
|
|
|
|
.reshape({3, 1, 2});
|
|
|
|
|
auto input_cuda = torch::tensor({1, 2, 3, 4, 5, 6}, input_opt)
|
|
|
|
|
.reshape({3, 1, 2}).to(torch::kCUDA);
|
|
|
|
|
|
|
|
|
|
// Call forward on both GRUs
|
|
|
|
|
auto output_cpu = gru_cpu->forward(input_cpu);
|
|
|
|
|
auto output_cuda = gru_cuda->forward(input_cuda);
|
|
|
|
|
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
output_cpu = gru_output_to_device(output_cpu, torch::kCPU);
|
2019-07-22 19:52:11 +00:00
|
|
|
|
|
|
|
|
// Assert that the output and state are equal on CPU and CUDA
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
ASSERT_EQ(std::get<0>(output_cpu).dim(), std::get<0>(output_cuda).dim());
|
|
|
|
|
for (int i = 0; i < std::get<0>(output_cpu).dim(); i++) {
|
|
|
|
|
ASSERT_EQ(std::get<0>(output_cpu).size(i), std::get<0>(output_cuda).size(i));
|
2019-07-22 19:52:11 +00:00
|
|
|
}
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
for (int i = 0; i < std::get<0>(output_cpu).size(0); i++) {
|
|
|
|
|
for (int j = 0; j < std::get<0>(output_cpu).size(1); j++) {
|
|
|
|
|
for (int k = 0; k < std::get<0>(output_cpu).size(2); k++) {
|
2019-07-22 19:52:11 +00:00
|
|
|
ASSERT_NEAR(
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
std::get<0>(output_cpu)[i][j][k].item<float>(),
|
|
|
|
|
std::get<0>(output_cuda)[i][j][k].item<float>(), 1e-5);
|
2019-07-22 19:52:11 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TEST_F(RNNTest, BidirectionalMultilayerLSTM_CPU_vs_CUDA) {
|
|
|
|
|
// Create two LSTMs with the same options
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
auto opt = LSTMOptions(2, 4).num_layers(3).batch_first(false).bidirectional(true);
|
2019-07-22 19:52:11 +00:00
|
|
|
LSTM lstm_cpu {opt};
|
|
|
|
|
LSTM lstm_cuda {opt};
|
|
|
|
|
|
|
|
|
|
// Copy weights and biases from CPU LSTM to CUDA LSTM
|
|
|
|
|
{
|
|
|
|
|
at::NoGradGuard guard;
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
for (const auto& param : lstm_cpu->named_parameters(/*recurse=*/false)) {
|
|
|
|
|
lstm_cuda->named_parameters()[param.key()].copy_(lstm_cpu->named_parameters()[param.key()]);
|
2019-07-22 19:52:11 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
lstm_cpu->flatten_parameters();
|
|
|
|
|
lstm_cuda->flatten_parameters();
|
|
|
|
|
|
|
|
|
|
// Move LSTM to CUDA
|
|
|
|
|
lstm_cuda->to(torch::kCUDA);
|
|
|
|
|
|
|
|
|
|
auto options = torch::TensorOptions()
|
|
|
|
|
.dtype(torch::kFloat32).requires_grad(false);
|
|
|
|
|
auto input_cpu = torch::tensor({1, 2, 3, 4, 5, 6}, options)
|
|
|
|
|
.reshape({3, 1, 2});
|
|
|
|
|
auto input_cuda = torch::tensor({1, 2, 3, 4, 5, 6}, options)
|
|
|
|
|
.reshape({3, 1, 2}).to(torch::kCUDA);
|
|
|
|
|
|
|
|
|
|
// Call forward on both LSTMs
|
Adding support for CuDNN-based LSTM with projections (#47725)
Summary:
Fixes https://github.com/pytorch/pytorch/issues/46213
I didn't yet update the documentation, will add those change soon. A few other things that I didn't do, but want to clarify if I maybe should.
1. I didn't expose projections in c++ API: torch/csrc/api/src/nn/modules/rnn.cpp. Let me know if this is desirable and I will add those changes.
2. I didn't expose projections in "lstm_cell" function and "_thnn_differentiable_lstm_cell_backward" functions from aten/src/ATen/native/RNN.cpp. As far as I understand, they are not needed for nn.LSTM CPU execution. For lstm_cell, projections don't bring any real benefit, since if cell is used separately, it can be easily added in Python. For "_thnn_differentiable_lstm_cell_backward", I'm actually not sure where exactly that function is used, so I also disabled projections there for now. Please let me know if I should change that.
3. I added check that projections are not supported for quantized LSTMs to quantized_lstm_<data/input> functions. But I didn't add any checks to LSTMCell code. It seems that since I disabled projections in "lstm_cell" function, they should also not be available for quantized models through any other API than quantized_lstm_<data/input>. Please let me know if I'm not correct and I will add checks to other places.
4. Projections are not supported for CuDNN versions < 7.1.2. Should I add the check for CuDNN version and disable projections in that case? If so, what will be the best way to do that?
5. Currently I added projection weight as the last weight, so the layout is "w_ih, w_hh, b_ih, b_hh, w_hr". This breaks the assumption that biases come after weights and thus I had to add additional if-s in various places. Alternative way would be to have "w_ih, w_hh, w_hr, b_ih, b_hh" layout, in which case the assumption will be true. But in that case I will need to split the loop in get_parameters function from aten/src/ATen/native/cudnn/RNN.cpp. And in some cases, I will still need to add an "undefined" tensor in the 3rd position, because we get all 5 weights from CuDNN most of the time. So I'm not sure which way is better. Let me know if you think I should change to the weights-then-biases layout.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/47725
Reviewed By: zou3519
Differential Revision: D25449794
Pulled By: ngimel
fbshipit-source-id: fe6ce59e481d1f5fd861a8ff7fa13d1affcedb0c
2020-12-16 19:19:30 +00:00
|
|
|
auto output_cpu = lstm_cpu->forward(input_cpu);
|
|
|
|
|
auto output_cuda = lstm_cuda->forward(input_cuda);
|
|
|
|
|
|
|
|
|
|
output_cpu = lstm_output_to_device(output_cpu, torch::kCPU);
|
|
|
|
|
|
|
|
|
|
// Assert that the output and state are equal on CPU and CUDA
|
|
|
|
|
ASSERT_EQ(std::get<0>(output_cpu).dim(), std::get<0>(output_cuda).dim());
|
|
|
|
|
for (int i = 0; i < std::get<0>(output_cpu).dim(); i++) {
|
|
|
|
|
ASSERT_EQ(std::get<0>(output_cpu).size(i), std::get<0>(output_cuda).size(i));
|
|
|
|
|
}
|
|
|
|
|
for (int i = 0; i < std::get<0>(output_cpu).size(0); i++) {
|
|
|
|
|
for (int j = 0; j < std::get<0>(output_cpu).size(1); j++) {
|
|
|
|
|
for (int k = 0; k < std::get<0>(output_cpu).size(2); k++) {
|
|
|
|
|
ASSERT_NEAR(
|
|
|
|
|
std::get<0>(output_cpu)[i][j][k].item<float>(),
|
|
|
|
|
std::get<0>(output_cuda)[i][j][k].item<float>(), 1e-5);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TEST_F(RNNTest, BidirectionalMultilayerLSTMProj_CPU_vs_CUDA) {
|
|
|
|
|
// Create two LSTMs with the same options
|
|
|
|
|
auto opt = LSTMOptions(2, 4).num_layers(3).batch_first(false).bidirectional(true).proj_size(2);
|
|
|
|
|
LSTM lstm_cpu {opt};
|
|
|
|
|
LSTM lstm_cuda {opt};
|
|
|
|
|
|
|
|
|
|
// Copy weights and biases from CPU LSTM to CUDA LSTM
|
|
|
|
|
{
|
|
|
|
|
at::NoGradGuard guard;
|
|
|
|
|
for (const auto& param : lstm_cpu->named_parameters(/*recurse=*/false)) {
|
|
|
|
|
lstm_cuda->named_parameters()[param.key()].copy_(lstm_cpu->named_parameters()[param.key()]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
lstm_cpu->flatten_parameters();
|
|
|
|
|
lstm_cuda->flatten_parameters();
|
|
|
|
|
|
|
|
|
|
// Move LSTM to CUDA
|
|
|
|
|
lstm_cuda->to(torch::kCUDA);
|
|
|
|
|
|
|
|
|
|
auto options = torch::TensorOptions()
|
|
|
|
|
.dtype(torch::kFloat32).requires_grad(false);
|
|
|
|
|
auto input_cpu = torch::tensor({1, 2, 3, 4, 5, 6}, options)
|
|
|
|
|
.reshape({3, 1, 2});
|
|
|
|
|
auto input_cuda = torch::tensor({1, 2, 3, 4, 5, 6}, options)
|
|
|
|
|
.reshape({3, 1, 2}).to(torch::kCUDA);
|
|
|
|
|
|
|
|
|
|
// Call forward on both LSTMs
|
2019-07-22 19:52:11 +00:00
|
|
|
auto output_cpu = lstm_cpu->forward(input_cpu);
|
|
|
|
|
auto output_cuda = lstm_cuda->forward(input_cuda);
|
|
|
|
|
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
output_cpu = lstm_output_to_device(output_cpu, torch::kCPU);
|
2019-07-22 19:52:11 +00:00
|
|
|
|
|
|
|
|
// Assert that the output and state are equal on CPU and CUDA
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
ASSERT_EQ(std::get<0>(output_cpu).dim(), std::get<0>(output_cuda).dim());
|
|
|
|
|
for (int i = 0; i < std::get<0>(output_cpu).dim(); i++) {
|
|
|
|
|
ASSERT_EQ(std::get<0>(output_cpu).size(i), std::get<0>(output_cuda).size(i));
|
2019-07-22 19:52:11 +00:00
|
|
|
}
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
for (int i = 0; i < std::get<0>(output_cpu).size(0); i++) {
|
|
|
|
|
for (int j = 0; j < std::get<0>(output_cpu).size(1); j++) {
|
|
|
|
|
for (int k = 0; k < std::get<0>(output_cpu).size(2); k++) {
|
2019-07-22 19:52:11 +00:00
|
|
|
ASSERT_NEAR(
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflected the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
std::get<0>(output_cpu)[i][j][k].item<float>(),
|
|
|
|
|
std::get<0>(output_cuda)[i][j][k].item<float>(), 1e-5);
|
2019-07-22 19:52:11 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
[C++ API] RNN / GRU / LSTM layer refactoring (#34322)
Summary:
This PR refactors RNN / GRU / LSTM layers in C++ API to exactly match the implementation in Python API.
**BC-breaking changes:**
- Instead of returning `RNNOutput`, RNN / GRU forward method now returns `std::tuple<Tensor, Tensor>`, and LSTM forward method now returns `std::tuple<Tensor, std::tuple<Tensor, Tensor>>`, matching Python API.
- RNN / LSTM / GRU forward method now accepts the same inputs (input tensor and optionally hidden state), matching Python API.
- RNN / LSTM / GRU layers now have `forward_with_packed_input` method which accepts `PackedSequence` as input and optionally hidden state, matching the `forward(PackedSequence, ...)` variant in Python API.
- RNN / LSTM / GRU layers no longer have these fields: `w_ih` / `w_hh` / `b_ih` / `b_hh`. Instead, to access the weights and biases of the gates, users should do e.g. `rnn->named_parameters()["weight_ih_l0"]`, which mirrors the Python API `rnn.weight_ih_l0`.
- In `RNNOptions`
- `tanh()` / `relu()` / `activation` are removed. Instead, `nonlinearity` is added which takes either `torch::kTanh` or `torch::kReLU`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `LSTMOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
- In `GRUOptions`
- `layers` -> `num_layers`
- `with_bias` -> `bias`
The majority of the changes in this PR focused on refactoring the implementations in `torch/csrc/api/src/nn/modules/rnn.cpp` to match the Python API. RNN tests are then changed to reflect the revised API design.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34322
Differential Revision: D20458302
Pulled By: yf225
fbshipit-source-id: ffff2ae1ddb1c742c966956f6ad4d7fba03dc54d
2020-03-16 00:45:47 +00:00
|
|
|
|
|
|
|
|
TEST_F(RNNTest, UsePackedSequenceAsInput) {
  // For each of RNN, LSTM and GRU: seed the generator, build the module, feed
  // it a packed sequence made from a single 3x2 tensor of ones, and compare the
  // packed output data against hard-coded expected values. The call is then
  // repeated with the optional second argument passed explicitly.
  namespace rnn_utils = torch::nn::utils::rnn;
  const double rtol = 1e-05;
  const double atol = 2e-04;

  // RNN
  {
    torch::manual_seed(0);
    auto rnn_module = RNN(2, 3);
    rnn_utils::PackedSequence packed =
        rnn_utils::pack_sequence({torch::ones({3, 2})});
    auto result = rnn_module->forward_with_packed_input(packed);
    auto expected = torch::tensor(
        {{-0.0645, -0.7274, 0.4531},
         {-0.3970, -0.6950, 0.6009},
         {-0.3877, -0.7310, 0.6806}});
    ASSERT_TRUE(
        torch::allclose(std::get<0>(result).data(), expected, rtol, atol));

    // Test passing optional argument to `RNN::forward_with_packed_input`
    result = rnn_module->forward_with_packed_input(packed, torch::Tensor());
    ASSERT_TRUE(
        torch::allclose(std::get<0>(result).data(), expected, rtol, atol));
  }

  // LSTM
  {
    torch::manual_seed(0);
    auto lstm_module = LSTM(2, 3);
    rnn_utils::PackedSequence packed =
        rnn_utils::pack_sequence({torch::ones({3, 2})});
    auto result = lstm_module->forward_with_packed_input(packed);
    auto expected = torch::tensor(
        {{-0.2693, -0.1240, 0.0744},
         {-0.3889, -0.1919, 0.1183},
         {-0.4425, -0.2314, 0.1386}});
    ASSERT_TRUE(
        torch::allclose(std::get<0>(result).data(), expected, rtol, atol));

    // Test passing optional argument to `LSTM::forward_with_packed_input`
    result = lstm_module->forward_with_packed_input(packed, torch::nullopt);
    ASSERT_TRUE(
        torch::allclose(std::get<0>(result).data(), expected, rtol, atol));
  }

  // GRU
  {
    torch::manual_seed(0);
    auto gru_module = GRU(2, 3);
    rnn_utils::PackedSequence packed =
        rnn_utils::pack_sequence({torch::ones({3, 2})});
    auto result = gru_module->forward_with_packed_input(packed);
    auto expected = torch::tensor(
        {{-0.1134, 0.0467, 0.2336},
         {-0.1189, 0.0502, 0.2960},
         {-0.1138, 0.0484, 0.3110}});
    ASSERT_TRUE(
        torch::allclose(std::get<0>(result).data(), expected, rtol, atol));

    // Test passing optional argument to `GRU::forward_with_packed_input`
    result = gru_module->forward_with_packed_input(packed, torch::Tensor());
    ASSERT_TRUE(
        torch::allclose(std::get<0>(result).data(), expected, rtol, atol));
  }
}
|