mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-15 21:00:47 +00:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/9939 Pull Request resolved: https://github.com/facebookresearch/weakly-supervised-action-detection/pull/13 Pull Request resolved: https://github.com/pytorch/translate/pull/166 Pull Request resolved: https://github.com/pytorch/pytorch/pull/9125 Closes https://github.com/pytorch/pytorch/pull/9125 Use inheritance for polymorphism, and remove template parameter This is to change the templating in call sites, the core implementations will change later Before Caffe2 Tensor class was compile-time fixed to bind to a particular device/context. With this change, we're making it a runtime property (stored inside the tensor), but preserve the same semantics. For example, one has to specify device type in order to create a Tensor - there are no uninitialized tensors. More specifically the changes are: 1. We added an extra argument *DeviceType* to most of the constructors of the tensor, e.g. (Tensor(DeviceType type)), 2. Semantics of constructor Tensor(const Tensor<SrcContext>& src, ContextForCopy* context); is changed, in this constructor, the second context is passed in to enable us to call the templated Copy function, it could be in a different context as source and target previously, now we'll enforce that the context should have same device type as src, if it is provided. 3. To preserve 'get-or-construct' semantics of Blob, we added specialized getter Blob::GetMutableTensor that verifies both that Blob contains a Tensor and that it's of a correct type 4. Specifically, Tensor type is not default-constructible any more (as we don't have unknown device tensors) and thus some of the code handling STL containers needs to change Note: Some changes are postponed just to keep this diff a bit smaller. Please see `TODO`s. Reviewed By: ezyang, houseroad Differential Revision: D9024330 fbshipit-source-id: e0b8295d2dc6ebe2963383ded5af799ad17164ba
113 lines
3.5 KiB
C++
113 lines
3.5 KiB
C++
#ifndef CAFFE2_OPERATORS_NUMPY_TILE_OP_H_
|
|
#define CAFFE2_OPERATORS_NUMPY_TILE_OP_H_
|
|
|
|
#include "caffe2/core/common_omp.h"
|
|
#include "caffe2/core/context.h"
|
|
#include "caffe2/core/logging.h"
|
|
#include "caffe2/core/operator.h"
|
|
#include "caffe2/utils/math.h"
|
|
|
|
namespace caffe2 {
|
|
|
|
// Copy a Blob n times along a specified axis.
|
|
template <class Context>
|
|
class NumpyTileOp : public Operator<Context> {
|
|
public:
|
|
USE_OPERATOR_CONTEXT_FUNCTIONS;
|
|
NumpyTileOp(const OperatorDef& operator_def, Workspace* ws)
|
|
: Operator<Context>(operator_def, ws) {}
|
|
~NumpyTileOp() {}
|
|
|
|
bool RunOnDevice() override {
|
|
const auto& input = Input(0);
|
|
const auto& repeats = Input(1);
|
|
|
|
// Check that the `repeats` tensor has the correct rank, has a number of
|
|
// elements equal to the number of axes of `input`.
|
|
CAFFE_ENFORCE_EQ(repeats.ndim(), 1, "repeats input must be a 1-d tensor");
|
|
CAFFE_ENFORCE_EQ(repeats.size(), input.ndim(), "repeats input have the same"
|
|
" number of elements as `inputs` has dimensions.");
|
|
const int64_t *repeats_data = repeats.template data<int64_t>();
|
|
for (size_t i=0; i<repeats.size(); ++i) {
|
|
CAFFE_ENFORCE_GE(repeats_data[i], 0);
|
|
}
|
|
|
|
auto* output = Output(0);
|
|
|
|
// Alternate inputs and outputs between two buffers. Repeatedly apply the
|
|
// Tile kernel along each axis. Then copy out the resulting data into the
|
|
// output tensor.
|
|
Tensor *src = &buffer, *dst = output;
|
|
src->CopyFrom(input);
|
|
vector<TIndex> output_dims(input.dims());
|
|
for (size_t i = 0; i < repeats.size(); ++i) {
|
|
if (repeats_data[i] == 1) {
|
|
continue;
|
|
}
|
|
// size up to (and not including) axis
|
|
const auto outer_dim = src->size_to_dim(i);
|
|
// size from axis up
|
|
const auto inner_dim = src->size_from_dim(i);
|
|
|
|
dst->Resize(outer_dim, inner_dim * repeats_data[i]);
|
|
|
|
/**
|
|
* How this works:
|
|
* Imagine a 2D tensor (matrix) of size 3x10, tiled 2 times.
|
|
* - Tiling along axis 0 (row) means copying the entire 3x10 Matrix 2
|
|
* times. outer_dim = 0, inner_dim = 30.
|
|
* - Tiling along axis 1 (column) means copying each row 2 times, then
|
|
* proceed to the next row, until the end. outer_dim = 3, inner_dim = 10.
|
|
*/
|
|
const char* src_data = static_cast<const char*>(src->raw_data());
|
|
char* dst_data =
|
|
static_cast<char*>(dst->raw_mutable_data(src->meta()));
|
|
|
|
DoTile(
|
|
src->meta(),
|
|
src->itemsize(),
|
|
outer_dim,
|
|
inner_dim,
|
|
repeats_data[i],
|
|
src_data,
|
|
dst_data);
|
|
|
|
output_dims[i] *= repeats_data[i];
|
|
dst->Reshape(output_dims);
|
|
|
|
std::swap(src, dst);
|
|
}
|
|
|
|
// NB: because we have the swap at the end of the above loop, our real
|
|
// result tensor is going to live in *src when we reach this line
|
|
// whether we entered the loop or not :)
|
|
if (output != src)
|
|
output->CopyFrom(*src);
|
|
|
|
return true;
|
|
}
|
|
|
|
private:
|
|
void DoTile(
|
|
const TypeMeta& meta,
|
|
int item_size,
|
|
int outer_dim,
|
|
int inner_dim,
|
|
int64_t num_tiles,
|
|
const char* input_data,
|
|
char* output_data) {
|
|
for (auto i = 0; i < outer_dim; ++i) {
|
|
for (auto t = 0; t < num_tiles; ++t) {
|
|
context_.CopyItemsSameDevice(meta, inner_dim, input_data, output_data);
|
|
output_data += inner_dim * item_size;
|
|
}
|
|
input_data += inner_dim * item_size;
|
|
}
|
|
}
|
|
|
|
Tensor buffer{Context::GetDeviceType()};
|
|
};
|
|
|
|
} // namespace caffe2
|
|
|
|
#endif // CAFFE2_OPERATORS_NUMPY_TILE_OP_H_
|