mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-15 21:00:47 +00:00
[SymmetricMemory] set the storage_offset of tensors returned by get_buffer() to 0 (#137569)
It seems that there's a bug in `TensorMaker` - it would treat `storage_offset` as bytes when calculating the storage size, but as numel when setting the tensor's `storage_offset`. This seems to be causing tensors returned by get_buffer() with a non-0 offset to report a wrong storage size. Will look into the `TensorMaker` issue further. But for `get_buffer()`, it seems more natural to simply incorporate the offset into the data pointer. Pull Request resolved: https://github.com/pytorch/pytorch/pull/137569 Approved by: https://github.com/weifengpy ghstack dependencies: #137567
This commit is contained in:
parent
96bab021c0
commit
ea83c78174
2 changed files with 6 additions and 2 deletions
|
|
@ -90,6 +90,9 @@ class SymmetricMemoryTest(MultiProcessTestCase):
|
|||
self.assertEqual(symm_mem.world_size, 2)
|
||||
|
||||
buf = symm_mem.get_buffer(0, (64, 64), torch.float32)
|
||||
self.assertEqual(buf.storage_offset(), 0)
|
||||
self.assertEqual(buf.storage().size(), 64 * 64)
|
||||
|
||||
if symm_mem.rank == 0:
|
||||
symm_mem.wait_signal(src_rank=1)
|
||||
self.assertTrue(buf.eq(42).all())
|
||||
|
|
|
|||
|
|
@ -371,10 +371,11 @@ at::Tensor CUDASymmetricMemory::get_buffer(
|
|||
" bytes) exceeds the allocated size (",
|
||||
buffer_size_,
|
||||
" bytes)");
|
||||
auto data_ptr = reinterpret_cast<uint8_t*>(buffers_[rank]) +
|
||||
storage_offset * element_size;
|
||||
auto device = c10::Device(c10::DeviceType::CUDA, local_device_idx_);
|
||||
auto options = at::TensorOptions().dtype(dtype).device(device);
|
||||
return at::for_blob(buffers_[rank], sizes)
|
||||
.storage_offset(storage_offset)
|
||||
return at::for_blob(data_ptr, sizes)
|
||||
.options(options)
|
||||
.target_device(device)
|
||||
.make_tensor();
|
||||
|
|
|
|||
Loading…
Reference in a new issue