Jaliyae/chunk buffer fix (#17409)

Summary:
The chunk buffer could hang when no data was read and the buffer size was smaller than the chunk size. We detected this while running with a larger dataset, hence this fix. I added a test that mimics the situation and validated that the fix works. Thank you Xueyun for finding this issue.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/17409

Differential Revision: D14198546

Pulled By: soumith

fbshipit-source-id: b8ca43b0400deaae2ebb6601fdc65b47f32b0554
This commit is contained in:
Jaliya Ekanayake 2019-02-23 08:46:24 -08:00 committed by Facebook Github Bot
parent 5ea6344c54
commit bb3a2d99ac
2 changed files with 44 additions and 0 deletions

View file

@ -1843,3 +1843,39 @@ TEST(DataLoaderTest, CanAccessChunkSamplerWithChunkDataSet) {
// 3 chunks, and when exhausted the value is already incremented.
ASSERT_EQ(chunk_sampler.index(), 3);
}
TEST(DataLoaderTest, ChunkDatasetDoesNotHang) {
  const size_t prefetch_count = 2;
  const size_t batch_size = 5;
  // A cache smaller than the total data forces the preloader threads to block
  // until `get_batch()` drains the buffer.
  const size_t cache_size = 10;

  DummyChunkDataReader reader;
  samplers::SequentialSampler seq_sampler(0);

  using ChunkDatasetType = datasets::ChunkDataset<
      DummyChunkDataReader,
      samplers::SequentialSampler,
      samplers::SequentialSampler>;

  datasets::SharedBatchDataset<ChunkDatasetType> shared_dataset =
      datasets::make_shared_dataset<ChunkDatasetType>(
          reader,
          seq_sampler,
          seq_sampler,
          datasets::ChunkDatasetOptions(
              prefetch_count, batch_size, cache_size));

  samplers::SequentialSampler& chunk_sampler = shared_dataset->chunk_sampler();

  auto loader = torch::data::make_data_loader(
      shared_dataset.map(transforms::BatchLambda<std::vector<int>, int>(
          [](std::vector<int> batch) {
            return std::accumulate(batch.begin(), batch.end(), 0);
          })),
      DataLoaderOptions(batch_size).workers(0));

  // Create the iterator without ever iterating. The chunk preloaders are
  // blocked trying to fill the batch buffer, which nothing drains; teardown
  // must still exit cleanly instead of hanging.
  auto it = loader->begin();
}

View file

@ -293,6 +293,10 @@ class ChunkDataset final
running_preloaders_(0) {}
virtual ~ChunkDataset() {
// Stop the batch buffer first: preloader threads may be blocked inside the
// buffer waiting for space, and `stop()` wakes them so the join below
// (inside free_workers()) cannot deadlock. The buffer is guarded because it
// is presumably only allocated after the first reset() — TODO confirm.
// stop batch buffer first.
if (batch_buffer_) {
batch_buffer_->stop();
}
free_workers();
}
@ -317,6 +321,10 @@ class ChunkDataset final
/// This will clear any internal state and starts the internal prefetching
/// mechanism for the chunk dataset.
void reset() override {
// We need this to support partial data reads via dataloader iterator.
if (batch_buffer_) {
batch_buffer_->stop();
}
// free workers from previous reset if there is any.
free_workers();
preload_threads_.clear();