From 3ec33957eb8d774946d79db9d8d051be2c2c88cf Mon Sep 17 00:00:00 2001
From: cyy
Date: Sun, 8 Oct 2023 23:43:45 +0000
Subject: [PATCH] [1/N] Enable Wunused-result and Wunused-variable in torch targets (#110722)

These warnings are useful for catching unchecked results of function calls
and local variables that are never read.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/110722
Approved by: https://github.com/Skylion007
---
 aten/src/ATen/native/NNPACK.cpp               |  6 +++++-
 .../flash_attn/flash_fwd_launch_template.h    |  6 ++++--
 aten/src/ATen/test/half_test.cpp              |  6 ++++--
 aten/src/ATen/test/rng_test.h                 |  6 +++---
 caffe2/serialize/file_adapter.cc              | 21 +++++++++----------
 .../common_subexpression_elimination_test.cc  |  6 ++----
 caffe2/utils/threadpool/WorkersPool.h         |  3 ++-
 third_party/nvfuser/test/test_gpu2.cpp        |  2 --
 third_party/nvfuser/test/test_gpu3.cpp        |  3 ---
 third_party/nvfuser/test/test_gpu_shift.cpp   |  3 ---
 torch/csrc/cuda/Module.cpp                    |  4 ----
 .../csrc/distributed/c10d/TCPStoreBackend.cpp | 16 +++++++++++++-
 torch/csrc/jit/passes/onnx/constant_fold.cpp  |  1 -
 .../jit/passes/onnx/shape_type_inference.cpp  |  1 -
 torch/csrc/utils/throughput_benchmark-inl.h   |  2 +-
 torch/lib/libshm/core.cpp                     |  3 ++-
 torch/lib/libshm/manager.cpp                  | 20 +++++++++++++-----
 17 files changed, 63 insertions(+), 46 deletions(-)

diff --git a/aten/src/ATen/native/NNPACK.cpp b/aten/src/ATen/native/NNPACK.cpp
index 2c324cf7402..e0a52ea6c43 100644
--- a/aten/src/ATen/native/NNPACK.cpp
+++ b/aten/src/ATen/native/NNPACK.cpp
@@ -121,7 +121,11 @@ struct Workspace {
     constexpr size_t nnpack_memory_alignment_boundary = 64;
 
     // Won't work on Windows, but NNPACK doesn't support Windows either
-    posix_memalign(&buffer, nnpack_memory_alignment_boundary, size);
+    auto res = posix_memalign(&buffer, nnpack_memory_alignment_boundary, size);
+    if (res != 0) {
+      TORCH_CHECK(false, "posix_memalign failed:", strerror(errno), " (", errno, ")");
+    }
+    return;
   }
 
   ~Workspace() {
diff --git a/aten/src/ATen/native/transformers/cuda/flash_attn/flash_fwd_launch_template.h b/aten/src/ATen/native/transformers/cuda/flash_attn/flash_fwd_launch_template.h
index 6cab33f47e7..ecd96e52ea8 100644
--- a/aten/src/ATen/native/transformers/cuda/flash_attn/flash_fwd_launch_template.h
+++ b/aten/src/ATen/native/transformers/cuda/flash_attn/flash_fwd_launch_template.h
@@ -231,14 +231,16 @@ void run_mha_fwd_hdim224(Flash_fwd_params &params, cudaStream_t stream) {
 template<typename T>
 void run_mha_fwd_hdim256(Flash_fwd_params &params, cudaStream_t stream) {
     constexpr int Headdim = 256;
-    int device;
+    int device = -1;
     cudaGetDevice(&device);
-    int max_smem_per_sm, max_smem_per_block;
+    int max_smem_per_sm = 0, max_smem_per_block = 0;
     cudaError status_ = cudaDeviceGetAttribute(
         &max_smem_per_sm, cudaDevAttrMaxSharedMemoryPerMultiprocessor, device);
+    C10_CUDA_CHECK(status_);
     status_ = cudaDeviceGetAttribute(
         &max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device);
     // printf("max_smem_per_sm = %d, max_smem_per_block = %d\n", max_smem_per_sm, max_smem_per_block);
+    C10_CUDA_CHECK(status_);
     BOOL_SWITCH(params.p_dropout < 1.f, Is_dropout, [&] {
         BOOL_SWITCH(params.is_causal, Is_causal, [&] {
             // For A100, we want to run with 128 x 64 (128KB smem).
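A note on why the NNPACK hunk binds the result instead of casting it away:
glibc declares posix_memalign with __attribute__((warn_unused_result)), so
-Wunused-result fires when its return value is discarded, and GCC deliberately
ignores a bare (void) cast for that attribute. A minimal, self-contained
sketch of the checked-allocation pattern (not part of the patch; the alignment
and size values are arbitrary):

    #include <cstdio>
    #include <cstdlib>
    #include <cstring>

    int main() {
      void* buffer = nullptr;
      // posix_memalign reports failure through its return value (per POSIX it
      // does not set errno), so the returned code is what must be checked.
      int res = posix_memalign(&buffer, /*alignment=*/64, /*size=*/1024);
      if (res != 0) {
        std::fprintf(stderr, "posix_memalign failed: %s (%d)\n",
                     std::strerror(res), res);
        return 1;
      }
      std::free(buffer);
      return 0;
    }
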
diff --git a/aten/src/ATen/test/half_test.cpp b/aten/src/ATen/test/half_test.cpp
index 4a61cfe6400..32a04d5d740 100644
--- a/aten/src/ATen/test/half_test.cpp
+++ b/aten/src/ATen/test/half_test.cpp
@@ -1,12 +1,12 @@
 #include
 #include
+#include
+#include
 #include
 #include
 #include
-#include
 #include
-#include
 
 using namespace at;
 
@@ -118,7 +118,9 @@ ASSERT_SAME_TYPE(traps);
 ASSERT_SAME_TYPE(tinyness_before);
 
 TEST(TestHalf, CommonMath) {
+#ifndef NDEBUG
   float threshold = 0.00001;
+#endif
   assert(std::abs(std::lgamma(Half(10.0)) - std::lgamma(10.0f)) <= threshold);
   assert(std::abs(std::exp(Half(1.0)) - std::exp(1.0f)) <= threshold);
   assert(std::abs(std::log(Half(1.0)) - std::log(1.0f)) <= threshold);
diff --git a/aten/src/ATen/test/rng_test.h b/aten/src/ATen/test/rng_test.h
index 1c8f7468a9d..c7ac20edecb 100644
--- a/aten/src/ATen/test/rng_test.h
+++ b/aten/src/ATen/test/rng_test.h
@@ -62,9 +62,6 @@ constexpr int64_t _max_to() {
 
 template
 void test_random_from_to(const at::Device& device) {
-  constexpr int64_t min_val = _min_val<T>();
-  constexpr int64_t min_from = _min_from<T>();
-  constexpr int64_t max_val = _max_val<T>();
   constexpr int64_t max_to = _max_to<T>();
@@ -81,6 +78,7 @@ void test_random_from_to(const at::Device& device) {
       static_cast<c10::optional<int64_t>>(c10::nullopt)
     };
   } else if constexpr (::std::is_signed<T>::value) {
+    constexpr int64_t min_from = _min_from<T>();
     froms = {
       min_from,
       -42L,
@@ -161,6 +159,8 @@
   }
   if constexpr (::std::is_same_v<T, int64_t>) {
     ASSERT_TRUE(full_64_bit_range_case_covered);
+  } else {
+    (void)full_64_bit_range_case_covered;
   }
   ASSERT_TRUE(from_to_case_covered);
   ASSERT_TRUE(from_case_covered);
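The half_test.cpp and rng_test.h hunks above use idioms worth spelling out: a
variable read only by assert() is guarded by the same NDEBUG condition that
disables assert(), and a flag that some constexpr branches never read is
discarded explicitly. A standalone sketch of those idioms plus the
[[maybe_unused]] annotation used elsewhere in this patch (not part of the
patch; make_widget, demo, and all locals are invented):

    #include <cassert>

    int make_widget();

    void demo() {
      // Idiom 1 (half_test.cpp): read only inside assert(), which compiles
      // away under NDEBUG, so guard the definition with the same condition.
    #ifndef NDEBUG
      const int expected = 42;
    #endif
      assert(make_widget() == expected);

      // Idiom 2 (rng_test.h, WorkersPool.h, core.cpp): bind the value, then
      // discard it explicitly on paths that never read it.
      auto rc = make_widget();
      (void)rc;

      // Idiom 3 (CSE test, throughput_benchmark-inl.h): annotate names that
      // are deliberately unused.
      [[maybe_unused]] int scratch = make_widget();
    }

    int make_widget() {
      return 42;
    }
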
diff --git a/caffe2/serialize/file_adapter.cc b/caffe2/serialize/file_adapter.cc
index 1fddce970a8..67634d7f7fd 100644
--- a/caffe2/serialize/file_adapter.cc
+++ b/caffe2/serialize/file_adapter.cc
@@ -11,18 +11,21 @@ namespace serialize {
 FileAdapter::RAIIFile::RAIIFile(const std::string& file_name) {
   fp_ = fopen(file_name.c_str(), "rb");
   if (fp_ == nullptr) {
+    auto old_errno = errno;
+#if defined(_WIN32) && (defined(__MINGW32__) || defined(_MSC_VER))
     char buf[1024];
     buf[0] = '\0';
-#if defined(_WIN32) && (defined(__MINGW32__) || defined(_MSC_VER))
-    strerror_s(buf, sizeof(buf), errno);
+    char* error_msg = buf;
+    strerror_s(buf, sizeof(buf), old_errno);
 #else
-    strerror_r(errno, buf, sizeof(buf));
+    auto error_msg =
+        std::system_category().default_error_condition(old_errno).message();
 #endif
     AT_ERROR(
         "open file failed because of errno ",
-        errno,
+        old_errno,
         " on fopen: ",
-        buf,
+        error_msg,
         ", file path: ",
         file_name);
   }
@@ -35,7 +38,7 @@ FileAdapter::RAIIFile::~RAIIFile() {
 }
 
 // FileAdapter directly calls C file API.
-FileAdapter::FileAdapter(const std::string& file_name): file_(file_name) {
+FileAdapter::FileAdapter(const std::string& file_name) : file_(file_name) {
   const int fseek_ret = fseek(file_.fp_, 0L, SEEK_END);
   TORCH_CHECK(fseek_ret == 0, "fseek returned ", fseek_ret);
 #if defined(_MSC_VER)
@@ -68,11 +71,7 @@ size_t FileAdapter::read(uint64_t pos, void* buf, size_t n, const char* what)
   const int fseek_ret = fseeko(file_.fp_, pos, SEEK_SET);
 #endif
   TORCH_CHECK(
-      fseek_ret == 0,
-      "fseek returned ",
-      fseek_ret,
-      ", context: ",
-      what);
+      fseek_ret == 0, "fseek returned ", fseek_ret, ", context: ", what);
   return fread(buf, 1, n, file_.fp_);
 }
diff --git a/caffe2/transforms/common_subexpression_elimination_test.cc b/caffe2/transforms/common_subexpression_elimination_test.cc
index 743dd6ab131..5e3919f0ecd 100644
--- a/caffe2/transforms/common_subexpression_elimination_test.cc
+++ b/caffe2/transforms/common_subexpression_elimination_test.cc
@@ -21,8 +21,7 @@ using transform::Graph;
  */
 TEST(CommonSubexpressionEliminationTest, TestSimple) {
   NetDef netdef;
-  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
-  OperatorDef* op;
+  OperatorDef* op [[maybe_unused]] = nullptr;
 
   // This operator simply reads input and outputs it.
   // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
@@ -76,8 +75,7 @@
  */
 TEST(CommonSubexpressionEliminationTest, TestFromExternal) {
   NetDef netdef;
-  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
-  OperatorDef* op;
+  OperatorDef* op [[maybe_unused]] = nullptr;
 
   // This operator simply reads input and outputs it.
   // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
diff --git a/caffe2/utils/threadpool/WorkersPool.h b/caffe2/utils/threadpool/WorkersPool.h
index e210db6ca0f..b6bbc60f209 100644
--- a/caffe2/utils/threadpool/WorkersPool.h
+++ b/caffe2/utils/threadpool/WorkersPool.h
@@ -40,7 +40,8 @@ struct AllocAligned {
 #elif defined(_MSC_VER)
     p = _aligned_malloc(sizeof(T), kGEMMLOWPCacheLineSize);
 #else
-    posix_memalign((void**)&p, kGEMMLOWPCacheLineSize, sizeof(T));
+    auto res = posix_memalign((void**)&p, kGEMMLOWPCacheLineSize, sizeof(T));
+    (void)res;
 #endif
 
     if (p) {
diff --git a/third_party/nvfuser/test/test_gpu2.cpp b/third_party/nvfuser/test/test_gpu2.cpp
index 87781f4f48d..ca0761df4e4 100644
--- a/third_party/nvfuser/test/test_gpu2.cpp
+++ b/third_party/nvfuser/test/test_gpu2.cpp
@@ -2926,7 +2926,6 @@ void testWelford(DataType dtype, int red_axis, int odim, int rdim) {
   fusion.addOutput(tv_N);
 
   auto options = at::TensorOptions().dtype(aten_dtype).device(at::kCUDA, 0);
-  auto options_int = at::TensorOptions().dtype(at::kLong).device(at::kCUDA, 0);
   at::manual_seed(0);
   std::vector<at::Tensor> outputs_of_red;
   at::Tensor aten_input =
@@ -7704,7 +7703,6 @@ TEST_F(NVFuserTest, FusionIssue970_CUDA) {
   tv1->split(1, 4);
 
   auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
-  auto options_int = at::TensorOptions().dtype(at::kLong).device(at::kCUDA, 0);
   at::manual_seed(0);
 
   at::Tensor t0 = at::randn({nelm, nelm}, options);
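Background on the file_adapter.cc change above: strerror_r has two
incompatible signatures (the POSIX variant returns int, the GNU variant
returns char*), which makes portable use awkward, while
std::system_category().default_error_condition(errno).message() is both
portable and thread-safe. A self-contained sketch of that pattern (not part
of the patch; the file path is made up):

    #include <cerrno>
    #include <cstdio>
    #include <string>
    #include <system_error>

    int main() {
      if (std::fopen("/definitely/not/a/file", "rb") == nullptr) {
        // Save errno before any other call can clobber it, as the patch does
        // with old_errno.
        const int old_errno = errno;
        const std::string msg =
            std::system_category().default_error_condition(old_errno).message();
        std::printf("fopen failed: %s (errno %d)\n", msg.c_str(), old_errno);
      }
      return 0;
    }
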
diff --git a/third_party/nvfuser/test/test_gpu3.cpp b/third_party/nvfuser/test/test_gpu3.cpp
index b5386bafdb1..6632bd2a599 100644
--- a/third_party/nvfuser/test/test_gpu3.cpp
+++ b/third_party/nvfuser/test/test_gpu3.cpp
@@ -3149,7 +3149,6 @@ TEST_F(NVFuserTest, FusionPropagateParallelTypesToSiblings_CUDA) {
   }
 
   auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
-  auto options_int = at::TensorOptions().dtype(at::kLong).device(at::kCUDA, 0);
   at::manual_seed(0);
   at::Tensor t0 =
       at::randn({9999}, options);
@@ -4943,8 +4942,6 @@ TEST_F(NVFuserTest, FusionIssueRepro1844_CUDA) {
 
   const auto options =
       at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
-  const auto mask_options =
-      at::TensorOptions().dtype(at::kBool).device(at::kCUDA, 0);
   at::manual_seed(0);
 
   at::Tensor a = at::randn(shape, options);
diff --git a/third_party/nvfuser/test/test_gpu_shift.cpp b/third_party/nvfuser/test/test_gpu_shift.cpp
index cda9b713c5b..45658f0e4af 100644
--- a/third_party/nvfuser/test/test_gpu_shift.cpp
+++ b/third_party/nvfuser/test/test_gpu_shift.cpp
@@ -4012,7 +4012,6 @@ TEST_F(NVFuserTest, FusionShiftNoPaddingChain_CUDA) {
   int numel_y = 101;
 
   auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
-  auto options_int = at::TensorOptions().dtype(at::kLong).device(at::kCUDA, 0);
   at::manual_seed(0);
   at::Tensor t0 = at::randn({numel_x, numel_y}, options);
   std::vector<c10::IValue> inputs = {t0};
@@ -4162,7 +4161,6 @@ TEST_F(NVFuserTest, FusionPartialSplit1_CUDA) {
       "Invalid extent of outer domain of partial split");
 
   auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
-  auto options_int = at::TensorOptions().dtype(at::kLong).device(at::kCUDA, 0);
   at::manual_seed(0);
   at::Tensor t0 = at::randn({numel_x}, options);
   std::vector<c10::IValue> inputs = {t0};
@@ -4242,7 +4240,6 @@ TEST_F(NVFuserTest, FusionPartialSplit3_CUDA) {
   const int numel_y = 32 + 3;
 
   auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
-  auto options_int = at::TensorOptions().dtype(at::kLong).device(at::kCUDA, 0);
   at::manual_seed(0);
   at::Tensor t0 = at::randn({numel_x, numel_y}, options);
   std::vector<c10::IValue> inputs = {t0};
diff --git a/torch/csrc/cuda/Module.cpp b/torch/csrc/cuda/Module.cpp
index 156fe103704..8c5931efd56 100644
--- a/torch/csrc/cuda/Module.cpp
+++ b/torch/csrc/cuda/Module.cpp
@@ -960,8 +960,6 @@ void addStorageDeleterFns(
   if (storage_pair != storages.end()) {
     auto ctx = storage_pair->second->data_ptr().get_context();
     TORCH_CHECK(ctx == nullptr, " Not expecting deleter function");
-
-    auto curr_deleter = storage_pair->second->data_ptr().get_deleter();
     storage_pair->second->set_data_ptr_noswap(std::move(data_ptr));
   } else {
     data_ptr.release_context();
@@ -1102,7 +1100,6 @@ static void registerCudaPluggableAllocator(PyObject* module) {
   m.def("_has_Standard_Deleter", [](size_t storage_impl_ptr) {
     c10::StorageImpl* storage_impl = (c10::StorageImpl*)storage_impl_ptr;
     auto alloc = c10::cuda::CUDACachingAllocator::get();
-    auto data_ptr = storage_impl->data_ptr().get();
     return (storage_impl->data_ptr().get_deleter() == alloc->raw_deleter());
   });
 
@@ -1197,7 +1194,6 @@ static void registerCudaPluggableAllocator(PyObject* module) {
         auto delta = c10::cuda::CUDACachingAllocator::setCheckpointPoolState(
             device, pps);
         auto& freed_pointers = delta.ptrs_freed;
-        auto& allocd_pointers = delta.dataptrs_allocd;
 
         std::unordered_set<void*> allocd_set;
         for (auto& data_ptr : delta.dataptrs_allocd) {
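The nvfuser and Module.cpp hunks above take the simplest cure for
-Wunused-variable: delete the declaration outright when nothing reads it. For
reference, a tiny reproducer of the diagnostic (not part of the patch; sum
and its locals are invented):

    // Compile with: g++ -Wunused-variable -c example.cpp
    #include <vector>

    int sum(const std::vector<int>& v) {
      int total = 0;
      int count = 0;  // warning: unused variable 'count' -- delete it
      for (int x : v) {
        total += x;
      }
      return total;
    }
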
diff --git a/torch/csrc/distributed/c10d/TCPStoreBackend.cpp b/torch/csrc/distributed/c10d/TCPStoreBackend.cpp
index 860526b2aa6..b4b13037e39 100644
--- a/torch/csrc/distributed/c10d/TCPStoreBackend.cpp
+++ b/torch/csrc/distributed/c10d/TCPStoreBackend.cpp
@@ -165,7 +165,21 @@ void TCPStoreMasterDaemon::closeStopSignal() {
 
 void TCPStoreMasterDaemon::stop() {
   if (controlPipeFd_[1] != -1) {
-    ::write(controlPipeFd_[1], "\0", 1);
+    ssize_t written_bytes = -1;
+    while (true) {
+      written_bytes = ::write(controlPipeFd_[1], "\0", 1);
+      if (written_bytes < 0) {
+        if (errno == EAGAIN) {
+          continue;
+        }
+        TORCH_CHECK(false, "Failed to write the control pipe:", errno);
+      }
+      break;
+    }
+    if (written_bytes == 0) {
+      TORCH_CHECK(false, "Failed to write the control pipe");
+    }
+    // close the write end of the pipe
     ::close(controlPipeFd_[1]);
     controlPipeFd_[1] = -1;
diff --git a/torch/csrc/jit/passes/onnx/constant_fold.cpp b/torch/csrc/jit/passes/onnx/constant_fold.cpp
index ca44b989195..309b1648e54 100644
--- a/torch/csrc/jit/passes/onnx/constant_fold.cpp
+++ b/torch/csrc/jit/passes/onnx/constant_fold.cpp
@@ -489,7 +489,6 @@ c10::optional<at::Tensor> runTorchBackendForOnnx(
     }
     // If the device of indices tensor is not the same with it of the input
     // tensor, move it to the device of the input tensor
-    auto indices_val = node->input(1);
     if (inputTensorValues[0].device() != indices.device()) {
       indices = indices.to(inputTensorValues[0].device());
     }
diff --git a/torch/csrc/jit/passes/onnx/shape_type_inference.cpp b/torch/csrc/jit/passes/onnx/shape_type_inference.cpp
index d8fe34712e6..6fb75ac2031 100644
--- a/torch/csrc/jit/passes/onnx/shape_type_inference.cpp
+++ b/torch/csrc/jit/passes/onnx/shape_type_inference.cpp
@@ -154,7 +154,6 @@ TensorTypePtr TorchTensorTypeFromONNX(
 ListTypePtr TorchListTypeFromONNX(
     const onnx::TypeProto_Sequence& onnx_sequence_type,
     SymbolDimMap& symbol_dim_map) {
-  c10::optional<at::ScalarType> scalar_type;
   if (onnx_sequence_type.has_elem_type()) {
     const auto& onnx_seq_elem_type = onnx_sequence_type.elem_type();
     if (onnx_seq_elem_type.has_tensor_type()) {
diff --git a/torch/csrc/utils/throughput_benchmark-inl.h b/torch/csrc/utils/throughput_benchmark-inl.h
index 74c61b0b1da..9193a32736e 100644
--- a/torch/csrc/utils/throughput_benchmark-inl.h
+++ b/torch/csrc/utils/throughput_benchmark-inl.h
@@ -41,7 +41,7 @@ BenchmarkExecutionStats BenchmarkHelper<Input, Output, Model>::benchmark(
   for (const auto thread_id : c10::irange(config.num_calling_threads)) {
     // Just in case we generate num_iters inputs for each of the threads
    // This was if one thread does all the work we will be fine
-    for (const auto i :
+    for (const auto i [[maybe_unused]] :
         c10::irange(config.num_iters + config.num_warmup_iters)) {
      thread_inputs[thread_id].push_back(cloneInput(inputs_[dist(engine)]));
    }
diff --git a/torch/lib/libshm/core.cpp b/torch/lib/libshm/core.cpp
index 15e6ff181da..e9ff9b0b6b9 100644
--- a/torch/lib/libshm/core.cpp
+++ b/torch/lib/libshm/core.cpp
@@ -38,7 +38,8 @@ void start_manager() {
       std::string msg("ERROR: execl failed: ");
       msg += std::strerror(errno);
      msg += '\n';
-      write(1, msg.c_str(), msg.size());
+      auto res = write(1, msg.c_str(), msg.size());
+      (void)res;
       exit(1);
     }
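Both write() call sites rewritten in this patch (TCPStoreBackend.cpp above
and libshm's print_init_message below) follow the same shape: loop until
everything is written, retry on transient errnos, and surface real errors.
A generic sketch of that shape (not part of the patch; write_all is an
invented helper):

    #include <cerrno>
    #include <cstddef>
    #include <unistd.h>

    // Returns 0 on success, the failing errno otherwise.
    int write_all(int fd, const char* data, size_t len) {
      while (len > 0) {
        ssize_t n = ::write(fd, data, len);
        if (n < 0) {
          if (errno == EINTR) {
            continue;  // interrupted by a signal; retry
          }
          return errno;  // genuine failure (EBADF, EPIPE, ...)
        }
        data += n;  // a short write is not an error; write the rest
        len -= static_cast<size_t>(n);
      }
      return 0;
    }
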
diff --git a/torch/lib/libshm/manager.cpp b/torch/lib/libshm/manager.cpp
index 2155d5a0fa1..6aa70d8c211 100644
--- a/torch/lib/libshm/manager.cpp
+++ b/torch/lib/libshm/manager.cpp
@@ -54,9 +54,19 @@ void unregister_fd(int fd) {
   client_sessions.erase(fd);
 }
 
-void print_init_message(const char* message) {
-  write(1, message, strlen(message));
-  write(1, "\n", 1);
+void print_init_message(std::string_view message) {
+  ssize_t written_bytes = -1;
+  while (!message.empty()) {
+    // NOLINTNEXTLINE(bugprone-assignment-in-if-condition)
+    SYSCHECK_ERR_RETURN_NEG1(
+        written_bytes = write(1, message.data(), message.size()));
+    message.remove_prefix(written_bytes);
+  }
+  written_bytes = 0;
+  while (written_bytes != 1) {
+    // NOLINTNEXTLINE(bugprone-assignment-in-if-condition)
+    SYSCHECK_ERR_RETURN_NEG1(written_bytes = write(1, "\n", 1));
+  }
 }
 
 bool object_exists(const char* name) {
@@ -111,10 +121,10 @@ int main(int argc, char* argv[]) {
   std::vector<int> to_add;
   std::vector<int> to_remove;
   for (;;) {
-    // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
-    int nevents;
+    int nevents = -1;
     if (client_sessions.empty())
       timeout = SHUTDOWN_TIMEOUT;
+    // NOLINTNEXTLINE(bugprone-assignment-in-if-condition)
     SYSCHECK_ERR_RETURN_NEG1(
         nevents = poll(pollfds.data(), pollfds.size(), timeout));
     timeout = -1;
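Taken together, the fixes in this patch fall out of two diagnostics. A
compact reproducer for both, buildable with
g++ -Wunused-result -Wunused-variable -c (not part of the patch; must_check
and caller are invented):

    [[nodiscard]] int must_check() {
      return 42;
    }

    void caller() {
      must_check();            // -Wunused-result: ignoring return value
      int unused = 0;          // -Wunused-variable: never read -- delete it
      int rc = must_check();   // bind the result...
      (void)rc;                // ...and discard it explicitly if unneeded
      [[maybe_unused]] int dbg = must_check();  // or annotate deliberate non-use
    }
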