mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Follow-up to https://github.com/pytorch/pytorch/pull/93901. Unexpected numerical mismatches observed in some foreach functions' backward result seemed to be caused by the wrong order of `IndexRangeGenerator::range` call. This PR makes `args_with_derivatives` follow the same (or similar) order as `foreach_native_function.func.arguments.flat_non_out` --- what the current master generates for `_foreach_mul.List`: ```cpp variable_list ForeachMulBackward0List::apply(variable_list&& grads) { std::lock_guard<std::mutex> lock(mutex_); TORCH_CHECK(!other_released_, ERR_BACKWARD_TWICE); TORCH_CHECK(!self_released_, ERR_BACKWARD_TWICE); IndexRangeGenerator gen; auto other_ix = gen.range(other_size_); auto self_ix = gen.range(self_size_); variable_list grad_inputs(gen.size()); auto other = unpack_list(other_); auto self = unpack_list(self_); if (task_should_compute_output({ other_ix })) { std::vector<Tensor> grad_result; grad_result.reserve(grads.size()); for (const auto & i : c10::irange(grads.size())) { grad_result.emplace_back(mul_tensor_backward(grads[i], self[i], other[i].scalar_type())); } copy_range(grad_inputs, other_ix, grad_result); } if (task_should_compute_output({ self_ix })) { std::vector<Tensor> grad_result; grad_result.reserve(grads.size()); for (const auto & i : c10::irange(grads.size())) { grad_result.emplace_back(mul_tensor_backward(grads[i], other[i], self[i].scalar_type())); } copy_range(grad_inputs, self_ix, grad_result); } return grad_inputs; } ``` with this PR the generated backward is ```cpp variable_list ForeachMulBackward0List::apply(variable_list&& grads) { std::lock_guard<std::mutex> lock(mutex_); TORCH_CHECK(!self_released_, ERR_BACKWARD_TWICE); TORCH_CHECK(!other_released_, ERR_BACKWARD_TWICE); IndexRangeGenerator gen; auto self_ix = gen.range(self_size_); <----- diff auto other_ix = gen.range(other_size_); <----- diff variable_list grad_inputs(gen.size()); auto self = unpack_list(self_); auto other = unpack_list(other_); if (task_should_compute_output({ 
other_ix })) { std::vector<Tensor> grad_result; grad_result.reserve(grads.size()); for (const auto & i : c10::irange(grads.size())) { grad_result.emplace_back(mul_tensor_backward(grads[i], self[i], other[i].scalar_type())); } copy_range(grad_inputs, other_ix, grad_result); } if (task_should_compute_output({ self_ix })) { std::vector<Tensor> grad_result; grad_result.reserve(grads.size()); for (const auto & i : c10::irange(grads.size())) { grad_result.emplace_back(mul_tensor_backward(grads[i], other[i], self[i].scalar_type())); } copy_range(grad_inputs, self_ix, grad_result); } return grad_inputs; } ``` The change is to fix the order of `self_ix` and `other_ix`. Pull Request resolved: https://github.com/pytorch/pytorch/pull/95263 Approved by: https://github.com/soulitzer |
| | | |
|---|---|---|
| .. | ||
| api | ||
| decompositions | ||
| dest | ||
| executorch | ||
| operator_versions | ||
| selective_build | ||
| shape_functions | ||
| static_runtime | ||
| __init__.py | ||
| BUCK.oss | ||
| BUILD.bazel | ||
| build.bzl | ||
| code_template.py | ||
| context.py | ||
| gen.py | ||
| gen_backend_stubs.py | ||
| gen_executorch.py | ||
| gen_functionalization_type.py | ||
| gen_lazy_tensor.py | ||
| gen_vmap_plumbing.py | ||
| local.py | ||
| model.py | ||
| native_function_generation.py | ||
| utils.py | ||