[foreach] Set `SavedVariable.is_output` to `true` for `grad_fn->result_` (#105504)
fixes #105502
This pull request covers out-of-place foreach functions whose backward pass depends on their output tensor list, such as `_foreach_exp`. An example of the code generated with this change is as follows:
```c++
// Backward node for the out-of-place `_foreach_exp`.
//
// Takes the incoming gradients (`grads`, one entry per output tensor) and
// returns the gradients w.r.t. the `self` tensor list. Each gradient is
// computed from the saved forward output: grads[i] * result[i].conj().
// Fails with ERR_BACKWARD_TWICE if the saved `result_` has already been
// released.
variable_list ForeachExpBackward0::apply(variable_list&& grads) {
// Hold the node's mutex for the whole call.
std::lock_guard<std::mutex> lock(mutex_);
TORCH_CHECK(!result_released_, ERR_BACKWARD_TWICE);
IndexRangeGenerator gen;
// Reserve one slot per tensor of the forward `self` list.
auto self_ix = gen.range(self_size_);
variable_list grad_inputs(gen.size());
// Unpack the saved forward outputs; this node is passed along as the owner.
auto result = unpack_list(result_, shared_from_this());
if (task_should_compute_output({ self_ix })) {
std::vector<Tensor> grad_result;
grad_result.reserve(grads.size());
for (const auto & i : c10::irange(grads.size())) {
if (grads[i].defined()) {
grad_result.emplace_back(grads[i] * result[i].conj());
} else {
// Keep positions aligned: an undefined incoming gradient yields a
// default-constructed Tensor at the same index.
grad_result.emplace_back(Tensor());
}
}
copy_range(grad_inputs, self_ix, grad_result);
}
return grad_inputs;
}
// Autograd wrapper for the out-of-place `_foreach_exp`.
//
// Parameters:
//   ks   - dispatch key set the call arrived with.
//   self - input tensor list.
// Returns the list of result tensors produced by the underlying kernel (or by
// the JIT decomposition when a forward gradient is defined on `self`).
//
// When any input requires grad, a ForeachExpBackward0 node is created, wired
// to the inputs' edges, attached to the outputs via set_history, and given the
// outputs as saved variables (flagged as outputs of the node).
::std::vector<at::Tensor> _foreach_exp(c10::DispatchKeySet ks, at::TensorList self) {
  auto self_ = unpack(self, "self", 0);
  [[maybe_unused]] auto _any_requires_grad = compute_requires_grad( self );
  std::shared_ptr<ForeachExpBackward0> grad_fn;
  if (_any_requires_grad) {
    grad_fn = std::shared_ptr<ForeachExpBackward0>(new ForeachExpBackward0(), deleteNode);
    grad_fn->set_next_edges(collect_next_edges( self ));
    grad_fn->self_size_ = self.size();
  }
#ifndef NDEBUG
  // Debug-only snapshot of each input's storage and TensorImpl, used after the
  // call to assert the op did not swap them out.
  //
  // The storage vector must start EMPTY and be filled with push_back. Sizing
  // it up front and then pushing would leave indices [0, n) as nullopt and put
  // the real snapshots at [n, 2n), silently disabling the check below.
  std::vector<c10::optional<Storage>> self__storage_saved;
  self__storage_saved.reserve(self_.size());
  for (const Tensor& tensor : self_) {
    self__storage_saved.push_back(
        tensor.has_storage() ? c10::optional<Storage>(tensor.storage()) : c10::nullopt);
  }
  // This one is filled by index, so pre-sizing is correct here.
  std::vector<c10::intrusive_ptr<TensorImpl>> self__impl_saved(self_.size());
  for (size_t i=0; i<self_.size(); i++) {
    if (self_[i].defined()) self__impl_saved[i] = self_[i].getIntrusivePtr();
  }
#endif
  auto _tmp = ([&]() {
    if ((isFwGradDefinedTensorList(self))) {
      // Forward gradient present: go through the JIT decomposition for jvp.
      static c10::OperatorName full_name("aten::_foreach_exp", "");
      static c10::optional<c10::OperatorHandle> opt_op = c10::Dispatcher::singleton().findSchema(full_name);
      return impl::run_jit_decomposition_with_args_for_jvp<::std::vector<at::Tensor>>("_foreach_exp", *opt_op, ks, self);
    } else {
      // Otherwise redispatch with the key set masked by after_autograd_keyset.
      at::AutoDispatchBelowADInplaceOrView guard;
      return at::redispatch::_foreach_exp(ks & c10::after_autograd_keyset, self_);
    }
  })();
  auto result = std::move(_tmp);
#ifndef NDEBUG
  // Both checks are skipped when a dispatch mode is enabled or the list has
  // dispatch tensors.
  for (size_t i=0; i<self_.size() && !at::impl::dispatch_mode_enabled(); i++) {
    if (self__storage_saved[i].has_value() && !at::impl::tensorlist_has_dispatch(self_))
      TORCH_INTERNAL_ASSERT(self__storage_saved[i].value().is_alias_of(self_[i].storage()));
  }
  for (size_t i=0; i<self_.size() && !at::impl::dispatch_mode_enabled(); i++) {
    if (self__impl_saved[i] && !at::impl::tensorlist_has_dispatch(self_))
      TORCH_INTERNAL_ASSERT(self__impl_saved[i] == self_[i].getIntrusivePtr());
  }
#endif
  if (grad_fn) {
    set_history(flatten_tensor_args( result ), grad_fn);
  }
  if (grad_fn) {
    // Save the outputs for backward; `true` marks them as outputs of grad_fn
    // (SavedVariable.is_output).
    grad_fn->result_ = make_saved_variable_list(result, true);
  }
  return result;
}
```
A bit of context:
- https://github.com/pytorch/pytorch/pull/105368#issuecomment-1640912479
Pull Request resolved: https://github.com/pytorch/pytorch/pull/105504
Approved by: https://github.com/soulitzer