Reland "Add forward mode AD to out-place foreach functions (#102409) (#106043)
Forward-mode AD for out-of-place foreach functions, finally; a minimal usage sketch follows below.
Related:
- #102409
- #105504
- #58833
- #100695
---
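For a sense of what this enables on the user side, here is a minimal sketch (not taken from the PR; the shapes, list sizes, and the choice of `_foreach_sinh` are arbitrary) of driving the new forward-mode path through `torch.autograd.forward_ad`:
```python
import torch
import torch.autograd.forward_ad as fwAD

# Arbitrary primals and tangents for a list of tensors (illustrative only).
primals = [torch.randn(3) for _ in range(4)]
tangents = [torch.randn(3) for _ in range(4)]

with fwAD.dual_level():
    duals = [fwAD.make_dual(p, t) for p, t in zip(primals, tangents)]
    # Out-of-place foreach op; with this PR the outputs carry tangents.
    outs = torch._foreach_sinh(duals)
    jvps = [fwAD.unpack_dual(out).tangent for out in outs]

# Each tangent should match the per-element derivative cosh(p) * t.
for p, t, jvp in zip(primals, tangents, jvps):
    torch.testing.assert_close(jvp, p.cosh() * t)
```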
# Generated Foreach
```c++
::std::vector<at::Tensor> _foreach_sinh(c10::DispatchKeySet ks, at::TensorList self) {
  auto self_ = unpack(self, "self", 0);
  [[maybe_unused]] auto _any_requires_grad = compute_requires_grad( self );
  std::vector<bool> _any_has_forward_grad_result(self.size());
  for (const auto& i : c10::irange(self.size())) {
    _any_has_forward_grad_result[i] = isFwGradDefined(self[i]);
  }
  std::shared_ptr<ForeachSinhBackward0> grad_fn;
  if (_any_requires_grad) {
    grad_fn = std::shared_ptr<ForeachSinhBackward0>(new ForeachSinhBackward0(), deleteNode);
    grad_fn->set_next_edges(collect_next_edges( self ));
    grad_fn->self_ = make_saved_variable_list(self);
    grad_fn->self_size_ = self.size();
  }
  #ifndef NDEBUG
  std::vector<c10::optional<Storage>> self__storage_saved(self_.size());
  for (const Tensor& tensor : self_)
    self__storage_saved.push_back(
      tensor.has_storage() ? c10::optional<Storage>(tensor.storage()) : c10::nullopt);
  std::vector<c10::intrusive_ptr<TensorImpl>> self__impl_saved(self_.size());
  for (size_t i=0; i<self_.size(); i++)
    if (self_[i].defined()) self__impl_saved[i] = self_[i].getIntrusivePtr();
  #endif
  auto _tmp = ([&]() {
    at::AutoDispatchBelowADInplaceOrView guard;
    return at::redispatch::_foreach_sinh(ks & c10::after_autograd_keyset, self_);
  })();
  auto result = std::move(_tmp);
  #ifndef NDEBUG
  for (size_t i=0; i<self_.size() && !at::impl::dispatch_mode_enabled(); i++) {
    if (self__storage_saved[i].has_value() && !at::impl::tensorlist_has_dispatch(self_))
      TORCH_INTERNAL_ASSERT(self__storage_saved[i].value().is_alias_of(self_[i].storage()));
  }
  for (size_t i=0; i<self_.size() && !at::impl::dispatch_mode_enabled(); i++) {
    if (self__impl_saved[i] && !at::impl::tensorlist_has_dispatch(self_))
      TORCH_INTERNAL_ASSERT(self__impl_saved[i] == self_[i].getIntrusivePtr());
  }
  #endif
  if (grad_fn) {
    set_history(flatten_tensor_args( result ), grad_fn);
  }
  std::vector<c10::optional<at::Tensor>> result_new_fw_grad_opts(self.size(), c10::nullopt);
  for (const auto& i : c10::irange(result_new_fw_grad_opts.size())) {
    if (_any_has_forward_grad_result[i]) {
      auto self_t_raw = toNonOptFwGrad(self[i]);
      auto self_tensor = toNonOptTensor(self[i]);
      auto self_t = (self_t_raw.defined() || !self_tensor.defined())
        ? self_t_raw : at::_efficientzerotensor(self_tensor.sizes(), self_tensor.options());
      auto self_p = toNonOptPrimal(self[i]);
      result_new_fw_grad_opts[i] = (self_t.conj() * self_p.cosh().conj()).conj();
    }
  }
  for (const auto& i : c10::irange(result_new_fw_grad_opts.size())) {
    auto& result_new_fw_grad_opt = result_new_fw_grad_opts[i];
    if (result_new_fw_grad_opt.has_value() && result_new_fw_grad_opt.value().defined() && result[i].defined()) {
      // The hardcoded 0 here will need to be updated once we support multiple levels.
      result[i]._set_fw_grad(result_new_fw_grad_opt.value(), /* level */ 0, /* is_inplace_op */ false);
    }
  }
  return result;
}
::std::vector<at::Tensor> _foreach_norm_Scalar(c10::DispatchKeySet ks, at::TensorList self, const at::Scalar & ord) {
  auto self_ = unpack(self, "self", 0);
  [[maybe_unused]] auto _any_requires_grad = compute_requires_grad( self );
  std::vector<bool> _any_has_forward_grad_result(self.size());
  for (const auto& i : c10::irange(self.size())) {
    _any_has_forward_grad_result[i] = isFwGradDefined(self[i]);
  }
  std::shared_ptr<ForeachNormBackward0> grad_fn;
  if (_any_requires_grad) {
    grad_fn = std::shared_ptr<ForeachNormBackward0>(new ForeachNormBackward0(), deleteNode);
    grad_fn->set_next_edges(collect_next_edges( self ));
    grad_fn->ord = ord;
    grad_fn->self_ = make_saved_variable_list(self);
    grad_fn->self_size_ = self.size();
  }
  #ifndef NDEBUG
  std::vector<c10::optional<Storage>> self__storage_saved(self_.size());
  for (const Tensor& tensor : self_)
    self__storage_saved.push_back(
      tensor.has_storage() ? c10::optional<Storage>(tensor.storage()) : c10::nullopt);
  std::vector<c10::intrusive_ptr<TensorImpl>> self__impl_saved(self_.size());
  for (size_t i=0; i<self_.size(); i++)
    if (self_[i].defined()) self__impl_saved[i] = self_[i].getIntrusivePtr();
  #endif
  auto _tmp = ([&]() {
    at::AutoDispatchBelowADInplaceOrView guard;
    return at::redispatch::_foreach_norm(ks & c10::after_autograd_keyset, self_, ord);
  })();
  auto result = std::move(_tmp);
  #ifndef NDEBUG
  for (size_t i=0; i<self_.size() && !at::impl::dispatch_mode_enabled(); i++) {
    if (self__storage_saved[i].has_value() && !at::impl::tensorlist_has_dispatch(self_))
      TORCH_INTERNAL_ASSERT(self__storage_saved[i].value().is_alias_of(self_[i].storage()));
  }
  for (size_t i=0; i<self_.size() && !at::impl::dispatch_mode_enabled(); i++) {
    if (self__impl_saved[i] && !at::impl::tensorlist_has_dispatch(self_))
      TORCH_INTERNAL_ASSERT(self__impl_saved[i] == self_[i].getIntrusivePtr());
  }
  #endif
  if (grad_fn) {
    set_history(flatten_tensor_args( result ), grad_fn);
  }
  std::vector<c10::optional<at::Tensor>> result_new_fw_grad_opts(self.size(), c10::nullopt);
  for (const auto& i : c10::irange(result_new_fw_grad_opts.size())) {
    if (_any_has_forward_grad_result[i]) {
      auto self_t_raw = toNonOptFwGrad(self[i]);
      auto self_tensor = toNonOptTensor(self[i]);
      auto self_t = (self_t_raw.defined() || !self_tensor.defined())
        ? self_t_raw : at::_efficientzerotensor(self_tensor.sizes(), self_tensor.options());
      auto self_p = toNonOptPrimal(self[i]);
      result_new_fw_grad_opts[i] = norm_jvp(self_p, self_t, ord, result[i]);
    }
  }
  for (const auto& i : c10::irange(result_new_fw_grad_opts.size())) {
    auto& result_new_fw_grad_opt = result_new_fw_grad_opts[i];
    if (result_new_fw_grad_opt.has_value() && result_new_fw_grad_opt.value().defined() && result[i].defined()) {
      // The hardcoded 0 here will need to be updated once we support multiple levels.
      result[i]._set_fw_grad(result_new_fw_grad_opt.value(), /* level */ 0, /* is_inplace_op */ false);
    }
  }
  if (grad_fn) {
    grad_fn->result = result;
  }
  return result;
}
```
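The per-element tangent computed in the loop above is just the sinh JVP, d sinh(x) = cosh(x) dx, written with the conjugations needed for complex inputs. A rough Python restatement of that loop (`foreach_sinh_jvp` is a hypothetical helper named here only for illustration):
```python
import torch

def foreach_sinh_jvp(primals, tangents):
    # Mirrors the generated code: conj(conj(t) * conj(cosh(p))),
    # which reduces to cosh(p) * t for real tensors.
    return [(t.conj() * p.cosh().conj()).conj() for p, t in zip(primals, tangents)]
```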
# Reference
```c++
at::Tensor sinh(c10::DispatchKeySet ks, const at::Tensor & self) {
  auto& self_ = unpack(self, "self", 0);
  [[maybe_unused]] auto _any_requires_grad = compute_requires_grad( self );
  [[maybe_unused]] auto _any_has_forward_grad_result = (isFwGradDefined(self));
  std::shared_ptr<SinhBackward0> grad_fn;
  if (_any_requires_grad) {
    grad_fn = std::shared_ptr<SinhBackward0>(new SinhBackward0(), deleteNode);
    grad_fn->set_next_edges(collect_next_edges( self ));
    grad_fn->self_ = SavedVariable(self, false);
  }
  #ifndef NDEBUG
  c10::optional<Storage> self__storage_saved =
    self_.has_storage() ? c10::optional<Storage>(self_.storage()) : c10::nullopt;
  c10::intrusive_ptr<TensorImpl> self__impl_saved;
  if (self_.defined()) self__impl_saved = self_.getIntrusivePtr();
  #endif
  auto _tmp = ([&]() {
    at::AutoDispatchBelowADInplaceOrView guard;
    return at::redispatch::sinh(ks & c10::after_autograd_keyset, self_);
  })();
  auto result = std::move(_tmp);
  #ifndef NDEBUG
  if (self__storage_saved.has_value() &&
      !at::impl::dispatch_mode_enabled() &&
      !at::impl::tensor_has_dispatch(self_))
    TORCH_INTERNAL_ASSERT(self__storage_saved.value().is_alias_of(self_.storage()));
  if (self__impl_saved && !at::impl::dispatch_mode_enabled() && !at::impl::tensor_has_dispatch(self_))
    TORCH_INTERNAL_ASSERT(self__impl_saved == self_.getIntrusivePtr());
  if (result.has_storage() && !at::impl::dispatch_mode_enabled() && !at::impl::tensor_has_dispatch(result)) {
    TORCH_INTERNAL_ASSERT(result.storage().use_count() == 1, "function: sinh");
  }
  if (!at::impl::dispatch_mode_enabled() && !at::impl::tensor_has_dispatch(result))
    TORCH_INTERNAL_ASSERT(result.use_count() <= 1, "function: sinh");
  #endif
  if (grad_fn) {
    set_history(flatten_tensor_args( result ), grad_fn);
  }
  c10::optional<at::Tensor> result_new_fw_grad_opt = c10::nullopt;
  if (_any_has_forward_grad_result && (result.defined())) {
    auto self_t_raw = toNonOptFwGrad(self);
    auto self_tensor = toNonOptTensor(self);
    auto self_t = (self_t_raw.defined() || !self_tensor.defined())
      ? self_t_raw : at::_efficientzerotensor(self_tensor.sizes(), self_tensor.options());
    auto self_p = toNonOptPrimal(self);
    result_new_fw_grad_opt = (self_t.conj() * self_p.cosh().conj()).conj();
  }
  if (result_new_fw_grad_opt.has_value() && result_new_fw_grad_opt.value().defined() && result.defined()) {
    // The hardcoded 0 here will need to be updated once we support multiple levels.
    result._set_fw_grad(result_new_fw_grad_opt.value(), /* level */ 0, /* is_inplace_op */ false);
  }
  return result;
}
at::Tensor norm_Scalar(c10::DispatchKeySet ks, const at::Tensor & self, const at::Scalar & p) {
  auto& self_ = unpack(self, "self", 0);
  [[maybe_unused]] auto _any_requires_grad = compute_requires_grad( self );
  [[maybe_unused]] auto _any_has_forward_grad_result = (isFwGradDefined(self));
  std::shared_ptr<NormBackward0> grad_fn;
  if (_any_requires_grad) {
    grad_fn = std::shared_ptr<NormBackward0>(new NormBackward0(), deleteNode);
    grad_fn->set_next_edges(collect_next_edges( self ));
    grad_fn->p = p;
    grad_fn->self_ = SavedVariable(self, false);
  }
  #ifndef NDEBUG
  c10::optional<Storage> self__storage_saved =
    self_.has_storage() ? c10::optional<Storage>(self_.storage()) : c10::nullopt;
  c10::intrusive_ptr<TensorImpl> self__impl_saved;
  if (self_.defined()) self__impl_saved = self_.getIntrusivePtr();
  #endif
  auto _tmp = ([&]() {
    at::AutoDispatchBelowADInplaceOrView guard;
    return at::redispatch::norm(ks & c10::after_autograd_keyset, self_, p);
  })();
  auto result = std::move(_tmp);
  #ifndef NDEBUG
  if (self__storage_saved.has_value() &&
      !at::impl::dispatch_mode_enabled() &&
      !at::impl::tensor_has_dispatch(self_))
    TORCH_INTERNAL_ASSERT(self__storage_saved.value().is_alias_of(self_.storage()));
  if (self__impl_saved && !at::impl::dispatch_mode_enabled() && !at::impl::tensor_has_dispatch(self_))
    TORCH_INTERNAL_ASSERT(self__impl_saved == self_.getIntrusivePtr());
  if (result.has_storage() && !at::impl::dispatch_mode_enabled() && !at::impl::tensor_has_dispatch(result)) {
    TORCH_INTERNAL_ASSERT(result.storage().use_count() == 1, "function: norm_Scalar");
  }
  if (!at::impl::dispatch_mode_enabled() && !at::impl::tensor_has_dispatch(result))
    TORCH_INTERNAL_ASSERT(result.use_count() <= 1, "function: norm_Scalar");
  #endif
  if (grad_fn) {
    set_history(flatten_tensor_args( result ), grad_fn);
  }
  throw_error_for_complex_autograd(result, "norm");
  c10::optional<at::Tensor> result_new_fw_grad_opt = c10::nullopt;
  if (_any_has_forward_grad_result && (result.defined())) {
    auto self_t_raw = toNonOptFwGrad(self);
    auto self_tensor = toNonOptTensor(self);
    auto self_t = (self_t_raw.defined() || !self_tensor.defined())
      ? self_t_raw : at::_efficientzerotensor(self_tensor.sizes(), self_tensor.options());
    auto self_p = toNonOptPrimal(self);
    result_new_fw_grad_opt = norm_jvp(self_p, self_t, p, result);
  }
  if (result_new_fw_grad_opt.has_value() && result_new_fw_grad_opt.value().defined() && result.defined()) {
    // The hardcoded 0 here will need to be updated once we support multiple levels.
    result._set_fw_grad(result_new_fw_grad_opt.value(), /* level */ 0, /* is_inplace_op */ false);
  }
  if (grad_fn) {
    grad_fn->result_ = SavedVariable(result, true);
  }
  return result;
}
```
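The generated foreach kernels follow the single-tensor reference closely: the scalar `_any_has_forward_grad_result` flag becomes a per-element vector, the JVP is computed and `_set_fw_grad` is called in a loop over the result list, and `ForeachNormBackward0` saves the whole result vector instead of a single `SavedVariable`. One way to exercise the new path is `gradcheck` with `check_forward_ad=True`; the lambda wrapper and inputs below are illustrative, not code from the PR:
```python
import torch
from torch.autograd import gradcheck

xs = tuple(torch.randn(2, 2, dtype=torch.double, requires_grad=True) for _ in range(3))
# check_forward_ad=True makes gradcheck verify the forward-mode (JVP) formula as well.
assert gradcheck(lambda *ts: torch._foreach_sinh(ts), xs, check_forward_ad=True)
```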
Pull Request resolved: https://github.com/pytorch/pytorch/pull/106043
Approved by: https://github.com/soulitzer