Function at::_efficient_attention_forward

Function Documentation

inline ::std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor, c10::SymInt, c10::SymInt> at::_efficient_attention_forward(const at::Tensor &query, const at::Tensor &key, const at::Tensor &value, const ::std::optional<at::Tensor> &bias, const ::std::optional<at::Tensor> &cu_seqlens_q, const ::std::optional<at::Tensor> &cu_seqlens_k, ::std::optional<int64_t> max_seqlen_q, ::std::optional<int64_t> max_seqlen_k, double dropout_p, int64_t custom_mask_type, bool compute_log_sumexp = false, ::std::optional<double> scale = ::std::nullopt, const ::std::optional<at::Tensor> &causal_diagonal = {}, const ::std::optional<at::Tensor> &seqlen_k = {}, ::std::optional<int64_t> window_size = ::std::nullopt)