Rate this Page

Function at::_triton_multi_head_attention

Function Documentation

inline at::Tensor at::_triton_multi_head_attention(const at::Tensor &query, const at::Tensor &key, const at::Tensor &value, int64_t embed_dim, int64_t num_head, const at::Tensor &qkv_weight, const at::Tensor &qkv_bias, const at::Tensor &proj_weight, const at::Tensor &proj_bias, const ::std::optional<at::Tensor> &mask = {})