From 2437ba1d71d3c1ac94847323f3c6adec0e260113 Mon Sep 17 00:00:00 2001
From: Sai Sunku
Date: Mon, 27 Jan 2025 17:31:10 +0000
Subject: [PATCH] prov/efa: Only support HMEM in efa-direct if p2p support is available

The efa-direct provider does not support copy-based protocols, so it
cannot support HMEM transfers if the HMEM device does not have p2p
support.

Signed-off-by: Sai Sunku
---
Review fixup: enable_hmem is now cleared when an initialized HMEM
interface lacks p2p support, so FI_HMEM is really withheld for
efa-direct in that case. The post-image blob id on the index line
below predates this fixup.

 prov/efa/src/efa_prov_info.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 47 insertions(+), 5 deletions(-)

diff --git a/prov/efa/src/efa_prov_info.c b/prov/efa/src/efa_prov_info.c
index 9c298fa6f39..1ac8b12e7be 100644
--- a/prov/efa/src/efa_prov_info.c
+++ b/prov/efa/src/efa_prov_info.c
@@ -442,20 +442,62 @@ static int efa_prov_info_set_nic_attr(struct fi_info *prov_info, struct efa_devi
 }
 
 #if HAVE_CUDA || HAVE_NEURON || HAVE_SYNAPSEAI
-void efa_prov_info_set_hmem_flags(struct fi_info *prov_info)
+/**
+ * @brief Advertise FI_HMEM on a prov_info when HMEM transfers can be supported
+ *
+ * Sets FI_HMEM in the info/tx/rx caps and FI_MR_HMEM in the domain mr_mode.
+ * Nothing is set unless the CUDA, Neuron or SynapseAI HMEM interface is
+ * initialized. EFA_INFO_RDM then always gets HMEM; EFA_INFO_DIRECT gets it
+ * only if every initialized HMEM interface supports p2p on the device.
+ *
+ * @param[in,out]	prov_info	info object whose caps and mr_mode are updated
+ * @param[in]		info_type	EFA_INFO_RDM or EFA_INFO_DIRECT
+ */
+void efa_prov_info_set_hmem_flags(struct fi_info *prov_info, enum efa_info_type info_type)
 {
-	if (prov_info->ep_attr->type == FI_EP_RDM &&
-	    (ofi_hmem_is_initialized(FI_HMEM_CUDA) ||
+	int i;
+	enum fi_hmem_iface iface;
+	struct efa_hmem_info *hmem_info;
+	bool enable_hmem = false;
+
+	if ((ofi_hmem_is_initialized(FI_HMEM_CUDA) ||
 	     ofi_hmem_is_initialized(FI_HMEM_NEURON) ||
 	     ofi_hmem_is_initialized(FI_HMEM_SYNAPSEAI))) {
+		if (info_type == EFA_INFO_RDM)
+			enable_hmem = true;
+
+		if (info_type == EFA_INFO_DIRECT) {
+			/*
+			 * EFA direct only supports HMEM when p2p support is available.
+			 * Start enabled and revoke on the first initialized interface
+			 * without p2p, so the warning below matches what is advertised.
+			 */
+			enable_hmem = true;
+			EFA_HMEM_IFACE_FOREACH(i) {
+				iface = efa_hmem_ifaces[i];
+				hmem_info = &g_efa_hmem_info[iface];
+				if (hmem_info->initialized && !hmem_info->p2p_supported_by_device) {
+					EFA_WARN(FI_LOG_CORE,
+						"EFA direct provider was compiled with support for %s HMEM interface "
+						"but the interface does not support p2p transfers. "
+						"EFA direct provider does not support HMEM transfers without p2p support. "
+						"HMEM support will be disabled.\n", fi_tostr(&iface, FI_TYPE_HMEM_IFACE));
+					enable_hmem = false;
+					break;
+				}
+			}
+		}
+	}
+
+	if (enable_hmem) {
 		prov_info->caps |= FI_HMEM;
 		prov_info->tx_attr->caps |= FI_HMEM;
 		prov_info->rx_attr->caps |= FI_HMEM;
 		prov_info->domain_attr->mr_mode |= FI_MR_HMEM;
 	}
 }
 #else
-void efa_prov_info_set_hmem_flags(struct fi_info *prov_info)
+void efa_prov_info_set_hmem_flags(struct fi_info *prov_info, enum efa_info_type info_type)
 {
 }
 #endif
@@ -533,7 +575,7 @@ int efa_prov_info_alloc(struct fi_info **prov_info_ptr,
 		goto err_free;
 	}
 
-	efa_prov_info_set_hmem_flags(prov_info);
+	efa_prov_info_set_hmem_flags(prov_info, info_type);
 
 	*prov_info_ptr = prov_info;
 	return 0;