Skip to content

Commit

Permalink
[VP] AI Kernel Integration Framework
Browse files Browse the repository at this point in the history
Make it easier for developers to integrate AI features: only derive SwAiBaseFilter/SwFilterAiBaseHandler and fill in the AI_KERNEL_SETTING_PIPE.
  • Loading branch information
peiyigu-intel authored and intel-mediadev committed Jan 21, 2025
1 parent f118c0e commit 2ee08d3
Show file tree
Hide file tree
Showing 40 changed files with 2,025 additions and 36 deletions.
3 changes: 2 additions & 1 deletion media_common/agnostic/common/hw/mhw_render.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
#define MHW_RENDER_ENGINE_EU_INDEX_MAX 12
#define MHW_RENDER_ENGINE_SIZE_REGISTERS_PER_THREAD 0x1800
#define MHW_RENDER_ENGINE_NUMBER_OF_THREAD_UNIT 32
#define MHW_RENDER_ENGINE_MAX_NUMBER_OF_THREAD (1024 / MHW_RENDER_ENGINE_NUMBER_OF_THREAD_UNIT)

#define MHW_MAX_DEPENDENCY_COUNT 8

Expand Down Expand Up @@ -314,6 +313,8 @@ typedef struct _MHW_GPGPU_WALKER_PARAMS
PMHW_INLINE_DATA_PARAMS inlineDataParamBase;
uint32_t inlineDataParamSize;

uint32_t simdSize;

} MHW_GPGPU_WALKER_PARAMS, *PMHW_GPGPU_WALKER_PARAMS;

typedef struct _MHW_MEDIA_OBJECT_PARAMS
Expand Down
5 changes: 5 additions & 0 deletions media_common/agnostic/common/vp/hal/vp_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ extern "C" {
//! Structure VPHAL_SURFACE
//! \brief DDI-VPHAL surface definition
//!
struct VP_SURFACE;
struct VPHAL_SURFACE
{
// Color Information
Expand Down Expand Up @@ -123,6 +124,10 @@ struct VPHAL_SURFACE

// VPHAL_SURFACE Linked list
PVPHAL_SURFACE pNext = nullptr;

// For Multiple SwPipe Usage in SwFilterPipeFactory::Create
// This is an intermediate surface for multiple pipe in/out
VP_SURFACE *pPipeIntermediateSurface = nullptr;

//--------------------------------------
// FIELDS TO BE SETUP BY VPHAL INTERNALLY
Expand Down
4 changes: 4 additions & 0 deletions media_common/agnostic/common/vp/hal/vp_common_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,10 @@ enum VpKernelID
kernelOclFc420PL3Input,
kernelOclFc420PL3Output,
kernelOclFc422HVInput,

// AI Common
kernelAiCommon,

baseKernelMaxNumID
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,18 @@ class Impl : public render::Impl<mhw::render::xe2_hpg_next::Cmd>
cmd.DW2.IndirectDataLength = params.IndirectDataLength;
cmd.DW3.IndirectDataStartAddress = params.IndirectDataStartAddress >> MHW_COMPUTE_INDIRECT_SHIFT;

cmd.DW4.SIMDSize = 2;
cmd.DW4.MessageSIMD = Cmd::COMPUTE_WALKER_CMD::MESSAGE_SIMD_SIMT32;
cmd.DW5.ExecutionMask = 0xffffffff;
if (params.simdSize == 16)
{
cmd.DW4.SIMDSize = 1;
cmd.DW4.MessageSIMD = Cmd::COMPUTE_WALKER_CMD::MESSAGE_SIMD_SIMT16;
cmd.DW5.ExecutionMask = 0xffff;
}
else
{
cmd.DW4.SIMDSize = 2;
cmd.DW4.MessageSIMD = Cmd::COMPUTE_WALKER_CMD::MESSAGE_SIMD_SIMT32;
cmd.DW5.ExecutionMask = 0xffffffff;
}
cmd.DW6.LocalXMaximum = params.ThreadWidth - 1;
cmd.DW6.LocalYMaximum = params.ThreadHeight - 1;
cmd.DW6.LocalZMaximum = params.ThreadDepth - 1;
Expand Down
1 change: 1 addition & 0 deletions media_softlet/agnostic/common/hw/mhw_render_cmdpar.h
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,7 @@ struct _MHW_PAR_T(COMPUTE_WALKER)
bool isGenerateLocalId = false;
MHW_EMIT_LOCAL_MODE emitLocal = MHW_EMIT_LOCAL_NONE;
uint32_t preferredSlmAllocationSize = 0;
uint32_t simdSize = 0;
_MHW_PAR_T(CFE_STATE) cfeState = {};
MHW_HEAPS_RESOURCE heapsResource = {};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1227,10 +1227,11 @@ MOS_STATUS XRenderHal_Platform_Interface_Next::SendComputeWalker(
if (pGpGpuWalkerParams->isGenerateLocalID && pGpGpuWalkerParams->emitLocal != MHW_EMIT_LOCAL_NONE)
{
// When COMPUTE_WALKER Emit Local ID is enabled, the number of threads in a group is ThreadWidth * ThreadHeight divided by the thread unit (16 for SIMD16, otherwise MHW_RENDER_ENGINE_NUMBER_OF_THREAD_UNIT)
mhwIdEntryParams.dwNumberofThreadsInGPGPUGroup = pGpGpuWalkerParams->ThreadWidth * pGpGpuWalkerParams->ThreadHeight / MHW_RENDER_ENGINE_NUMBER_OF_THREAD_UNIT;
if (mhwIdEntryParams.dwNumberofThreadsInGPGPUGroup > MHW_RENDER_ENGINE_MAX_NUMBER_OF_THREAD)
uint32_t engineNumberOfThreadUnit = (pGpGpuWalkerParams->simdSize == 16) ? 16 : MHW_RENDER_ENGINE_NUMBER_OF_THREAD_UNIT;
mhwIdEntryParams.dwNumberofThreadsInGPGPUGroup = MOS_MAX(1, pGpGpuWalkerParams->ThreadWidth * pGpGpuWalkerParams->ThreadHeight / engineNumberOfThreadUnit);
if (mhwIdEntryParams.dwNumberofThreadsInGPGPUGroup > 1024 / engineNumberOfThreadUnit)
{
MHW_RENDERHAL_ASSERTMESSAGE("Number of Threads In GpGpuGroup %d Exceeds the Max Number %d", mhwIdEntryParams.dwNumberofThreadsInGPGPUGroup, MHW_RENDER_ENGINE_MAX_NUMBER_OF_THREAD);
MHW_RENDERHAL_ASSERTMESSAGE("Number of Threads In GpGpuGroup %d Exceeds the Max Number %d", mhwIdEntryParams.dwNumberofThreadsInGPGPUGroup, 1024 / engineNumberOfThreadUnit);
MHW_RENDERHAL_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
}
}
Expand Down Expand Up @@ -1544,6 +1545,7 @@ MHW_SETPAR_DECL_SRC(COMPUTE_WALKER, XRenderHal_Platform_Interface_Next)
MHW_RENDERHAL_CHK_NULL_RETURN(m_gpgpuWalkerParams);
MHW_RENDERHAL_CHK_NULL_RETURN(m_interfaceDescriptorParams);

params.simdSize = m_gpgpuWalkerParams->simdSize;
params.IndirectDataLength = m_gpgpuWalkerParams->IndirectDataLength;
params.IndirectDataStartAddress = m_gpgpuWalkerParams->IndirectDataStartAddress;
params.ThreadWidth = m_gpgpuWalkerParams->ThreadWidth;
Expand Down Expand Up @@ -1576,6 +1578,10 @@ MHW_SETPAR_DECL_SRC(COMPUTE_WALKER, XRenderHal_Platform_Interface_Next)
{
params.GroupDepth = 1;
}
if (m_gpgpuWalkerParams->simdSize == 0)
{
params.simdSize = 32;
}

params.isEmitInlineParameter = m_gpgpuWalkerParams->isEmitInlineParameter;
if (m_gpgpuWalkerParams->inlineDataLength > 0 && m_gpgpuWalkerParams->inlineData)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1237,7 +1237,7 @@ MOS_STATUS RenderCmdPacket::PrepareComputeWalkerParams(KERNEL_WALKER_PARAMS para
gpgpuWalker.GroupHeight += gpgpuWalker.GroupStartingY;
}

if (params.threadDepth && params.threadWidth && params.threadHeight)
if (params.threadDepth && params.threadWidth && params.threadHeight && params.isGenerateLocalID && params.emitLocal != MHW_EMIT_LOCAL_NONE)
{
gpgpuWalker.ThreadWidth = params.threadWidth;
gpgpuWalker.ThreadHeight = params.threadHeight;
Expand All @@ -1249,6 +1249,7 @@ MOS_STATUS RenderCmdPacket::PrepareComputeWalkerParams(KERNEL_WALKER_PARAMS para
gpgpuWalker.ThreadHeight = COMPUTE_WALKER_THREAD_SPACE_HEIGHT;
gpgpuWalker.ThreadDepth = COMPUTE_WALKER_THREAD_SPACE_DEPTH;
}
gpgpuWalker.simdSize = params.simdSize;
gpgpuWalker.IndirectDataStartAddress = params.iCurbeOffset;
// Indirect Data Length is a multiple of 64 bytes (size of L3 cacheline). Bits [5:0] are zero.
gpgpuWalker.IndirectDataLength = MOS_ALIGN_CEIL(params.iCurbeLength, 1 << MHW_COMPUTE_INDIRECT_SHIFT);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ typedef struct _KERNEL_WALKER_PARAMS
uint32_t slmSize;
PMHW_INLINE_DATA_PARAMS inlineDataParamBase;
uint32_t inlineDataParamSize;

uint32_t simdSize;
}KERNEL_WALKER_PARAMS, * PKERNEL_WALKER_PARAMS;

typedef struct _KERNEL_PACKET_RENDER_DATA
Expand Down
80 changes: 79 additions & 1 deletion media_softlet/agnostic/common/vp/hal/bufferMgr/vp_allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,9 +231,24 @@ VP_SURFACE *VpAllocator::AllocateVpSurface(VPHAL_SURFACE &vphalSurf)
VP_FUNC_CALL();
if (Mos_ResourceIsNull(&vphalSurf.OsResource))
{
if (vphalSurf.pPipeIntermediateSurface)
{
VP_SURFACE *surf = MOS_New(VP_SURFACE);
if (surf == nullptr)
{
return nullptr;
}
surf->osSurface = MOS_New(MOS_SURFACE);
if (MOS_FAILED(CopyVpSurface(*surf, *vphalSurf.pPipeIntermediateSurface)))
{
MOS_Delete(surf->osSurface);
MOS_Delete(surf);
return nullptr;
}
return surf;
}
return nullptr;
}

VP_SURFACE *surf = MOS_New(VP_SURFACE);

if (nullptr == surf)
Expand Down Expand Up @@ -1042,6 +1057,69 @@ MOS_STATUS VpAllocator::ReAllocateSurface(
}
#endif

//!
//! \brief    Reallocates a VP surface using the configuration of a VPHAL surface
//! \details  Derives the resource type, tile type, resource usage type and forced
//!           tile mode from vphalSurface, forwards them together with its format,
//!           dimensions and compression settings to ReAllocateSurface, then copies
//!           the remaining descriptive fields of vphalSurface into the VP surface.
//! \param    [in,out] surface
//!           VP surface to (re)allocate
//! \param    [in] vphalSurface
//!           VPHAL surface that supplies the configuration; must not be null
//! \param    [in] surfaceName
//!           Surface name forwarded to ReAllocateSurface
//! \param    [out] allocated
//!           Forwarded to ReAllocateSurface, which reports whether an allocation happened
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success. Error code otherwise
//!
MOS_STATUS VpAllocator::ReAllocateVpSurfaceWithSameConfigOfVphalSurface(
    VP_SURFACE          *&surface,
    const PVPHAL_SURFACE &vphalSurface,
    PCCHAR                surfaceName,
    bool                 &allocated)
{
    VP_PUBLIC_CHK_NULL_RETURN(vphalSurface);

    MOS_GFXRES_TYPE     defaultResType  = MOS_GFXRES_INVALID;
    MOS_TILE_TYPE       defaultTileType = MOS_TILE_LINEAR;
    MOS_HW_RESOURCE_DEF resUsageType    = MOS_HW_RESOURCE_DEF_MAX;
    MOS_TILE_MODE_GMM   tileModeByForce = MOS_TILE_UNSET_GMM;

    if (Mos_ResourceIsNull(&vphalSurface->OsResource))
    {
        // No backing resource: choose buffer vs. 2D from the format and take the
        // usage type / tile mode from the fields stored in the (null) OsResource.
        // NOTE(review): presumably the caller pre-fills those two fields for
        // not-yet-allocated surfaces - confirm against the caller.
        bool isBuffer   = (vphalSurface->Format == Format_Buffer || vphalSurface->Format == Format_RAW);
        defaultResType  = isBuffer ? MOS_GFXRES_BUFFER : MOS_GFXRES_2D;
        defaultTileType = isBuffer ? MOS_TILE_LINEAR : MOS_TILE_Y;
        resUsageType    = vphalSurface->OsResource.mocsMosResUsageType;
        tileModeByForce = vphalSurface->OsResource.TileModeGMM;
    }
    else
    {
        // Backing resource exists: mirror its resource type and tile type.
        defaultResType  = m_osInterface->pfnGetResType(&vphalSurface->OsResource);
        defaultTileType = vphalSurface->OsResource.TileType;
        resUsageType    = MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER;
        tileModeByForce = MOS_TILE_UNSET_GMM;
    }

    VP_PUBLIC_CHK_STATUS_RETURN(ReAllocateSurface(
        surface,
        surfaceName,
        vphalSurface->Format,
        defaultResType,
        defaultTileType,
        vphalSurface->dwWidth,
        vphalSurface->dwHeight,
        vphalSurface->bCompressible,
        vphalSurface->CompressionMode,
        allocated,
        false,
        false,
        resUsageType,
        tileModeByForce));

    // The surface is dereferenced below; do not rely on ReAllocateSurface
    // guaranteeing a non-null result on success.
    VP_PUBLIC_CHK_NULL_RETURN(surface);

    // Initialize other parameters in vp surface according to vphal surface.
    surface->ColorSpace     = vphalSurface->ColorSpace;
    surface->ExtendedGamut  = vphalSurface->ExtendedGamut;
    surface->Palette        = vphalSurface->Palette;
    surface->bQueryVariance = vphalSurface->bQueryVariance;
    surface->FrameID        = vphalSurface->FrameID;
    surface->uFwdRefCount   = vphalSurface->uFwdRefCount;
    surface->uBwdRefCount   = vphalSurface->uBwdRefCount;
    surface->pFwdRef        = vphalSurface->pFwdRef;
    surface->pBwdRef        = vphalSurface->pBwdRef;
    surface->SurfType       = vphalSurface->SurfType;
    surface->SampleType     = vphalSurface->SampleType;
    surface->ChromaSiting   = vphalSurface->ChromaSiting;
    surface->rcSrc          = vphalSurface->rcSrc;
    surface->rcDst          = vphalSurface->rcDst;
    surface->rcMaxSrc       = vphalSurface->rcMaxSrc;

    return MOS_STATUS_SUCCESS;
}

MOS_STATUS VpAllocator::OsFillResource(
PMOS_RESOURCE osResource,
uint32_t size,
Expand Down
24 changes: 24 additions & 0 deletions media_softlet/agnostic/common/vp/hal/bufferMgr/vp_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,30 @@ class VpAllocator
MOS_HW_RESOURCE_DEF resUsageType = MOS_HW_RESOURCE_DEF_MAX, // [in] resource usage type
MOS_TILE_MODE_GMM tileModeByForce = MOS_TILE_UNSET_GMM); // [in] Flag to indicate if GMM flag tile64 need set
#endif

//!
//! \brief Reallocates the VP Surface w/ the same config of a Vphal Surface
//! \details Reallocates the VP Surface w/ the same config of a Vphal Surface
//! - if the surface is not already allocated OR
//! - resource dimensions OR format changed
//! \param [in,out] surface
//! Pointer to VP_SURFACE
//! \param [in] vphalSurface
//! Pointer to vphal surface which is the source config of allocated vp surface
//! \param [in] surfaceName
//! Pointer to surface name
//! \param [out] allocated
//! true if allocated, false for not
//!
//! \return MOS_STATUS
//! MOS_STATUS_SUCCESS if success. Error code otherwise
//!
MOS_STATUS ReAllocateVpSurfaceWithSameConfigOfVphalSurface(
VP_SURFACE *&surface,
const PVPHAL_SURFACE &vphalSurface,
PCCHAR surfaceName,
bool &allocated);

//!
//! \brief Unified OS fill Resource
//! \details Locks the surface and fills the resource with data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,11 @@ VpResourceManager::~VpResourceManager()
m_allocator.DestroyVpSurface(m_fcIntermediaSurfaceOutput);
}

for (auto& handle : m_aiIntermediateSurface)
{
m_allocator.DestroyVpSurface(handle.second);
}

m_allocator.CleanRecycler();
}

Expand Down Expand Up @@ -1238,6 +1243,68 @@ MOS_STATUS VpResourceManager::AssignFcResources(VP_EXECUTE_CAPS &caps, std::vect
return MOS_STATUS_SUCCESS;
}

//!
//! \brief    Assigns input, intermediate and output surfaces for the AI kernel path
//! \details  For every input sub pipe of executedFilters, registers the matching
//!           input surface as SurfaceTypeAiInput0 + index. For the AI filter whose
//!           stageIndex is 0, queries each kernel layer for its intermediate surface
//!           settings, (re)allocates those surfaces in m_aiIntermediateSurface and
//!           registers them in the surface group. Finally registers outputSurface
//!           as SurfaceTypeAiTarget0.
//! \param    [in] caps
//!           Execute caps (not read by this function)
//! \param    [in] inputSurfaces
//!           Input surfaces, indexed by input sub pipe index
//! \param    [in] outputSurface
//!           Output surface
//! \param    [in] executedFilters
//!           Filter pipe being executed
//! \param    [in,out] surfSetting
//!           Surface setting whose surfGroup receives the assigned surfaces
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success. Error code otherwise
//!
MOS_STATUS VpResourceManager::AssignAiKernelResource(VP_EXECUTE_CAPS &caps, std::vector<VP_SURFACE *> &inputSurfaces, VP_SURFACE *outputSurface, SwFilterPipe &executedFilters, VP_SURFACE_SETTING &surfSetting)
{
    VP_FUNC_CALL();
    bool allocated = false;

    for (uint32_t i = 0; i < executedFilters.GetSurfaceCount(true); ++i)
    {
        SwFilterSubPipe *subPipe = executedFilters.GetSwFilterSubPipe(true, i);
        if (subPipe == nullptr)
        {
            continue;
        }

        // The loop bound comes from executedFilters, not from inputSurfaces;
        // reject a mismatch instead of indexing past the end of the vector.
        if (i >= inputSurfaces.size())
        {
            VP_PUBLIC_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
        }
        surfSetting.surfGroup.insert(std::make_pair((SurfaceType)(SurfaceTypeAiInput0 + i), inputSurfaces[i]));

        SwFilterAiBase *ai = nullptr;
        VP_PUBLIC_CHK_STATUS_RETURN(subPipe->GetAiSwFilter(ai));
        if (ai == nullptr)
        {
            continue;
        }

        FeatureParamAi &aiParam = ai->GetSwFilterParams();
        // Intermediate surfaces are only set up from the filter at stage 0.
        if (aiParam.stageIndex != 0)
        {
            continue;
        }

        for (AI_SINGLE_LAYER_SETTING &singleLayerSetting : aiParam.kernelSettings)
        {
            AI_SURFACE_ALLOCATION_MAP aiSurfaceMap = {};
            VP_PUBLIC_CHK_NULL_RETURN(singleLayerSetting.pfnGetIntermediateSurfaceSetting);
            VP_PUBLIC_CHK_STATUS_RETURN(singleLayerSetting.pfnGetIntermediateSurfaceSetting(i, executedFilters, aiSurfaceMap));

            for (auto const &aiSurfaceSetting : aiSurfaceMap)
            {
                // Find the cached surface slot for this surface type, creating an
                // empty slot on first use.
                auto handle = m_aiIntermediateSurface.find(aiSurfaceSetting.first);
                if (handle == m_aiIntermediateSurface.end())
                {
                    handle = m_aiIntermediateSurface.insert(std::make_pair(aiSurfaceSetting.first, nullptr)).first;
                    VP_PUBLIC_CHK_NOT_FOUND_RETURN(handle, &m_aiIntermediateSurface);
                }

                // Reference into the map so the (re)allocated pointer is stored back.
                VP_SURFACE *&intermediateSurface = handle->second;
                VP_PUBLIC_CHK_STATUS_RETURN(m_allocator.ReAllocateSurface(
                    intermediateSurface,
                    aiSurfaceSetting.second.surfaceName.c_str(),
                    aiSurfaceSetting.second.format,
                    aiSurfaceSetting.second.resourceType,
                    aiSurfaceSetting.second.tileType,
                    aiSurfaceSetting.second.width,
                    aiSurfaceSetting.second.height,
                    false,
                    MOS_MMC_DISABLED,
                    allocated,
                    false,
                    IsDeferredResourceDestroyNeeded(),
                    MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER));

                surfSetting.surfGroup.insert(std::make_pair(aiSurfaceSetting.first, intermediateSurface));
            }
        }
    }
    surfSetting.surfGroup.insert(std::make_pair((SurfaceType)(SurfaceTypeAiTarget0), outputSurface));

    return MOS_STATUS_SUCCESS;
}

MOS_STATUS VpResourceManager::AssignRenderResource(VP_EXECUTE_CAPS &caps, std::vector<VP_SURFACE *> &inputSurfaces, VP_SURFACE *outputSurface,
std::vector<VP_SURFACE *> &pastSurfaces, std::vector<VP_SURFACE *> &futureSurfaces, RESOURCE_ASSIGNMENT_HINT resHint, VP_SURFACE_SETTING &surfSetting, SwFilterPipe& executedFilters)
{
Expand All @@ -1259,6 +1326,10 @@ MOS_STATUS VpResourceManager::AssignRenderResource(VP_EXECUTE_CAPS &caps, std::v
{
VP_PUBLIC_CHK_STATUS_RETURN(AssignHdrResource(caps, inputSurfaces, outputSurface, resHint, surfSetting, executedFilters));
}
else if (caps.bAiPath)
{
VP_PUBLIC_CHK_STATUS_RETURN(AssignAiKernelResource(caps, inputSurfaces, outputSurface, executedFilters, surfSetting));
}
else
{
if (1 != inputSurfaces.size())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,7 @@ class VpResourceManager
virtual MOS_STATUS AssignFcResources(VP_EXECUTE_CAPS &caps, std::vector<VP_SURFACE *> &inputSurfaces, VP_SURFACE *outputSurface,
std::vector<VP_SURFACE *> &pastSurfaces, std::vector<VP_SURFACE *> &futureSurfaces,
RESOURCE_ASSIGNMENT_HINT resHint, VP_SURFACE_SETTING &surfSetting);
virtual MOS_STATUS AssignAiKernelResource(VP_EXECUTE_CAPS &caps, std::vector<VP_SURFACE *> &inputSurfaces, VP_SURFACE *outputSurface, SwFilterPipe &executedFilters, VP_SURFACE_SETTING &surfSetting);
virtual MOS_STATUS AssignVeboxResourceForRender(VP_EXECUTE_CAPS &caps, VP_SURFACE *inputSurface, RESOURCE_ASSIGNMENT_HINT resHint, VP_SURFACE_SETTING &surfSetting);
virtual MOS_STATUS AssignVeboxResource(VP_EXECUTE_CAPS& caps, VP_SURFACE* inputSurface, VP_SURFACE* outputSurface, VP_SURFACE* pastSurface, VP_SURFACE* futureSurface,
RESOURCE_ASSIGNMENT_HINT resHint, VP_SURFACE_SETTING& surfSetting, SwFilterPipe& executedFilters);
Expand Down Expand Up @@ -561,6 +562,9 @@ class VpResourceManager
VphdrResourceManager *m_hdrResourceManager = nullptr;
MediaUserSettingSharedPtr m_userSettingPtr = nullptr; //!< usersettingInstance

// AI Resource
std::map<SurfaceType, VP_SURFACE *> m_aiIntermediateSurface = {};

MediaCopyWrapper *m_mediaCopyWrapper = nullptr;

MEDIA_CLASS_DEFINE_END(vp__VpResourceManager)
Expand Down
Loading

0 comments on commit 2ee08d3

Please sign in to comment.