Skip to content

Commit

Permalink
ih264d: Small optimizations and experiments with multi-threading
Browse files Browse the repository at this point in the history
Using the multi-threaded decoder doesn't seem to be worth it, but at least we have a way to enable it now.
  • Loading branch information
Exzap committed Sep 14, 2023
1 parent f04c757 commit fda5ec2
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 15 deletions.
6 changes: 6 additions & 0 deletions dependencies/ih264d/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -183,4 +183,10 @@ endif()

if(MSVC)
    # Select the statically linked MSVC runtime ("MultiThreaded"), switching to
    # its debug flavour ("MultiThreadedDebug") for Debug configurations.
    set_property(TARGET ih264d PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")

    # Tune code generation for slightly better decoder performance.
    target_compile_options(ih264d PRIVATE
        # optimized configurations only: enable intrinsic functions
        $<$<CONFIG:Release,RelWithDebInfo>:/Oi>
        # optimized configurations only: favor speed over size
        $<$<CONFIG:Release,RelWithDebInfo>:/Ot>
        # every configuration: turn off the /GS buffer security check
        /GS-
    )
endif()
50 changes: 39 additions & 11 deletions dependencies/ih264d/common/ithread.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,28 +85,59 @@ UWORD32 ithread_get_mutex_lock_size(void)
return sizeof(CRITICAL_SECTION);
}

/*
 * Launch record passed as the thread parameter to
 * _ithread_WinThreadStartRoutine(). ithread_create() allocates it with
 * malloc() and the start routine frees it after copying both fields out.
 */
struct _ithread_launch_param
{
/* Thread entry point supplied by the caller of ithread_create(). The start
 * routine casts it to a `void *(*)(void *)` before calling it. */
void (*startFunc)(void* argument);
/* Opaque pointer passed unchanged to the entry point. */
void* argument;
};

/*
 * Win32 thread entry trampoline used by ithread_create().
 *
 * lpThreadParameter points at a heap-allocated struct _ithread_launch_param.
 * The entry point and its argument are copied out, the record is freed, and
 * only then is the entry point invoked. Its result is discarded and the
 * thread always exits with code 0.
 */
DWORD WINAPI _ithread_WinThreadStartRoutine(LPVOID lpThreadParameter)
{
    typedef void *(*thread_entry_fn)(void *);

    struct _ithread_launch_param *launch = (struct _ithread_launch_param *)lpThreadParameter;
    thread_entry_fn entry = (thread_entry_fn)launch->startFunc;
    void *user_arg = launch->argument;

    /* The record is owned by this thread; release it before running user code. */
    free(launch);

    entry(user_arg);
    return 0;
}

/*
 * Starts a new Win32 thread that runs `strt(argument)`.
 *
 * thread_handle : storage for a HANDLE; receives the new thread's handle
 *                 (set to NULL on failure).
 * attribute     : ignored.
 * strt          : thread entry point, passed as an untyped pointer.
 * argument      : opaque pointer forwarded to the entry point.
 *
 * Returns 0 on success and -1 if the launch record cannot be allocated or
 * CreateThread() fails.
 *
 * Ownership: the launch record is allocated here and freed by
 * _ithread_WinThreadStartRoutine() once the thread is running. If the thread
 * never starts, it is freed here instead.
 */
WORD32 ithread_create(void* thread_handle, void* attribute, void* strt, void* argument)
{
    HANDLE *handle = (HANDLE *)thread_handle;
    struct _ithread_launch_param *param;

    UNUSED(attribute);

    param = malloc(sizeof *param);
    if(param == NULL)
    {
        *handle = NULL;
        return -1;
    }
    param->startFunc = (void (*)(void *))strt;
    param->argument = argument;

    *handle = CreateThread(NULL, 0, _ithread_WinThreadStartRoutine, param, 0, NULL);
    if(*handle == NULL)
    {
        /* The start routine will never run, so nobody else can free this. */
        free(param);
        return -1;
    }
    return 0;
}

/*
 * Blocks until the thread created by ithread_create() has finished, then
 * closes its handle.
 *
 * thread_handle : pointer to the HANDLE filled in by ithread_create().
 * val_ptr       : ignored; the thread's result is not reported.
 *
 * Returns 0 once the thread has terminated and its handle has been closed,
 * or -1 if the wait does not complete with WAIT_OBJECT_0 (the handle is left
 * open in that case).
 */
WORD32 ithread_join(void* thread_handle, void** val_ptr)
{
    HANDLE *handle = (HANDLE *)thread_handle;

    (void)val_ptr;

    if(WaitForSingleObject(*handle, INFINITE) != WAIT_OBJECT_0)
    {
        return -1;
    }

    CloseHandle(*handle);
    return 0;
}

/*
 * Reports how many bytes a caller must reserve for one mutex object.
 * Mutexes are stored as Win32 CRITICAL_SECTION objects.
 */
WORD32 ithread_get_mutex_struct_size(void)
{
    const WORD32 mutex_bytes = sizeof(CRITICAL_SECTION);
    return mutex_bytes;
}

WORD32 ithread_mutex_init(void* mutex)
{
InitializeCriticalSection((LPCRITICAL_SECTION)mutex);
Expand Down Expand Up @@ -153,7 +184,6 @@ UWORD32 ithread_get_sem_struct_size(void)
//return(sizeof(sem_t));
}


WORD32 ithread_sem_init(void* sem, WORD32 pshared, UWORD32 value)
{
__debugbreak();
Expand All @@ -168,15 +198,13 @@ WORD32 ithread_sem_post(void* sem)
//return sem_post((sem_t*)sem);
}


/*
 * Semaphore wait placeholder: not implemented for this platform.
 *
 * Triggers a debugger break so any use is noticed, then reports success (0).
 * The `sem` argument is not used.
 */
WORD32 ithread_sem_wait(void* sem)
{
    const WORD32 status = 0;

    (void)sem;
    __debugbreak();
    return status;
}


WORD32 ithread_sem_destroy(void* sem)
{
__debugbreak();
Expand Down
4 changes: 1 addition & 3 deletions dependencies/ih264d/common/x86/ih264_platform_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,8 @@
/*
 * MSVC stand-in for the GCC/Clang __builtin_clz(): counts the leading zero
 * bits of a 32-bit value.
 *
 * Returns 32 when x is 0. _BitScanReverse() reports that case by returning 0
 * and does not produce a usable bit index, so it must not be read.
 * Otherwise returns 31 minus the index of the most significant set bit.
 */
static inline int __builtin_clz(unsigned x)
{
    unsigned long msb_index;

    if (!_BitScanReverse(&msb_index, x))
        return 32;

    return 31 - (int)msb_index;
}

static inline int __builtin_ctz(unsigned x) {
Expand Down
16 changes: 15 additions & 1 deletion src/Cafe/OS/libs/h264_avc/H264Dec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@ namespace H264
m_codecCtx->pv_fxns = (void*)&ih264d_api_function;
m_codecCtx->u4_size = sizeof(iv_obj_t);

SetDecoderCoreCount(1);

m_isBufferedMode = isBufferedMode;

UpdateParameters(false);
Expand All @@ -278,6 +280,19 @@ namespace H264
m_codecCtx = nullptr;
}

// Issues the IH264D_CMD_CTL_SET_NUM_CORES control command on m_codecCtx to
// set the number of cores the decoder uses. Asserts that the decoder
// reports IV_SUCCESS.
void SetDecoderCoreCount(uint32 coreCount)
{
    // request structure
    ih264d_ctl_set_num_cores_ip_t coreRequest;
    coreRequest.u4_size = sizeof(ih264d_ctl_set_num_cores_ip_t);
    coreRequest.e_cmd = IVD_CMD_VIDEO_CTL;
    coreRequest.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_SET_NUM_CORES;
    coreRequest.u4_num_cores = coreCount; // valid numbers are 1-4

    // response structure; only its size has to be filled in by the caller
    ih264d_ctl_set_num_cores_op_t coreResponse;
    coreResponse.u4_size = sizeof(ih264d_ctl_set_num_cores_op_t);

    const IV_API_CALL_STATUS_T callStatus = ih264d_api_function(m_codecCtx, (void *)&coreRequest, (void *)&coreResponse);
    cemu_assert(callStatus == IV_SUCCESS);
}

static bool GetImageInfo(uint8* stream, uint32 length, uint32& imageWidth, uint32& imageHeight)
{
// create temporary decoder
Expand Down Expand Up @@ -702,7 +717,6 @@ namespace H264
decodeResult = m_bufferedResults.front();
m_bufferedResults.erase(m_bufferedResults.begin());
}

private:
iv_obj_t* m_codecCtx{nullptr};
bool m_hasBufferSizeInfo{ false };
Expand Down

0 comments on commit fda5ec2

Please sign in to comment.