How about this patch? On Saturday, April 9, 2016, Ximing Cheng <[email protected]> wrote:
> With the two patches, I just do a simple test on my Windows PC, and I test > for several times and finally get the follow results: > > ./x265.exe --fps 30 --input-res 1280x720 /d/project/sharp/lol.yuv -o > test1.265 > encoded 500 frames in 28.83s (17.34 fps), 1330.90 kb/s, Avg QP:34.94 > > ./x265_withpatch.exe --fps 30 --input-res 1280x720 > /d/project/sharp/lol.yuv -o test2.265 > encoded 500 frames in 28.14s (17.77 fps), 1330.90 kb/s, Avg QP:34.94 > > so just about 1% ~ 2% speed up on Windows in my test, but my test covers > little YUV sequence. > > On Fri, Apr 8, 2016 at 6:07 PM, Deepthi Nandakumar < > [email protected] > <javascript:_e(%7B%7D,'cvml','[email protected]');>> wrote: > >> Thank you for the patches. The Slim RW locks should be a performance >> improvement from the documentation. I'm not so sure about the POSIX RWlocks >> though. Did you do any performance testing - does the change in behaviour >> of the reconRow and framefilter locks improve performance? >> >> On Sun, Apr 3, 2016 at 2:00 PM, Ximing Cheng <[email protected] >> <javascript:_e(%7B%7D,'cvml','[email protected]');>> wrote: >> >>> # HG changeset patch >>> # User Ximing Cheng <[email protected] >>> <javascript:_e(%7B%7D,'cvml','[email protected]');>> >>> # Date 1459672199 -28800 >>> # Sun Apr 03 16:29:59 2016 +0800 >>> # Node ID 601877ef465c549efe24063afa0479a39e369010 >>> # Parent 5b01678f6fb4e89e23cd41295592a9aa5d51d4ba >>> ThreadSafeInteger: change default lock into read write lock >>> >>> diff -r 5b01678f6fb4 -r 601877ef465c source/common/frame.cpp >>> --- a/source/common/frame.cpp Sat Apr 02 19:08:49 2016 +0100 >>> +++ b/source/common/frame.cpp Sun Apr 03 16:29:59 2016 +0800 >>> @@ -30,6 +30,7 @@ >>> >>> Frame::Frame() >>> { >>> + m_reconRowCount.init(); >>> m_bChromaExtended = false; >>> m_lowresInit = false; >>> m_reconRowCount.set(0); >>> @@ -55,6 +56,8 @@ >>> X265_CHECK((m_reconColCount == NULL), "m_reconColCount was >>> initialized"); >>> m_numRows = (m_fencPic->m_picHeight + g_maxCUSize - 1) / >>> g_maxCUSize; >>> m_reconColCount = new ThreadSafeInteger[m_numRows]; >>> + for (int i = 0; i < m_numRows; i++) >>> + m_reconColCount[i].init(true); >>> >>> if (quantOffsets) >>> { >>> diff -r 5b01678f6fb4 -r 601877ef465c source/common/threading.h >>> --- a/source/common/threading.h Sat Apr 02 19:08:49 2016 +0100 >>> +++ b/source/common/threading.h Sun Apr 03 16:29:59 2016 +0800 >>> @@ -174,73 +174,127 @@ >>> class ThreadSafeInteger >>> { >>> public: >>> - >>> - ThreadSafeInteger() >>> + /* useReadWriteLock is useless, just be compatible with pthread >>> version */ >>> + void init(bool useReadWriteLock = true) >>> { >>> + /* disable MSVC warnnings */ >>> + (void)useReadWriteLock; >>> m_val = 0; >>> + InitializeConditionVariable(&m_cv); >>> + /* Slim Reader/Writer (SRW) Locks minimum supported in Windows >>> Vista */ >>> +#if _WIN32_WINNT < _WIN32_WINNT_VISTA >>> InitializeCriticalSection(&m_cs); >>> - InitializeConditionVariable(&m_cv); >>> +#else >>> + InitializeSRWLock(&m_rwlock); >>> +#endif >>> } >>> >>> ~ThreadSafeInteger() >>> { >>> + /* SRW locks do not need to be explicitly destroyed */ >>> +#if defined(_WIN32_WINNT) && _WIN32_WINNT < _WIN32_WINNT_VISTA >>> DeleteCriticalSection(&m_cs); >>> +#endif >>> XP_CONDITION_VAR_FREE(&m_cv); >>> } >>> >>> int waitForChange(int prev) >>> { >>> +#if _WIN32_WINNT < _WIN32_WINNT_VISTA >>> EnterCriticalSection(&m_cs); >>> - if (m_val == prev) >>> + while (m_val == prev) >>> SleepConditionVariableCS(&m_cv, &m_cs, INFINITE); >>> LeaveCriticalSection(&m_cs); >>> +#else >>> + AcquireSRWLockShared(&m_rwlock); >>> + while (m_val == prev) >>> + SleepConditionVariableSRW(&m_cv, &m_rwlock, INFINITE, >>> CONDITION_VARIABLE_LOCKMODE_SHARED); >>> + ReleaseSRWLockShared(&m_rwlock); >>> +#endif >>> return m_val; >>> } >>> >>> int get() >>> { >>> +#if _WIN32_WINNT < _WIN32_WINNT_VISTA >>> EnterCriticalSection(&m_cs); >>> int ret = m_val; >>> LeaveCriticalSection(&m_cs); >>> +#else >>> + AcquireSRWLockShared(&m_rwlock); >>> + int ret = m_val; >>> + ReleaseSRWLockShared(&m_rwlock); >>> +#endif >>> return ret; >>> } >>> >>> int getIncr(int n = 1) >>> { >>> +#if _WIN32_WINNT < _WIN32_WINNT_VISTA >>> EnterCriticalSection(&m_cs); >>> int ret = m_val; >>> m_val += n; >>> LeaveCriticalSection(&m_cs); >>> +#else >>> + AcquireSRWLockExclusive(&m_rwlock); >>> + int ret = m_val; >>> + m_val += n; >>> + ReleaseSRWLockExclusive(&m_rwlock); >>> +#endif >>> return ret; >>> } >>> >>> void set(int newval) >>> { >>> +#if _WIN32_WINNT < _WIN32_WINNT_VISTA >>> EnterCriticalSection(&m_cs); >>> m_val = newval; >>> WakeAllConditionVariable(&m_cv); >>> LeaveCriticalSection(&m_cs); >>> +#else >>> + AcquireSRWLockExclusive(&m_rwlock); >>> + m_val = newval; >>> + WakeAllConditionVariable(&m_cv); >>> + ReleaseSRWLockExclusive(&m_rwlock); >>> +#endif >>> } >>> >>> void poke(void) >>> { >>> /* awaken all waiting threads, but make no change */ >>> +#if _WIN32_WINNT < _WIN32_WINNT_VISTA >>> EnterCriticalSection(&m_cs); >>> WakeAllConditionVariable(&m_cv); >>> LeaveCriticalSection(&m_cs); >>> +#else >>> + AcquireSRWLockExclusive(&m_rwlock); >>> + WakeAllConditionVariable(&m_cv); >>> + ReleaseSRWLockExclusive(&m_rwlock); >>> +#endif >>> } >>> >>> void incr() >>> { >>> +#if _WIN32_WINNT < _WIN32_WINNT_VISTA >>> EnterCriticalSection(&m_cs); >>> m_val++; >>> WakeAllConditionVariable(&m_cv); >>> LeaveCriticalSection(&m_cs); >>> +#else >>> + AcquireSRWLockExclusive(&m_rwlock); >>> + m_val++; >>> + WakeAllConditionVariable(&m_cv); >>> + ReleaseSRWLockExclusive(&m_rwlock); >>> +#endif >>> } >>> >>> protected: >>> >>> +#if _WIN32_WINNT < _WIN32_WINNT_VISTA >>> CRITICAL_SECTION m_cs; >>> +#else >>> + SRWLOCK m_rwlock; >>> +#endif >>> CONDITION_VARIABLE m_cv; >>> int m_val; >>> }; >>> @@ -369,27 +423,38 @@ >>> class ThreadSafeInteger >>> { >>> public: >>> - >>> - ThreadSafeInteger() >>> + /* pthread_cond_wait only accept mutex param */ >>> + void init(bool useReadWriteLock = false) >>> { >>> + m_useReadWriteLock = useReadWriteLock; >>> m_val = 0; >>> - if (pthread_mutex_init(&m_mutex, NULL) || >>> - pthread_cond_init(&m_cond, NULL)) >>> + if (!m_useReadWriteLock && (pthread_mutex_init(&m_mutex, NULL) >>> || >>> + pthread_cond_init(&m_cond, NULL))) >>> { >>> x265_log(NULL, X265_LOG_ERROR, "fatal: unable to initialize >>> conditional variable\n"); >>> } >>> + else if (m_useReadWriteLock && pthread_rwlock_init(&m_rwlock, >>> NULL)) >>> + { >>> + x265_log(NULL, X265_LOG_ERROR, "fatal: unable to initialize >>> read write lock\n"); >>> + } >>> } >>> >>> ~ThreadSafeInteger() >>> { >>> - pthread_cond_destroy(&m_cond); >>> - pthread_mutex_destroy(&m_mutex); >>> + if (!m_useReadWriteLock) >>> + { >>> + pthread_cond_destroy(&m_cond); >>> + pthread_mutex_destroy(&m_mutex); >>> + } >>> + else >>> + pthread_rwlock_destroy(&m_rwlock); >>> } >>> >>> int waitForChange(int prev) >>> { >>> + X265_CHECK(!m_useReadWriteLock, "ThreadSafeInteger with >>> waitForChange should disable read write lock!\n"); >>> pthread_mutex_lock(&m_mutex); >>> - if (m_val == prev) >>> + while (m_val == prev) >>> pthread_cond_wait(&m_cond, &m_mutex); >>> pthread_mutex_unlock(&m_mutex); >>> return m_val; >>> @@ -397,31 +462,62 @@ >>> >>> int get() >>> { >>> - pthread_mutex_lock(&m_mutex); >>> - int ret = m_val; >>> - pthread_mutex_unlock(&m_mutex); >>> + int ret; >>> + if (!m_useReadWriteLock) >>> + { >>> + pthread_mutex_lock(&m_mutex); >>> + ret = m_val; >>> + pthread_mutex_unlock(&m_mutex); >>> + } >>> + else >>> + { >>> + pthread_rwlock_rdlock(&m_rwlock); >>> + ret = m_val; >>> + pthread_rwlock_unlock(&m_rwlock); >>> + } >>> return ret; >>> } >>> >>> int getIncr(int n = 1) >>> { >>> - pthread_mutex_lock(&m_mutex); >>> - int ret = m_val; >>> - m_val += n; >>> - pthread_mutex_unlock(&m_mutex); >>> + int ret; >>> + if (!m_useReadWriteLock) >>> + { >>> + pthread_mutex_lock(&m_mutex); >>> + ret = m_val; >>> + m_val += n; >>> + pthread_mutex_unlock(&m_mutex); >>> + } >>> + else >>> + { >>> + pthread_rwlock_wrlock(&m_rwlock); >>> + ret = m_val; >>> + m_val += n; >>> + pthread_rwlock_unlock(&m_rwlock); >>> + } >>> return ret; >>> } >>> >>> void set(int newval) >>> { >>> - pthread_mutex_lock(&m_mutex); >>> - m_val = newval; >>> - pthread_cond_broadcast(&m_cond); >>> - pthread_mutex_unlock(&m_mutex); >>> + if (!m_useReadWriteLock) >>> + { >>> + pthread_mutex_lock(&m_mutex); >>> + m_val = newval; >>> + pthread_cond_broadcast(&m_cond); >>> + pthread_mutex_unlock(&m_mutex); >>> + } >>> + else >>> + { >>> + pthread_rwlock_wrlock(&m_rwlock); >>> + m_val = newval; >>> + pthread_rwlock_unlock(&m_rwlock); >>> + } >>> } >>> >>> void poke(void) >>> { >>> + X265_CHECK(!m_useReadWriteLock, "ThreadSafeInteger with poke >>> should disable read write lock!\n"); >>> /* awaken all waiting threads, but make no change */ >>> pthread_mutex_lock(&m_mutex); >>> pthread_cond_broadcast(&m_cond); >>> @@ -430,17 +526,28 @@ >>> >>> void incr() >>> { >>> - pthread_mutex_lock(&m_mutex); >>> - m_val++; >>> - pthread_cond_broadcast(&m_cond); >>> - pthread_mutex_unlock(&m_mutex); >>> + if (!m_useReadWriteLock) >>> + { >>> + pthread_mutex_lock(&m_mutex); >>> + m_val++; >>> + pthread_cond_broadcast(&m_cond); >>> + pthread_mutex_unlock(&m_mutex); >>> + } >>> + else >>> + { >>> + pthread_rwlock_wrlock(&m_rwlock); >>> + m_val++; >>> + pthread_rwlock_unlock(&m_rwlock); >>> + } >>> } >>> >>> protected: >>> >>> - pthread_mutex_t m_mutex; >>> - pthread_cond_t m_cond; >>> - int m_val; >>> + pthread_mutex_t m_mutex; >>> + pthread_rwlock_t m_rwlock; >>> + pthread_cond_t m_cond; >>> + int m_val; >>> + bool m_useReadWriteLock; >>> }; >>> >>> #endif // ifdef _WIN32 >>> diff -r 5b01678f6fb4 -r 601877ef465c source/common/threadpool.h >>> --- a/source/common/threadpool.h Sat Apr 02 19:08:49 2016 +0100 >>> +++ b/source/common/threadpool.h Sun Apr 03 16:29:59 2016 +0800 >>> @@ -129,7 +129,7 @@ >>> int m_jobTotal; >>> int m_jobAcquired; >>> >>> - BondedTaskGroup() { m_bondedPeerCount = m_jobTotal = m_jobAcquired >>> = 0; } >>> + BondedTaskGroup() { m_bondedPeerCount = m_jobTotal = m_jobAcquired >>> = 0; m_exitedPeerCount.init(); } >>> >>> /* Do not allow the instance to be destroyed before all bonded >>> peers have >>> * exited processTasks() */ >>> diff -r 5b01678f6fb4 -r 601877ef465c source/encoder/framefilter.h >>> --- a/source/encoder/framefilter.h Sat Apr 02 19:08:49 2016 +0100 >>> +++ b/source/encoder/framefilter.h Sun Apr 03 16:29:59 2016 +0800 >>> @@ -82,6 +82,9 @@ >>> , m_encData(NULL) >>> , m_prevRow(NULL) >>> { >>> + m_lastCol.init(true); >>> + m_allowedCol.init(true); >>> + m_lastDeblocked.init(true); >>> } >>> >>> ~ParallelFilter() >>> diff -r 5b01678f6fb4 -r 601877ef465c source/encoder/ratecontrol.cpp >>> --- a/source/encoder/ratecontrol.cpp Sat Apr 02 19:08:49 2016 +0100 >>> +++ b/source/encoder/ratecontrol.cpp Sun Apr 03 16:29:59 2016 +0800 >>> @@ -166,6 +166,7 @@ >>> >>> RateControl::RateControl(x265_param& p) >>> { >>> + m_startEndOrder.init(); >>> m_param = &p; >>> int lowresCuWidth = ((m_param->sourceWidth / 2) + >>> X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS; >>> int lowresCuHeight = ((m_param->sourceHeight / 2) + >>> X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS; >>> diff -r 5b01678f6fb4 -r 601877ef465c source/input/y4m.cpp >>> --- a/source/input/y4m.cpp Sat Apr 02 19:08:49 2016 +0100 >>> +++ b/source/input/y4m.cpp Sun Apr 03 16:29:59 2016 +0800 >>> @@ -43,6 +43,8 @@ >>> >>> Y4MInput::Y4MInput(InputFileInfo& info) >>> { >>> + readCount.init(); >>> + writeCount.init(); >>> for (int i = 0; i < QUEUE_SIZE; i++) >>> buf[i] = NULL; >>> >>> diff -r 5b01678f6fb4 -r 601877ef465c source/input/yuv.cpp >>> --- a/source/input/yuv.cpp Sat Apr 02 19:08:49 2016 +0100 >>> +++ b/source/input/yuv.cpp Sun Apr 03 16:29:59 2016 +0800 >>> @@ -41,6 +41,8 @@ >>> >>> YUVInput::YUVInput(InputFileInfo& info) >>> { >>> + readCount.init(); >>> + writeCount.init(); >>> for (int i = 0; i < QUEUE_SIZE; i++) >>> buf[i] = NULL; >>> >>> diff -r 5b01678f6fb4 -r 601877ef465c source/output/reconplay.cpp >>> --- a/source/output/reconplay.cpp Sat Apr 02 19:08:49 2016 +0100 >>> +++ b/source/output/reconplay.cpp Sun Apr 03 16:29:59 2016 +0800 >>> @@ -54,7 +54,8 @@ >>> if (signal(SIGPIPE, sigpipe_handler) == SIG_ERR) >>> general_log(¶m, "exec", X265_LOG_ERROR, "Unable to register >>> SIGPIPE handler: %s\n", strerror(errno)); >>> #endif >>> - >>> + readCount.init(); >>> + writeCount.init(); >>> width = param.sourceWidth; >>> height = param.sourceHeight; >>> colorSpace = param.internalCsp; >>> >>> >>> _______________________________________________ >>> x265-devel mailing list >>> [email protected] >>> <javascript:_e(%7B%7D,'cvml','[email protected]');> >>> https://mailman.videolan.org/listinfo/x265-devel >>> >> >> >> >> -- >> Deepthi Nandakumar >> Engineering Manager, x265 >> Multicoreware, Inc >> >> _______________________________________________ >> x265-devel mailing list >> [email protected] >> <javascript:_e(%7B%7D,'cvml','[email protected]');> >> https://mailman.videolan.org/listinfo/x265-devel >> >> >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
