Mesa has a number of utility functions that would be nice to use, but the swr/rasterizer directory is intended to be usable standalone, so it duplicates some common utilities (such as these aligned-allocation helpers).
-Tim > On May 17, 2016, at 6:07 PM, Ian Romanick <i...@freedesktop.org> wrote: > > Gallium already has os_malloc_aligned (in > src/gallium/auxiliary/os/os_memory_stdc.h) for this purpose. Does this > duplication add anything? > > On 05/17/2016 03:36 PM, Tim Rowley wrote: >> --- >> src/gallium/drivers/swr/rasterizer/common/os.h | 19 +++++++++++++++++-- >> src/gallium/drivers/swr/rasterizer/core/api.cpp | 16 ++++++++-------- >> src/gallium/drivers/swr/rasterizer/core/arena.h | 4 ++-- >> src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 8 ++++---- >> src/gallium/drivers/swr/rasterizer/core/ringbuffer.h | 4 ++-- >> src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp | 2 +- >> src/gallium/drivers/swr/rasterizer/core/tilemgr.h | 4 ++-- >> src/gallium/drivers/swr/swr_screen.cpp | 8 ++++---- >> 8 files changed, 40 insertions(+), 25 deletions(-) >> >> diff --git a/src/gallium/drivers/swr/rasterizer/common/os.h >> b/src/gallium/drivers/swr/rasterizer/common/os.h >> index 1d68585..8b15670 100644 >> --- a/src/gallium/drivers/swr/rasterizer/common/os.h >> +++ b/src/gallium/drivers/swr/rasterizer/common/os.h >> @@ -49,6 +49,16 @@ >> >> #define PRAGMA_WARNING_POP() __pragma(warning(pop)) >> >> +static inline void *AlignedMalloc(size_t _Size, size_t _Alignment) >> +{ >> + return _aligned_malloc(_Size, _Alignment); >> +} >> + >> +static inline void AlignedFree(void* p) >> +{ >> + return _aligned_free(p); >> +} >> + >> #if defined(_WIN64) >> #define BitScanReverseSizeT BitScanReverse64 >> #define BitScanForwardSizeT BitScanForward64 >> @@ -155,7 +165,7 @@ unsigned char _BitScanReverse(unsigned int *Index, >> unsigned int Mask) >> } >> >> inline >> -void *_aligned_malloc(unsigned int size, unsigned int alignment) >> +void *AlignedMalloc(unsigned int size, unsigned int alignment) >> { >> void *ret; >> if (posix_memalign(&ret, alignment, size)) >> @@ -171,12 +181,17 @@ unsigned char _bittest(const LONG *a, LONG b) >> return ((*(unsigned *)(a) & (1 << b)) != 0); >> } >> >> +static 
inline >> +void AlignedFree(void* p) >> +{ >> + free(p); >> +} >> + >> #define GetCurrentProcessId getpid >> #define GetCurrentThreadId gettid >> >> #define CreateDirectory(name, pSecurity) mkdir(name, 0777) >> >> -#define _aligned_free free >> #define InterlockedCompareExchange(Dest, Exchange, Comparand) >> __sync_val_compare_and_swap(Dest, Comparand, Exchange) >> #define InterlockedExchangeAdd(Addend, Value) __sync_fetch_and_add(Addend, >> Value) >> #define InterlockedDecrement(Append) __sync_sub_and_fetch(Append, 1) >> diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp >> b/src/gallium/drivers/swr/rasterizer/core/api.cpp >> index 3c25370..9d6f250 100644 >> --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp >> +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp >> @@ -57,7 +57,7 @@ HANDLE SwrCreateContext( >> RDTSC_RESET(); >> RDTSC_INIT(0); >> >> - void* pContextMem = _aligned_malloc(sizeof(SWR_CONTEXT), >> KNOB_SIMD_WIDTH * 4); >> + void* pContextMem = AlignedMalloc(sizeof(SWR_CONTEXT), KNOB_SIMD_WIDTH >> * 4); >> memset(pContextMem, 0, sizeof(SWR_CONTEXT)); >> SWR_CONTEXT *pContext = new (pContextMem) SWR_CONTEXT(); >> >> @@ -67,8 +67,8 @@ HANDLE SwrCreateContext( >> pContext->dcRing.Init(KNOB_MAX_DRAWS_IN_FLIGHT); >> pContext->dsRing.Init(KNOB_MAX_DRAWS_IN_FLIGHT); >> >> - pContext->pMacroTileManagerArray = >> (MacroTileMgr*)_aligned_malloc(sizeof(MacroTileMgr) * >> KNOB_MAX_DRAWS_IN_FLIGHT, 64); >> - pContext->pDispatchQueueArray = >> (DispatchQueue*)_aligned_malloc(sizeof(DispatchQueue) * >> KNOB_MAX_DRAWS_IN_FLIGHT, 64); >> + pContext->pMacroTileManagerArray = >> (MacroTileMgr*)AlignedMalloc(sizeof(MacroTileMgr) * >> KNOB_MAX_DRAWS_IN_FLIGHT, 64); >> + pContext->pDispatchQueueArray = >> (DispatchQueue*)AlignedMalloc(sizeof(DispatchQueue) * >> KNOB_MAX_DRAWS_IN_FLIGHT, 64); >> >> for (uint32_t dc = 0; dc < KNOB_MAX_DRAWS_IN_FLIGHT; ++dc) >> { >> @@ -110,7 +110,7 @@ HANDLE SwrCreateContext( >> MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE, >> 
numaNode); >> #else >> - pContext->pScratch[i] = (uint8_t*)_aligned_malloc(32 * >> sizeof(KILOBYTE), KNOB_SIMD_WIDTH * 4); >> + pContext->pScratch[i] = (uint8_t*)AlignedMalloc(32 * >> sizeof(KILOBYTE), KNOB_SIMD_WIDTH * 4); >> #endif >> } >> >> @@ -152,8 +152,8 @@ void SwrDestroyContext(HANDLE hContext) >> pContext->pDispatchQueueArray[i].~DispatchQueue(); >> } >> >> - _aligned_free(pContext->pDispatchQueueArray); >> - _aligned_free(pContext->pMacroTileManagerArray); >> + AlignedFree(pContext->pDispatchQueueArray); >> + AlignedFree(pContext->pMacroTileManagerArray); >> >> // Free scratch space. >> for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i) >> @@ -161,14 +161,14 @@ void SwrDestroyContext(HANDLE hContext) >> #if defined(_WIN32) >> VirtualFree(pContext->pScratch[i], 0, MEM_RELEASE); >> #else >> - _aligned_free(pContext->pScratch[i]); >> + AlignedFree(pContext->pScratch[i]); >> #endif >> } >> >> delete(pContext->pHotTileMgr); >> >> pContext->~SWR_CONTEXT(); >> - _aligned_free((SWR_CONTEXT*)hContext); >> + AlignedFree((SWR_CONTEXT*)hContext); >> } >> >> void CopyState(DRAW_STATE& dst, const DRAW_STATE& src) >> diff --git a/src/gallium/drivers/swr/rasterizer/core/arena.h >> b/src/gallium/drivers/swr/rasterizer/core/arena.h >> index 26785db..1db0972 100644 >> --- a/src/gallium/drivers/swr/rasterizer/core/arena.h >> +++ b/src/gallium/drivers/swr/rasterizer/core/arena.h >> @@ -54,7 +54,7 @@ public: >> { >> SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock)); >> >> - ArenaBlock* p = new (_aligned_malloc(size, align)) ArenaBlock(); >> + ArenaBlock* p = new (AlignedMalloc(size, align)) ArenaBlock(); >> p->blockSize = size; >> return p; >> } >> @@ -64,7 +64,7 @@ public: >> if (pMem) >> { >> SWR_ASSUME_ASSERT(pMem->blockSize < size_t(0xdddddddd)); >> - _aligned_free(pMem); >> + AlignedFree(pMem); >> } >> } >> }; >> diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp >> b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp >> index 6bb9b12..d6643c6 100644 
>> --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp >> +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp >> @@ -946,7 +946,7 @@ static void AllocateTessellationData(SWR_CONTEXT* >> pContext) >> if (gt_pTessellationThreadData == nullptr) >> { >> gt_pTessellationThreadData = (TessellationThreadLocalData*) >> - _aligned_malloc(sizeof(TessellationThreadLocalData), 64); >> + AlignedMalloc(sizeof(TessellationThreadLocalData), 64); >> memset(gt_pTessellationThreadData, 0, >> sizeof(*gt_pTessellationThreadData)); >> } >> } >> @@ -985,7 +985,7 @@ static void TessellationStages( >> gt_pTessellationThreadData->tsCtxSize); >> if (tsCtx == nullptr) >> { >> - gt_pTessellationThreadData->pTxCtx = >> _aligned_malloc(gt_pTessellationThreadData->tsCtxSize, 64); >> + gt_pTessellationThreadData->pTxCtx = >> AlignedMalloc(gt_pTessellationThreadData->tsCtxSize, 64); >> tsCtx = TSInitCtx( >> tsState.domain, >> tsState.partitioning, >> @@ -1063,8 +1063,8 @@ static void TessellationStages( >> size_t requiredAllocSize = sizeof(simdvector) * >> requiredDSOutputVectors; >> if (requiredDSOutputVectors > >> gt_pTessellationThreadData->numDSOutputVectors) >> { >> - _aligned_free(gt_pTessellationThreadData->pDSOutput); >> - gt_pTessellationThreadData->pDSOutput = >> (simdscalar*)_aligned_malloc(requiredAllocSize, 64); >> + AlignedFree(gt_pTessellationThreadData->pDSOutput); >> + gt_pTessellationThreadData->pDSOutput = >> (simdscalar*)AlignedMalloc(requiredAllocSize, 64); >> gt_pTessellationThreadData->numDSOutputVectors = >> requiredDSOutputVectors; >> } >> SWR_ASSERT(gt_pTessellationThreadData->pDSOutput); >> diff --git a/src/gallium/drivers/swr/rasterizer/core/ringbuffer.h >> b/src/gallium/drivers/swr/rasterizer/core/ringbuffer.h >> index d8eb50a..b9076de 100644 >> --- a/src/gallium/drivers/swr/rasterizer/core/ringbuffer.h >> +++ b/src/gallium/drivers/swr/rasterizer/core/ringbuffer.h >> @@ -47,14 +47,14 @@ public: >> { >> SWR_ASSERT(numEntries > 0); >> mNumEntries = 
numEntries; >> - mpRingBuffer = (T*)_aligned_malloc(sizeof(T)*numEntries, 64); >> + mpRingBuffer = (T*)AlignedMalloc(sizeof(T)*numEntries, 64); >> SWR_ASSERT(mpRingBuffer != nullptr); >> memset(mpRingBuffer, 0, sizeof(T)*numEntries); >> } >> >> void Destroy() >> { >> - _aligned_free(mpRingBuffer); >> + AlignedFree(mpRingBuffer); >> mpRingBuffer = nullptr; >> } >> >> diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp >> b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp >> index 87d9f42..e0aa8dd 100644 >> --- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp >> +++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp >> @@ -181,7 +181,7 @@ HOTTILE* HotTileMgr::GetHotTileNoLoad( >> if (create) >> { >> uint32_t size = numSamples * mHotTileSize[attachment]; >> - hotTile.pBuffer = (uint8_t*)_aligned_malloc(size, >> KNOB_SIMD_WIDTH * 4); >> + hotTile.pBuffer = (uint8_t*)AlignedMalloc(size, KNOB_SIMD_WIDTH >> * 4); >> hotTile.state = HOTTILE_INVALID; >> hotTile.numSamples = numSamples; >> hotTile.renderTargetArrayIndex = 0; >> diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h >> b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h >> index 82a15e1..41d29ba 100644 >> --- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h >> +++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h >> @@ -313,7 +313,7 @@ private: >> HANDLE hProcess = GetCurrentProcess(); >> p = VirtualAllocExNuma(hProcess, nullptr, size, MEM_COMMIT | >> MEM_RESERVE, PAGE_READWRITE, numaNode); >> #else >> - p = _aligned_malloc(size, align); >> + p = AlignedMalloc(size, align); >> #endif >> >> return p; >> @@ -326,7 +326,7 @@ private: >> #if defined(_WIN32) >> VirtualFree(pBuffer, 0, MEM_RELEASE); >> #else >> - _aligned_free(pBuffer); >> + AlignedFree(pBuffer); >> #endif >> } >> } >> diff --git a/src/gallium/drivers/swr/swr_screen.cpp >> b/src/gallium/drivers/swr/swr_screen.cpp >> index 3d280e3..90ad220 100644 >> --- a/src/gallium/drivers/swr/swr_screen.cpp >> +++ 
b/src/gallium/drivers/swr/swr_screen.cpp >> @@ -558,7 +558,7 @@ swr_texture_layout(struct swr_screen *screen, >> res->swr.pitch = res->row_stride[0]; >> >> if (allocate) { >> - res->swr.pBaseAddress = (uint8_t *)_aligned_malloc(total_size, 64); >> + res->swr.pBaseAddress = (uint8_t *)AlignedMalloc(total_size, 64); >> >> if (res->has_depth && res->has_stencil) { >> SWR_FORMAT_INFO finfo = GetFormatInfo(res->secondary.format); >> @@ -571,7 +571,7 @@ swr_texture_layout(struct swr_screen *screen, >> res->secondary.numSamples = (1 << pt->nr_samples); >> res->secondary.pitch = res->alignedWidth * finfo.Bpp; >> >> - res->secondary.pBaseAddress = (uint8_t *)_aligned_malloc( >> + res->secondary.pBaseAddress = (uint8_t *)AlignedMalloc( >> res->alignedHeight * res->secondary.pitch, 64); >> } >> } >> @@ -663,9 +663,9 @@ swr_resource_destroy(struct pipe_screen *p_screen, >> struct pipe_resource *pt) >> struct sw_winsys *winsys = screen->winsys; >> winsys->displaytarget_destroy(winsys, spr->display_target); >> } else >> - _aligned_free(spr->swr.pBaseAddress); >> + AlignedFree(spr->swr.pBaseAddress); >> >> - _aligned_free(spr->secondary.pBaseAddress); >> + AlignedFree(spr->secondary.pBaseAddress); >> >> FREE(spr); >> } >> > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev