Gallium already has os_malloc_aligned (in src/gallium/auxiliary/os/os_memory_stdc.h) for aligned allocation. Does duplicating it here as AlignedMalloc/AlignedFree in the swr driver's os.h add anything?
On 05/17/2016 03:36 PM, Tim Rowley wrote: > --- > src/gallium/drivers/swr/rasterizer/common/os.h | 19 +++++++++++++++++-- > src/gallium/drivers/swr/rasterizer/core/api.cpp | 16 ++++++++-------- > src/gallium/drivers/swr/rasterizer/core/arena.h | 4 ++-- > src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 8 ++++---- > src/gallium/drivers/swr/rasterizer/core/ringbuffer.h | 4 ++-- > src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp | 2 +- > src/gallium/drivers/swr/rasterizer/core/tilemgr.h | 4 ++-- > src/gallium/drivers/swr/swr_screen.cpp | 8 ++++---- > 8 files changed, 40 insertions(+), 25 deletions(-) > > diff --git a/src/gallium/drivers/swr/rasterizer/common/os.h > b/src/gallium/drivers/swr/rasterizer/common/os.h > index 1d68585..8b15670 100644 > --- a/src/gallium/drivers/swr/rasterizer/common/os.h > +++ b/src/gallium/drivers/swr/rasterizer/common/os.h > @@ -49,6 +49,16 @@ > > #define PRAGMA_WARNING_POP() __pragma(warning(pop)) > > +static inline void *AlignedMalloc(size_t _Size, size_t _Alignment) > +{ > + return _aligned_malloc(_Size, _Alignment); > +} > + > +static inline void AlignedFree(void* p) > +{ > + return _aligned_free(p); > +} > + > #if defined(_WIN64) > #define BitScanReverseSizeT BitScanReverse64 > #define BitScanForwardSizeT BitScanForward64 > @@ -155,7 +165,7 @@ unsigned char _BitScanReverse(unsigned int *Index, > unsigned int Mask) > } > > inline > -void *_aligned_malloc(unsigned int size, unsigned int alignment) > +void *AlignedMalloc(unsigned int size, unsigned int alignment) > { > void *ret; > if (posix_memalign(&ret, alignment, size)) > @@ -171,12 +181,17 @@ unsigned char _bittest(const LONG *a, LONG b) > return ((*(unsigned *)(a) & (1 << b)) != 0); > } > > +static inline > +void AlignedFree(void* p) > +{ > + free(p); > +} > + > #define GetCurrentProcessId getpid > #define GetCurrentThreadId gettid > > #define CreateDirectory(name, pSecurity) mkdir(name, 0777) > > -#define _aligned_free free > #define InterlockedCompareExchange(Dest, 
Exchange, Comparand) > __sync_val_compare_and_swap(Dest, Comparand, Exchange) > #define InterlockedExchangeAdd(Addend, Value) __sync_fetch_and_add(Addend, > Value) > #define InterlockedDecrement(Append) __sync_sub_and_fetch(Append, 1) > diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp > b/src/gallium/drivers/swr/rasterizer/core/api.cpp > index 3c25370..9d6f250 100644 > --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp > +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp > @@ -57,7 +57,7 @@ HANDLE SwrCreateContext( > RDTSC_RESET(); > RDTSC_INIT(0); > > - void* pContextMem = _aligned_malloc(sizeof(SWR_CONTEXT), KNOB_SIMD_WIDTH > * 4); > + void* pContextMem = AlignedMalloc(sizeof(SWR_CONTEXT), KNOB_SIMD_WIDTH * > 4); > memset(pContextMem, 0, sizeof(SWR_CONTEXT)); > SWR_CONTEXT *pContext = new (pContextMem) SWR_CONTEXT(); > > @@ -67,8 +67,8 @@ HANDLE SwrCreateContext( > pContext->dcRing.Init(KNOB_MAX_DRAWS_IN_FLIGHT); > pContext->dsRing.Init(KNOB_MAX_DRAWS_IN_FLIGHT); > > - pContext->pMacroTileManagerArray = > (MacroTileMgr*)_aligned_malloc(sizeof(MacroTileMgr) * > KNOB_MAX_DRAWS_IN_FLIGHT, 64); > - pContext->pDispatchQueueArray = > (DispatchQueue*)_aligned_malloc(sizeof(DispatchQueue) * > KNOB_MAX_DRAWS_IN_FLIGHT, 64); > + pContext->pMacroTileManagerArray = > (MacroTileMgr*)AlignedMalloc(sizeof(MacroTileMgr) * KNOB_MAX_DRAWS_IN_FLIGHT, > 64); > + pContext->pDispatchQueueArray = > (DispatchQueue*)AlignedMalloc(sizeof(DispatchQueue) * > KNOB_MAX_DRAWS_IN_FLIGHT, 64); > > for (uint32_t dc = 0; dc < KNOB_MAX_DRAWS_IN_FLIGHT; ++dc) > { > @@ -110,7 +110,7 @@ HANDLE SwrCreateContext( > MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE, > numaNode); > #else > - pContext->pScratch[i] = (uint8_t*)_aligned_malloc(32 * > sizeof(KILOBYTE), KNOB_SIMD_WIDTH * 4); > + pContext->pScratch[i] = (uint8_t*)AlignedMalloc(32 * > sizeof(KILOBYTE), KNOB_SIMD_WIDTH * 4); > #endif > } > > @@ -152,8 +152,8 @@ void SwrDestroyContext(HANDLE hContext) > 
pContext->pDispatchQueueArray[i].~DispatchQueue(); > } > > - _aligned_free(pContext->pDispatchQueueArray); > - _aligned_free(pContext->pMacroTileManagerArray); > + AlignedFree(pContext->pDispatchQueueArray); > + AlignedFree(pContext->pMacroTileManagerArray); > > // Free scratch space. > for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i) > @@ -161,14 +161,14 @@ void SwrDestroyContext(HANDLE hContext) > #if defined(_WIN32) > VirtualFree(pContext->pScratch[i], 0, MEM_RELEASE); > #else > - _aligned_free(pContext->pScratch[i]); > + AlignedFree(pContext->pScratch[i]); > #endif > } > > delete(pContext->pHotTileMgr); > > pContext->~SWR_CONTEXT(); > - _aligned_free((SWR_CONTEXT*)hContext); > + AlignedFree((SWR_CONTEXT*)hContext); > } > > void CopyState(DRAW_STATE& dst, const DRAW_STATE& src) > diff --git a/src/gallium/drivers/swr/rasterizer/core/arena.h > b/src/gallium/drivers/swr/rasterizer/core/arena.h > index 26785db..1db0972 100644 > --- a/src/gallium/drivers/swr/rasterizer/core/arena.h > +++ b/src/gallium/drivers/swr/rasterizer/core/arena.h > @@ -54,7 +54,7 @@ public: > { > SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock)); > > - ArenaBlock* p = new (_aligned_malloc(size, align)) ArenaBlock(); > + ArenaBlock* p = new (AlignedMalloc(size, align)) ArenaBlock(); > p->blockSize = size; > return p; > } > @@ -64,7 +64,7 @@ public: > if (pMem) > { > SWR_ASSUME_ASSERT(pMem->blockSize < size_t(0xdddddddd)); > - _aligned_free(pMem); > + AlignedFree(pMem); > } > } > }; > diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp > b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp > index 6bb9b12..d6643c6 100644 > --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp > +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp > @@ -946,7 +946,7 @@ static void AllocateTessellationData(SWR_CONTEXT* > pContext) > if (gt_pTessellationThreadData == nullptr) > { > gt_pTessellationThreadData = (TessellationThreadLocalData*) > - 
_aligned_malloc(sizeof(TessellationThreadLocalData), 64); > + AlignedMalloc(sizeof(TessellationThreadLocalData), 64); > memset(gt_pTessellationThreadData, 0, > sizeof(*gt_pTessellationThreadData)); > } > } > @@ -985,7 +985,7 @@ static void TessellationStages( > gt_pTessellationThreadData->tsCtxSize); > if (tsCtx == nullptr) > { > - gt_pTessellationThreadData->pTxCtx = > _aligned_malloc(gt_pTessellationThreadData->tsCtxSize, 64); > + gt_pTessellationThreadData->pTxCtx = > AlignedMalloc(gt_pTessellationThreadData->tsCtxSize, 64); > tsCtx = TSInitCtx( > tsState.domain, > tsState.partitioning, > @@ -1063,8 +1063,8 @@ static void TessellationStages( > size_t requiredAllocSize = sizeof(simdvector) * > requiredDSOutputVectors; > if (requiredDSOutputVectors > > gt_pTessellationThreadData->numDSOutputVectors) > { > - _aligned_free(gt_pTessellationThreadData->pDSOutput); > - gt_pTessellationThreadData->pDSOutput = > (simdscalar*)_aligned_malloc(requiredAllocSize, 64); > + AlignedFree(gt_pTessellationThreadData->pDSOutput); > + gt_pTessellationThreadData->pDSOutput = > (simdscalar*)AlignedMalloc(requiredAllocSize, 64); > gt_pTessellationThreadData->numDSOutputVectors = > requiredDSOutputVectors; > } > SWR_ASSERT(gt_pTessellationThreadData->pDSOutput); > diff --git a/src/gallium/drivers/swr/rasterizer/core/ringbuffer.h > b/src/gallium/drivers/swr/rasterizer/core/ringbuffer.h > index d8eb50a..b9076de 100644 > --- a/src/gallium/drivers/swr/rasterizer/core/ringbuffer.h > +++ b/src/gallium/drivers/swr/rasterizer/core/ringbuffer.h > @@ -47,14 +47,14 @@ public: > { > SWR_ASSERT(numEntries > 0); > mNumEntries = numEntries; > - mpRingBuffer = (T*)_aligned_malloc(sizeof(T)*numEntries, 64); > + mpRingBuffer = (T*)AlignedMalloc(sizeof(T)*numEntries, 64); > SWR_ASSERT(mpRingBuffer != nullptr); > memset(mpRingBuffer, 0, sizeof(T)*numEntries); > } > > void Destroy() > { > - _aligned_free(mpRingBuffer); > + AlignedFree(mpRingBuffer); > mpRingBuffer = nullptr; > } > > diff --git 
a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp > b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp > index 87d9f42..e0aa8dd 100644 > --- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp > +++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp > @@ -181,7 +181,7 @@ HOTTILE* HotTileMgr::GetHotTileNoLoad( > if (create) > { > uint32_t size = numSamples * mHotTileSize[attachment]; > - hotTile.pBuffer = (uint8_t*)_aligned_malloc(size, > KNOB_SIMD_WIDTH * 4); > + hotTile.pBuffer = (uint8_t*)AlignedMalloc(size, KNOB_SIMD_WIDTH > * 4); > hotTile.state = HOTTILE_INVALID; > hotTile.numSamples = numSamples; > hotTile.renderTargetArrayIndex = 0; > diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h > b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h > index 82a15e1..41d29ba 100644 > --- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h > +++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h > @@ -313,7 +313,7 @@ private: > HANDLE hProcess = GetCurrentProcess(); > p = VirtualAllocExNuma(hProcess, nullptr, size, MEM_COMMIT | > MEM_RESERVE, PAGE_READWRITE, numaNode); > #else > - p = _aligned_malloc(size, align); > + p = AlignedMalloc(size, align); > #endif > > return p; > @@ -326,7 +326,7 @@ private: > #if defined(_WIN32) > VirtualFree(pBuffer, 0, MEM_RELEASE); > #else > - _aligned_free(pBuffer); > + AlignedFree(pBuffer); > #endif > } > } > diff --git a/src/gallium/drivers/swr/swr_screen.cpp > b/src/gallium/drivers/swr/swr_screen.cpp > index 3d280e3..90ad220 100644 > --- a/src/gallium/drivers/swr/swr_screen.cpp > +++ b/src/gallium/drivers/swr/swr_screen.cpp > @@ -558,7 +558,7 @@ swr_texture_layout(struct swr_screen *screen, > res->swr.pitch = res->row_stride[0]; > > if (allocate) { > - res->swr.pBaseAddress = (uint8_t *)_aligned_malloc(total_size, 64); > + res->swr.pBaseAddress = (uint8_t *)AlignedMalloc(total_size, 64); > > if (res->has_depth && res->has_stencil) { > SWR_FORMAT_INFO finfo = GetFormatInfo(res->secondary.format); > @@ -571,7 
+571,7 @@ swr_texture_layout(struct swr_screen *screen, > res->secondary.numSamples = (1 << pt->nr_samples); > res->secondary.pitch = res->alignedWidth * finfo.Bpp; > > - res->secondary.pBaseAddress = (uint8_t *)_aligned_malloc( > + res->secondary.pBaseAddress = (uint8_t *)AlignedMalloc( > res->alignedHeight * res->secondary.pitch, 64); > } > } > @@ -663,9 +663,9 @@ swr_resource_destroy(struct pipe_screen *p_screen, struct > pipe_resource *pt) > struct sw_winsys *winsys = screen->winsys; > winsys->displaytarget_destroy(winsys, spr->display_target); > } else > - _aligned_free(spr->swr.pBaseAddress); > + AlignedFree(spr->swr.pBaseAddress); > > - _aligned_free(spr->secondary.pBaseAddress); > + AlignedFree(spr->secondary.pBaseAddress); > > FREE(spr); > } > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev