/****************************************************************************
 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * @file arena.h
 *
 * @brief Arena memory manager
 *        The arena is convenient and fast for managing allocations that are
 *        associated with operations and can all be freed at once when the
 *        operation has completed. Allocations are cheap since most of the
 *        time they are simply an increment of an offset. There is also no
 *        need to free individual allocations; all of the arena memory can
 *        be freed at once.
 *
 ******************************************************************************/
#pragma once

#include <mutex>
#include <algorithm>
#include <cstdint>
#include "core/utils.h"

static const size_t ARENA_BLOCK_ALIGN = 64;

struct ArenaBlock
{
    size_t      blockSize = 0;
    ArenaBlock* pNext = nullptr;
};
static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN, "Increase BLOCK_ALIGN size");

class DefaultAllocator
{
public:
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));

        ArenaBlock* p = new (AlignedMalloc(size, align)) ArenaBlock();
        p->blockSize = size;
        return p;
    }

    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            SWR_ASSUME_ASSERT(pMem->blockSize < size_t(0xdddddddd));
            AlignedFree(pMem);
        }
    }
};

// Caching Allocator for Arena
template <uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
struct CachingAllocatorT : DefaultAllocator
{
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
        SWR_ASSUME_ASSERT(size <= uint32_t(-1));

        uint32_t bucket = GetBucketId(size);

        {
            // search cached blocks
            std::lock_guard<std::mutex> l(m_mutex);
            ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket];
            ArenaBlock* pBlock = SearchBlocks(pPrevBlock, size, align);

            if (pBlock)
            {
                m_cachedSize -= pBlock->blockSize;
                if (pBlock == m_pLastCachedBlocks[bucket])
                {
                    m_pLastCachedBlocks[bucket] = pPrevBlock;
                }
            }
            else
            {
                pPrevBlock = &m_oldCachedBlocks[bucket];
                pBlock = SearchBlocks(pPrevBlock, size, align);

                if (pBlock)
                {
                    m_oldCachedSize -= pBlock->blockSize;
                    if (pBlock == m_pOldLastCachedBlocks[bucket])
                    {
                        m_pOldLastCachedBlocks[bucket] = pPrevBlock;
                    }
                }
            }

            if (pBlock)
            {
                SWR_ASSUME_ASSERT(pPrevBlock && pPrevBlock->pNext == pBlock);
                pPrevBlock->pNext = pBlock->pNext;
                pBlock->pNext = nullptr;

                return pBlock;
            }

            m_totalAllocated += size;

#if 0
            {
                static uint32_t count = 0;
                char buf[128];
                sprintf_s(buf, "Arena Alloc %d 0x%llx bytes - 0x%llx total\n", ++count, uint64_t(size), uint64_t(m_totalAllocated));
                OutputDebugStringA(buf);
            }
#endif
        }
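
        // Cache miss: round mid-bucket requests up to the bucket's upper
        // power-of-two size so any cached block in that bucket can later
        // satisfy any request mapping to the same bucket.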
        if (bucket && bucket < (CACHE_NUM_BUCKETS - 1))
        {
            // Make all blocks in this bucket the same size
            size = size_t(1) << (bucket + 1 + CACHE_START_BUCKET_BIT);
        }

        return this->DefaultAllocator::AllocateAligned(size, align);
    }

    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            std::unique_lock<std::mutex> l(m_mutex);
            InsertCachedBlock(GetBucketId(pMem->blockSize), pMem);
        }
    }

    void FreeOldBlocks()
    {
        if (!m_cachedSize)
        {
            return;
        }
        std::lock_guard<std::mutex> l(m_mutex);

        bool doFree = (m_oldCachedSize > MAX_UNUSED_SIZE);

        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            if (doFree)
            {
                ArenaBlock* pBlock = m_oldCachedBlocks[i].pNext;
                while (pBlock)
                {
                    ArenaBlock* pNext = pBlock->pNext;
                    m_oldCachedSize -= pBlock->blockSize;
                    m_totalAllocated -= pBlock->blockSize;
                    this->DefaultAllocator::Free(pBlock);
                    pBlock = pNext;
                }
                m_oldCachedBlocks[i].pNext = nullptr;
                m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
            }

            if (m_pLastCachedBlocks[i] != &m_cachedBlocks[i])
            {
                if (i && i < (CACHE_NUM_BUCKETS - 1))
                {
                    // We know that all blocks are the same size.
                    // Just move the list over.
                    m_pLastCachedBlocks[i]->pNext = m_oldCachedBlocks[i].pNext;
                    m_oldCachedBlocks[i].pNext = m_cachedBlocks[i].pNext;
                    m_cachedBlocks[i].pNext = nullptr;
                    if (m_pOldLastCachedBlocks[i]->pNext)
                    {
                        m_pOldLastCachedBlocks[i] = m_pLastCachedBlocks[i];
                    }
                    m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
                }
                else
                {
                    // The end buckets can have variable sized lists.
                    // Insert each block based on size
                    ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
                    while (pBlock)
                    {
                        ArenaBlock* pNext = pBlock->pNext;
                        pBlock->pNext = nullptr;
                        m_cachedSize -= pBlock->blockSize;
                        InsertCachedBlock<true>(i, pBlock);
                        pBlock = pNext;
                    }

                    m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
                    m_cachedBlocks[i].pNext = nullptr;
                }
            }
        }

        m_oldCachedSize += m_cachedSize;
        m_cachedSize = 0;
    }

    CachingAllocatorT()
    {
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
            m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
        }
    }

    ~CachingAllocatorT()
    {
        // Free all cached blocks
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }

            pBlock = m_oldCachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }
        }
    }

private:
    static uint32_t GetBucketId(size_t blockSize)
    {
        uint32_t bucketId = 0;

#if defined(BitScanReverseSizeT)
        BitScanReverseSizeT((unsigned long*)&bucketId, (blockSize - 1) >> CACHE_START_BUCKET_BIT);
        bucketId = std::min(bucketId, CACHE_NUM_BUCKETS - 1);
#endif

        return bucketId;
    }
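
    // Insert a freed block into the current (or, when OldBlockT is true, the
    // old) cache bucket, keeping the list sorted by block size, largest first,
    // so SearchBlocks can stop early once the remaining blocks are too small.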
    template <bool OldBlockT = false>
    void InsertCachedBlock(uint32_t bucketId, ArenaBlock* pNewBlock)
    {
        SWR_ASSUME_ASSERT(bucketId < CACHE_NUM_BUCKETS);

        ArenaBlock* pPrevBlock = OldBlockT ? &m_oldCachedBlocks[bucketId] : &m_cachedBlocks[bucketId];
        ArenaBlock* pBlock = pPrevBlock->pNext;

        while (pBlock)
        {
            if (pNewBlock->blockSize >= pBlock->blockSize)
            {
                // Insert here
                break;
            }
            pPrevBlock = pBlock;
            pBlock = pBlock->pNext;
        }

        // Insert into list
        SWR_ASSUME_ASSERT(pPrevBlock);
        pPrevBlock->pNext = pNewBlock;
        pNewBlock->pNext = pBlock;

        if (OldBlockT)
        {
            if (m_pOldLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pOldLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_oldCachedSize += pNewBlock->blockSize;
        }
        else
        {
            if (m_pLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_cachedSize += pNewBlock->blockSize;
        }
    }

    static ArenaBlock* SearchBlocks(ArenaBlock*& pPrevBlock, size_t blockSize, size_t align)
    {
        ArenaBlock* pBlock = pPrevBlock->pNext;
        ArenaBlock* pPotentialBlock = nullptr;
        ArenaBlock* pPotentialPrev = nullptr;

        while (pBlock)
        {
            if (pBlock->blockSize >= blockSize)
            {
                if (pBlock == AlignUp(pBlock, align))
                {
                    if (pBlock->blockSize == blockSize)
                    {
                        // Won't find a better match
                        break;
                    }

                    // We could use this as it is larger than we wanted, but
                    // continue to search for a better match
                    pPotentialBlock = pBlock;
                    pPotentialPrev = pPrevBlock;
                }
            }
            else
            {
                // Blocks are sorted by size (biggest first).
                // So, if we get here, there are no blocks
                // large enough, fall through to allocation.
                pBlock = nullptr;
                break;
            }

            pPrevBlock = pBlock;
            pBlock = pBlock->pNext;
        }

        if (!pBlock)
        {
            // Couldn't find an exact match, use next biggest size
            pBlock = pPotentialBlock;
            pPrevBlock = pPotentialPrev;
        }

        return pBlock;
    }

    // buckets, for block sizes < (1 << (start+1)), < (1 << (start+2)), ...
    static const uint32_t CACHE_NUM_BUCKETS = NumBucketsT;
    static const uint32_t CACHE_START_BUCKET_BIT = StartBucketBitT;
    static const size_t MAX_UNUSED_SIZE = sizeof(MEGABYTE);

    ArenaBlock  m_cachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock* m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock  m_oldCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock* m_pOldLastCachedBlocks[CACHE_NUM_BUCKETS];
    std::mutex  m_mutex;

    size_t m_totalAllocated = 0;

    size_t m_cachedSize = 0;
    size_t m_oldCachedSize = 0;
};
typedef CachingAllocatorT<> CachingAllocator;

template <typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
class TArena
{
public:
    TArena(T& in_allocator) : m_allocator(in_allocator) {}
    TArena() : m_allocator(m_defAllocator) {}
    ~TArena()
    {
        Reset(true);
    }

    void* AllocAligned(size_t size, size_t align)
    {
        if (0 == size)
        {
            return nullptr;
        }

        SWR_ASSERT(align <= ARENA_BLOCK_ALIGN);

        if (m_pCurBlock)
        {
            ArenaBlock* pCurBlock = m_pCurBlock;
            size_t offset = AlignUp(m_offset, align);

            if ((offset + size) <= pCurBlock->blockSize)
            {
                void* pMem = PtrAdd(pCurBlock, offset);
                m_offset = offset + size;
                return pMem;
            }

            // Not enough memory in this block, fall through to allocate
            // a new block
        }

        static const size_t ArenaBlockSize = BlockSizeT;
        size_t blockSize = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);

        // Add in one BLOCK_ALIGN unit to store ArenaBlock in.
        blockSize = AlignUp(blockSize, ARENA_BLOCK_ALIGN);

        ArenaBlock* pNewBlock = m_allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
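
        // The new block's ArenaBlock header occupies the first ARENA_BLOCK_ALIGN
        // bytes; user data starts at offset ARENA_BLOCK_ALIGN, so on success the
        // recursive retry below lands on the fast bump-pointer path.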
        SWR_ASSERT(pNewBlock != nullptr);

        if (pNewBlock != nullptr)
        {
            m_offset = ARENA_BLOCK_ALIGN;
            pNewBlock->pNext = m_pCurBlock;

            m_pCurBlock = pNewBlock;
        }

        return AllocAligned(size, align);
    }

    void* Alloc(size_t size)
    {
        return AllocAligned(size, 1);
    }

    void* AllocAlignedSync(size_t size, size_t align)
    {
        void* pAlloc = nullptr;

        m_mutex.lock();
        pAlloc = AllocAligned(size, align);
        m_mutex.unlock();

        return pAlloc;
    }

    void* AllocSync(size_t size)
    {
        void* pAlloc = nullptr;

        m_mutex.lock();
        pAlloc = Alloc(size);
        m_mutex.unlock();

        return pAlloc;
    }

    void Reset(bool removeAll = false)
    {
        m_offset = ARENA_BLOCK_ALIGN;

        if (m_pCurBlock)
        {
            ArenaBlock* pUsedBlocks = m_pCurBlock->pNext;
            m_pCurBlock->pNext = nullptr;
            while (pUsedBlocks)
            {
                ArenaBlock* pBlock = pUsedBlocks;
                pUsedBlocks = pBlock->pNext;
                m_allocator.Free(pBlock);
            }

            if (removeAll)
            {
                m_allocator.Free(m_pCurBlock);
                m_pCurBlock = nullptr;
            }
        }
    }

    bool IsEmpty()
    {
        return (m_pCurBlock == nullptr) ||
               (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
    }

private:
    ArenaBlock* m_pCurBlock = nullptr;
    size_t      m_offset = ARENA_BLOCK_ALIGN;

    /// @note Mutex is only used by sync allocation functions.
    std::mutex m_mutex;

    DefaultAllocator m_defAllocator;
    T&               m_allocator;
};

using StdArena = TArena<DefaultAllocator>;
using CachingArena = TArena<CachingAllocator>;
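
// Usage sketch: a shared CachingAllocator feeds per-operation CachingArena
// instances. Function and variable names below are hypothetical; only the
// types declared in this header are real.
//
//     static CachingAllocator g_blockAllocator;      // caches freed blocks
//
//     void ProcessOperation()
//     {
//         CachingArena arena(g_blockAllocator);
//
//         // Cheap allocations: usually just an offset increment.
//         void* pState   = arena.AllocAligned(256, 16);
//         void* pScratch = arena.Alloc(64);
//
//         // When the operation completes, release everything at once;
//         // the blocks go back to g_blockAllocator for reuse.
//         arena.Reset(true);
//
//         // Periodically return long-unused cached blocks to the system
//         // once their total exceeds MAX_UNUSED_SIZE.
//         g_blockAllocator.FreeOldBlocks();
//     }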