/**************************************************************************** * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * * @file fifo.hpp * * @brief Definitions for our fifos used for thread communication. * ******************************************************************************/ #pragma once #include "common/os.h" #include "arena.h" #include #include template struct QUEUE { OSALIGNLINE(volatile uint32_t) mLock{0}; OSALIGNLINE(volatile uint32_t) mNumEntries{0}; std::vector mBlocks; T* mCurBlock{nullptr}; uint32_t mHead{0}; uint32_t mTail{0}; uint32_t mCurBlockIdx{0}; // power of 2 static const uint32_t mBlockSizeShift = 6; static const uint32_t mBlockSize = 1 << mBlockSizeShift; template void clear(ArenaT& arena) { mHead = 0; mTail = 0; mBlocks.clear(); T* pNewBlock = (T*)arena.AllocAligned(sizeof(T) * mBlockSize, KNOB_SIMD_WIDTH * 4); mBlocks.push_back(pNewBlock); mCurBlock = pNewBlock; mCurBlockIdx = 0; mNumEntries = 0; mLock = 0; } uint32_t getNumQueued() { return mNumEntries; } bool tryLock() { if (mLock) { return false; } // try to lock the FIFO long initial = InterlockedCompareExchange(&mLock, 1, 0); return (initial == 0); } void unlock() { mLock = 0; } T* peek() { if (mNumEntries == 0) { return nullptr; } uint32_t block = mHead >> mBlockSizeShift; return &mBlocks[block][mHead & (mBlockSize - 1)]; } void dequeue_noinc() { mHead++; mNumEntries--; } template bool enqueue_try_nosync(ArenaT& arena, const T* entry) { const float* pSrc = (const float*)entry; float* pDst = (float*)&mCurBlock[mTail]; auto lambda = [&](int32_t i) { __m256 vSrc = _mm256_load_ps(pSrc + i * KNOB_SIMD_WIDTH); _mm256_stream_ps(pDst + i * KNOB_SIMD_WIDTH, vSrc); }; const uint32_t numSimdLines = sizeof(T) / (KNOB_SIMD_WIDTH * 4); static_assert(numSimdLines * KNOB_SIMD_WIDTH * 4 == sizeof(T), "FIFO element size should be multiple of SIMD width."); UnrollerL<0, numSimdLines, 1>::step(lambda); mTail++; if (mTail == mBlockSize) { if (++mCurBlockIdx < mBlocks.size()) { mCurBlock = mBlocks[mCurBlockIdx]; } else { T* newBlock = (T*)arena.AllocAligned(sizeof(T) * mBlockSize, KNOB_SIMD_WIDTH * 4); SWR_ASSERT(newBlock); mBlocks.push_back(newBlock); mCurBlock = newBlock; } mTail = 0; } mNumEntries++; return true; } void destroy() {} };