21 #if BT_USE_OPENMP && BT_THREADSAFE 25 #endif // #if BT_USE_OPENMP && BT_THREADSAFE 28 #if BT_USE_PPL && BT_THREADSAFE 35 #endif // #if BT_USE_PPL && BT_THREADSAFE 38 #if BT_USE_TBB && BT_THREADSAFE 41 #define __TBB_NO_IMPLICIT_LINKAGE 1 43 #include <tbb/task_scheduler_init.h> 44 #include <tbb/parallel_for.h> 45 #include <tbb/blocked_range.h> 47 #endif // #if BT_USE_TBB && BT_THREADSAFE 58 #if __cplusplus >= 201103L 62 #define USE_CPP11_ATOMICS 1 64 #elif defined( _MSC_VER ) 67 #define USE_MSVC_INTRINSICS 1 69 #elif defined( __GNUC__ ) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) 73 #define USE_GCC_BUILTIN_ATOMICS 1 75 #elif defined( __GNUC__ ) && (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) 78 #define USE_GCC_BUILTIN_ATOMICS_OLD 1 88 #define THREAD_LOCAL_STATIC thread_local static 92 std::atomic<int>* aDest =
reinterpret_cast<std::atomic<int>*
>(&
mLock);
94 return std::atomic_compare_exchange_weak_explicit( aDest, &expected,
int(1), std::memory_order_acq_rel, std::memory_order_acquire );
108 std::atomic<int>* aDest =
reinterpret_cast<std::atomic<int>*
>(&
mLock);
109 std::atomic_store_explicit( aDest,
int(0), std::memory_order_release );
113 #elif USE_MSVC_INTRINSICS 115 #define WIN32_LEAN_AND_MEAN 120 #define THREAD_LOCAL_STATIC __declspec( thread ) static 125 volatile long* aDest =
reinterpret_cast<long*
>(&
mLock);
126 return ( 0 == _InterlockedCompareExchange( aDest, 1, 0) );
140 volatile long* aDest =
reinterpret_cast<long*
>( &
mLock );
141 _InterlockedExchange( aDest, 0 );
144 #elif USE_GCC_BUILTIN_ATOMICS 146 #define THREAD_LOCAL_STATIC static __thread 153 const int memOrderSuccess = __ATOMIC_ACQ_REL;
154 const int memOrderFail = __ATOMIC_ACQUIRE;
155 return __atomic_compare_exchange_n(&
mLock, &expected,
int(1), weak, memOrderSuccess, memOrderFail);
169 __atomic_store_n(&
mLock,
int(0), __ATOMIC_RELEASE);
172 #elif USE_GCC_BUILTIN_ATOMICS_OLD 175 #define THREAD_LOCAL_STATIC static __thread 179 return __sync_bool_compare_and_swap(&
mLock,
int(0),
int(1));
194 __sync_fetch_and_and(&
mLock,
int(0));
197 #else //#elif USE_MSVC_INTRINSICS 199 #error "no threading primitives defined -- unknown platform" 201 #endif //#else //#elif USE_MSVC_INTRINSICS 203 #else //#if BT_THREADSAFE 208 btAssert( !
"unimplemented btSpinMutex::lock() called" );
213 btAssert( !
"unimplemented btSpinMutex::unlock() called" );
218 btAssert( !
"unimplemented btSpinMutex::tryLock() called" );
222 #define THREAD_LOCAL_STATIC static 224 #endif // #else //#if BT_THREADSAFE 245 btAssert( !
"thread counter exceeded" );
249 unsigned int val = mCounter;
287 #define BT_DETECT_BAD_THREAD_INDEX 0 289 #if BT_DETECT_BAD_THREAD_INDEX 291 typedef DWORD ThreadId_t;
292 const static ThreadId_t kInvalidThreadId = 0;
295 static ThreadId_t getDebugThreadId()
297 return GetCurrentThreadId();
300 #endif // #if BT_DETECT_BAD_THREAD_INDEX 306 const unsigned int kNullIndex = ~0
U;
308 if ( sThreadIndex == kNullIndex )
310 sThreadIndex = gThreadCounter.
getNext();
313 #if BT_DETECT_BAD_THREAD_INDEX 314 if ( gBtTaskScheduler && sThreadIndex > 0 )
316 ThreadId_t tid = getDebugThreadId();
318 if ( gDebugThreadIds[ sThreadIndex ] == kInvalidThreadId )
321 gDebugThreadIds[ sThreadIndex ] = tid;
325 if ( gDebugThreadIds[ sThreadIndex ] != tid )
329 btAssert( !
"there are 2 or more threads with the same thread-index!" );
334 #endif // #if BT_DETECT_BAD_THREAD_INDEX 353 m_savedThreadCounter = 0;
367 gThreadCounter.
mCounter = m_savedThreadCounter;
376 m_savedThreadCounter = gThreadCounter.
mCounter;
383 gThreadsRunningCounterMutex.
lock();
384 gThreadsRunningCounter++;
385 gThreadsRunningCounterMutex.
unlock();
390 gThreadsRunningCounterMutex.
lock();
391 gThreadsRunningCounter--;
392 gThreadsRunningCounterMutex.
unlock();
397 return gThreadsRunningCounter != 0;
406 btAssert( !
"btSetTaskScheduler must be called from the main thread!" );
409 if ( gBtTaskScheduler )
414 gBtTaskScheduler = ts;
433 #if BT_DETECT_BAD_THREAD_INDEX 439 gDebugThreadIds[ i ] = kInvalidThreadId;
442 #endif // #if BT_DETECT_BAD_THREAD_INDEX 444 btAssert( gBtTaskScheduler != NULL );
445 gBtTaskScheduler->
parallelFor( iBegin, iEnd, grainSize, body );
447 #else // #if BT_THREADSAFE 450 btAssert( !
"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE" );
453 #endif// #if BT_THREADSAFE 460 #if BT_DETECT_BAD_THREAD_INDEX 466 gDebugThreadIds[ i ] = kInvalidThreadId;
469 #endif // #if BT_DETECT_BAD_THREAD_INDEX 471 btAssert( gBtTaskScheduler != NULL );
472 return gBtTaskScheduler->
parallelSum( iBegin, iEnd, grainSize, body );
// Non-threadsafe build of btParallelSum: there is no scheduler to dispatch to, so the
// assert names the function that was actually called (the message previously said
// btParallelFor) and the code falls back to a direct body.sumLoop over the whole range.
474 #else // #if BT_THREADSAFE 477 btAssert( !
"called btParallelSum in non-threadsafe build. enable BT_THREADSAFE" );
478 return body.
sumLoop( iBegin, iEnd );
480 #endif //#else // #if BT_THREADSAFE 498 body.forLoop( iBegin, iEnd );
503 return body.sumLoop( iBegin, iEnd );
508 #if BT_USE_OPENMP && BT_THREADSAFE 522 return omp_get_max_threads();
528 virtual void setNumThreads(
int numThreads )
BT_OVERRIDE 534 omp_set_num_threads( 1 );
535 omp_set_num_threads( m_numThreads );
536 m_savedThreadCounter = 0;
546 #pragma omp parallel for schedule( static, 1 ) 547 for (
int i = iBegin; i < iEnd; i += grainSize )
550 body.
forLoop( i, ( std::min )( i + grainSize, iEnd ) );
559 #pragma omp parallel for schedule( static, 1 ) reduction(+:sum) 560 for (
int i = iBegin; i < iEnd; i += grainSize )
563 sum += body.
sumLoop( i, ( std::min )( i + grainSize, iEnd ) );
569 #endif // #if BT_USE_OPENMP && BT_THREADSAFE 572 #if BT_USE_TBB && BT_THREADSAFE 579 tbb::task_scheduler_init* m_tbbSchedulerInit;
585 m_tbbSchedulerInit = NULL;
587 ~btTaskSchedulerTBB()
589 if ( m_tbbSchedulerInit )
591 delete m_tbbSchedulerInit;
592 m_tbbSchedulerInit = NULL;
598 return tbb::task_scheduler_init::default_num_threads();
604 virtual void setNumThreads(
int numThreads )
BT_OVERRIDE 607 if ( m_tbbSchedulerInit )
610 delete m_tbbSchedulerInit;
611 m_tbbSchedulerInit = NULL;
613 m_tbbSchedulerInit =
new tbb::task_scheduler_init( m_numThreads );
614 m_savedThreadCounter = 0;
620 struct ForBodyAdapter
625 void operator()(
const tbb::blocked_range<int>& range )
const 628 mBody->
forLoop( range.begin(), range.end() );
634 ForBodyAdapter tbbBody( &body );
636 tbb::parallel_for( tbb::blocked_range<int>( iBegin, iEnd, grainSize ),
638 tbb::simple_partitioner()
642 struct SumBodyAdapter
// Splitting constructor (tagged with tbb::split): copies the body pointer from src
// and starts this adapter's partial sum at zero.
648 SumBodyAdapter(
const SumBodyAdapter& src,
tbb::split ) : mBody( src.mBody ), mSum(
btScalar( 0 ) ) {}
// Adds the partial sum held by src into this adapter's mSum.
649 void join(
const SumBodyAdapter& src ) { mSum += src.mSum; }
650 void operator()(
const tbb::blocked_range<int>& range )
653 mSum += mBody->
sumLoop( range.begin(), range.end() );
659 SumBodyAdapter tbbBody( &body );
661 tbb::parallel_deterministic_reduce( tbb::blocked_range<int>( iBegin, iEnd, grainSize ), tbbBody );
666 #endif // #if BT_USE_TBB && BT_THREADSAFE 669 #if BT_USE_PPL && BT_THREADSAFE 676 concurrency::combinable<btScalar> m_sum;
684 return concurrency::GetProcessorCount();
690 virtual void setNumThreads(
int numThreads )
BT_OVERRIDE 694 m_numThreads = ( std::max )( 1, ( std::min )( maxThreadCount, numThreads ) );
695 using namespace concurrency;
696 if ( CurrentScheduler::Id() != -1 )
698 CurrentScheduler::Detach();
700 SchedulerPolicy policy;
704 policy.SetConcurrencyLimits( 1, 1 );
705 CurrentScheduler::Create( policy );
706 CurrentScheduler::Detach();
708 policy.SetConcurrencyLimits( m_numThreads, m_numThreads );
709 CurrentScheduler::Create( policy );
710 m_savedThreadCounter = 0;
716 struct ForBodyAdapter
// Stores the loop body pointer, the grain size and the end index in
// mBody, mGrainSize and mIndexEnd respectively.
722 ForBodyAdapter(
const btIParallelForBody* body,
int grainSize,
int end ) : mBody( body ), mGrainSize( grainSize ), mIndexEnd( end ) {}
723 void operator()(
int i )
const 726 mBody->
forLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) );
733 ForBodyAdapter pplBody( &body, grainSize, iEnd );
736 concurrency::parallel_for( iBegin,
743 struct SumBodyAdapter
746 concurrency::combinable<btScalar>* mSum;
// Stores the sum body pointer, the pointer to the combinable<btScalar> accumulator,
// the grain size and the end index in mBody, mSum, mGrainSize and mIndexEnd.
750 SumBodyAdapter(
const btIParallelSumBody* body, concurrency::combinable<btScalar>*
sum,
int grainSize,
int end ) : mBody( body ), mSum(sum), mGrainSize( grainSize ), mIndexEnd( end ) {}
751 void operator()(
int i )
const 754 mSum->local() += mBody->
sumLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) );
762 SumBodyAdapter pplBody( &body, &m_sum, grainSize, iEnd );
765 concurrency::parallel_for( iBegin,
771 return m_sum.combine( sumFunc );
774 #endif // #if BT_USE_PPL && BT_THREADSAFE 781 return &sTaskScheduler;
788 #if BT_USE_OPENMP && BT_THREADSAFE 789 static btTaskSchedulerOpenMP sTaskScheduler;
790 return &sTaskScheduler;
800 #if BT_USE_TBB && BT_THREADSAFE 801 static btTaskSchedulerTBB sTaskScheduler;
802 return &sTaskScheduler;
812 #if BT_USE_PPL && BT_THREADSAFE 813 static btTaskSchedulerPPL sTaskScheduler;
814 return &sTaskScheduler;
static T sum(const btAlignedObjectArray< T > &items)
btITaskScheduler * btGetPPLTaskScheduler()
void btResetThreadIndexCounter()
virtual void forLoop(int iBegin, int iEnd) const =0
virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody &body) BT_OVERRIDE
btITaskScheduler * btGetTaskScheduler()
static int split(btDbvtNode **leaves, int count, const btVector3 &org, const btVector3 &axis)
virtual void deactivate()
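btSpinMutex – lightweight spin-mutex implemented with atomic ops, never puts a thread to sleep because it is designed to be used with a task scheduler which has one thread per core and the threads don't sleep until they run out of tasks. Not good for general purpose use.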
void btPushThreadsAreRunning()
virtual int getMaxNumThreads() const BT_OVERRIDE
const unsigned int BT_MAX_THREAD_COUNT
void btSetTaskScheduler(btITaskScheduler *ts)
btTaskSchedulerSequential()
virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody &body)=0
#define THREAD_LOCAL_STATIC
bool btThreadsAreRunning()
virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody &body)=0
virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody &body) BT_OVERRIDE
static ThreadsafeCounter gThreadCounter
btTaskSchedulerSequential – non-threaded implementation of task scheduler (really just useful for testing performance of single threaded vs multi)
virtual btScalar sumLoop(int iBegin, int iEnd) const =0
virtual int getNumThreads() const BT_OVERRIDE
btITaskScheduler(const char *name)
virtual void setNumThreads(int numThreads) BT_OVERRIDE
static btITaskScheduler * gBtTaskScheduler
btITaskScheduler * btGetSequentialTaskScheduler()
btITaskScheduler * btGetTBBTaskScheduler()
btScalar btParallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody &body)
static int gThreadsRunningCounter
void btParallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody &body)
unsigned int btGetCurrentThreadIndex()
btITaskScheduler * btGetOpenMPTaskScheduler()
static btSpinMutex gThreadsRunningCounterMutex
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floating point precision.
void btPopThreadsAreRunning()