Foundation
Loading...
Searching...
No Matches
Public Member Functions | Static Public Member Functions | Private Member Functions | Static Private Member Functions | Private Attributes | List of all members
Foundation::Core::ThreadPool Class Reference

Atomic, lock-free Thread Pool implementation with fixed bounds. More...

#include <ThreadPool.hpp>

Public Member Functions

 ThreadPool (size_t numThreads, size_t maxTasks, Allocator *alloc, StringView name="ThreadPool")
 Construct a thread pool with the given number of worker threads.
 
template<typename T , typename... Args>
requires std::is_base_of_v<Job, T>
T * PushImpl (JobPriority priority, Args &&... args)
 Push a job implementing ThreadPoolJob to the thread pool.
 
template<typename T , typename... Args>
requires std::is_base_of_v<Job, T>
T * PushImpl (Args &&... args)
 
template<typename T , typename... Args>
requires std::is_base_of_v<Job, T>
T * PushImplAlloc (Allocator *jobAllocator, Args &&... args)
 Push a job with an explicit allocator for the job object.
 
template<typename T , typename... Args>
requires std::is_base_of_v<Job, T>
T * PushImplAlloc (JobPriority priority, Allocator *jobAllocator, Args &&... args)
 
template<typename Lambda , typename... Args>
auto Push (JobPriority priority, Lambda &&func, Args const &... args)
 Push a lambda job to the thread pool.
 
template<typename Lambda , typename... Args>
auto Push (Lambda &&func, Args const &... args)
 
template<typename Lambda , typename... Args>
auto PushAlloc (Allocator *jobAllocator, Lambda &&func, Args const &... args)
 Push a lambda job with an explicit allocator for the job object.
 
template<typename Lambda , typename... Args>
auto PushAlloc (JobPriority priority, Allocator *jobAllocator, Lambda &&func, Args const &... args)
 
void CoInvoke (Job &job, size_t count, JobPriority priority=JobPriority::Normal)
 Enqueues count non-owning references to a single, externally-owned job so that up to count workers co-run it concurrently (each call to ThreadPoolJob::Execute receives that worker's id). The pool never destroys or frees job — the caller owns its lifetime and MUST keep it alive until all co-invocations complete (e.g. via a fork-join latch). Used by ParallelFor for a self-draining, zero-allocation job.
 
size_t GetWorkerCount () const noexcept
 Number of worker threads. Worker ids passed to Execute are in [0, this).
 
size_t GetParallelForConcurrency () const noexcept
 Number of distinct worker ids a ParallelFor functor may see (workers + the participating caller). Size per-worker scratch to this.
 
template<typename Fn >
void ParallelFor (ExecutionPolicy policy, size_t count, Fn &&fn)
 Parallel-for over [0, count): invokes fn for every index as fn(i) or fn(i, workerId) (the worker id is passed only if the functor accepts it), blocking until all indices are processed.
 
template<typename Fn >
void ParallelFor (size_t count, Fn &&fn)
 Index parallel-for defaulting to ExecutionPolicy::Par.
 
template<typename It , typename Fn >
requires std::random_access_iterator<It>
void ParallelFor (ExecutionPolicy policy, It first, It last, Fn &&fn)
 Iterator-range parallel-for, like a (policy-aware) parallel std::for_each: invokes fn for each element in [first, last) as fn(elem) or fn(elem, workerId).
 
template<typename It , typename Fn >
requires std::random_access_iterator<It>
void ParallelFor (It first, It last, Fn &&fn)
 Iterator-range parallel-for defaulting to ExecutionPolicy::Par.
 
template<typename Fn >
Future< void > ParallelForAsync (ExecutionPolicy policy, size_t count, Fn &&fn)
 Non-blocking parallel-for over [0, count): schedules the work across the pool's workers and returns immediately with a Future<void> that becomes ready once every index has been processed. The calling thread does NOT participate.
 
template<typename Fn >
Future< void > ParallelForAsync (size_t count, Fn &&fn)
 Index async parallel-for defaulting to ExecutionPolicy::Par.
 
template<typename It , typename Fn >
requires std::random_access_iterator<It>
Future< void > ParallelForAsync (ExecutionPolicy policy, It first, It last, Fn &&fn)
 Iterator-range overload of ParallelForAsync (random-access iterators only).
 
template<typename It , typename Fn >
requires std::random_access_iterator<It>
Future< void > ParallelForAsync (It first, It last, Fn &&fn)
 Iterator-range async parallel-for defaulting to ExecutionPolicy::Par.
 
void Shutdown ()
 Shutdown the ThreadPool, potentially cancelling all pending jobs.
 
void Join ()
 Wait for all scheduled jobs to complete.
 
 ~ThreadPool ()
 Shutdown, without waiting for pending jobs.
 
size_t GetPendingJobCount () const noexcept
 
size_t GetCompletedJobCount () const noexcept
 
size_t GetTotalJobCount () const noexcept
 

Static Public Member Functions

static Future< void > MakeReadyFuture ()
 Returns an already-satisfied Future<void> (for trivial/inline async paths).
 
static const size_t CalcTaskSize (size_t size)
 

Private Member Functions

void ThreadPoolWorker (size_t id)
 
template<typename T , typename... Args>
requires std::is_base_of_v<Job, T>
T * PushImplInternal (JobPriority priority, Allocator *jobAllocator, Args &&... args)
 
template<typename Lambda , typename... Args>
auto PushLambdaInternal (JobPriority priority, Allocator *jobAllocator, Lambda &&func, Args const &... args)
 

Static Private Member Functions

static constexpr size_t PriorityIndex (JobPriority priority) noexcept
 

Private Attributes

Allocator * mAllocator
 
String mName
 
Atomic< bool > mShutdown {false}
 
Atomic< size_t > mComplete {0}
 
Atomic< size_t > mTotal {0}
 
JobQueues mJobs
 
Vector< Thread > mThreads
 

Detailed Description

Atomic, lock-free Thread Pool implementation with fixed bounds.

Constructor & Destructor Documentation

◆ ThreadPool()

Foundation::Core::ThreadPool::ThreadPool ( size_t  numThreads,
size_t  maxTasks,
Allocator *  alloc,
StringView  name = "ThreadPool" 
)

Construct a thread pool with the given number of worker threads.

Parameters
numThreads — Number of worker threads to spawn.
maxTasks — Max number of tasks that can be queued per priority. Must be a power of two - see CalcTaskSize.
alloc — Allocator to use for internal and job allocations.
name — Prefix for worker thread names ("name@id").

◆ ~ThreadPool()

Foundation::Core::ThreadPool::~ThreadPool ( )

Shutdown, without waiting for pending jobs.

Member Function Documentation

◆ CalcTaskSize()

static const size_t Foundation::Core::ThreadPool::CalcTaskSize ( size_t  size)
inline static

Rounds a number up to the nearest power of two so that it is a valid maxTasks size.

◆ CoInvoke()

void Foundation::Core::ThreadPool::CoInvoke ( Job &  job,
size_t  count,
JobPriority  priority = JobPriority::Normal 
)

Enqueues count non-owning references to a single, externally-owned job so that up to count workers co-run it concurrently (each call to ThreadPoolJob::Execute receives that worker's id). The pool never destroys or frees job — the caller owns its lifetime and MUST keep it alive until all co-invocations complete (e.g. via a fork-join latch). Used by ParallelFor for a self-draining, zero-allocation job.

Note
count is typically <= GetWorkerCount; worker ids passed to Execute are in [0, GetWorkerCount). A participating caller should use id == GetWorkerCount.

◆ GetCompletedJobCount()

size_t Foundation::Core::ThreadPool::GetCompletedJobCount ( ) const
inline noexcept

◆ GetParallelForConcurrency()

size_t Foundation::Core::ThreadPool::GetParallelForConcurrency ( ) const
inline noexcept

Number of distinct worker ids a ParallelFor functor may see (workers + the participating caller). Size per-worker scratch to this.

◆ GetPendingJobCount()

size_t Foundation::Core::ThreadPool::GetPendingJobCount ( ) const
inline noexcept

◆ GetTotalJobCount()

size_t Foundation::Core::ThreadPool::GetTotalJobCount ( ) const
inline noexcept

◆ GetWorkerCount()

size_t Foundation::Core::ThreadPool::GetWorkerCount ( ) const
inline noexcept

Number of worker threads. Worker ids passed to Execute are in [0, this).

◆ Join()

void Foundation::Core::ThreadPool::Join ( )

Wait for all scheduled jobs to complete.

Note
This MUST be called if you'd like all submitted work to complete before destruction.

◆ MakeReadyFuture()

static Future< void > Foundation::Core::ThreadPool::MakeReadyFuture ( )
inline static

Returns an already-satisfied Future<void> (for trivial/inline async paths).

◆ ParallelFor() [1/4]

template<typename It , typename Fn >
requires std::random_access_iterator<It>
void Foundation::Core::ThreadPool::ParallelFor ( ExecutionPolicy  policy,
It  first,
It  last,
Fn &&  fn 
)
inline

Iterator-range parallel-for, like a (policy-aware) parallel std::for_each: invokes fn for each element in [first, last) as fn(elem) or fn(elem, workerId).

A thin wrapper over the index form (random-access iterators only): the element is passed by reference, so fn may mutate it (parallel transform). Same concurrency contract as the index form; policy selects serial vs parallel at runtime.

◆ ParallelFor() [2/4]

template<typename Fn >
void Foundation::Core::ThreadPool::ParallelFor ( ExecutionPolicy  policy,
size_t  count,
Fn &&  fn 
)
inline

Parallel-for over [0, count): invokes fn for every index as fn(i) or fn(i, workerId) (the worker id is passed only if the functor accepts it), blocking until all indices are processed.

Main-thread-initiated and non-nesting. With ExecutionPolicy::Par it reuses the pool's workers and the calling thread (which participates with id == GetWorkerCount), pushing only non-owning references to a stack-resident ParallelForJob — zero per-call allocation, no futures. ExecutionPolicy::Seq (and a worker-less pool) runs inline in order. fn must be safe to invoke concurrently under Par; key any scratch by workerId. Granularity is one index per call: a job that wants coarser work batches itself by choosing count.

◆ ParallelFor() [3/4]

template<typename It , typename Fn >
requires std::random_access_iterator<It>
void Foundation::Core::ThreadPool::ParallelFor ( It  first,
It  last,
Fn &&  fn 
)
inline

Iterator-range parallel-for defaulting to ExecutionPolicy::Par.

◆ ParallelFor() [4/4]

template<typename Fn >
void Foundation::Core::ThreadPool::ParallelFor ( size_t  count,
Fn &&  fn 
)
inline

Index parallel-for defaulting to ExecutionPolicy::Par.

◆ ParallelForAsync() [1/4]

template<typename It , typename Fn >
requires std::random_access_iterator<It>
Future< void > Foundation::Core::ThreadPool::ParallelForAsync ( ExecutionPolicy  policy,
It  first,
It  last,
Fn &&  fn 
)
inline

Iterator-range overload of ParallelForAsync (random-access iterators only).

Owns both first and fn in the scheduled functor (the caller's stack frame may unwind before the work runs), then forwards to the index form.

◆ ParallelForAsync() [2/4]

template<typename Fn >
Future< void > Foundation::Core::ThreadPool::ParallelForAsync ( ExecutionPolicy  policy,
size_t  count,
Fn &&  fn 
)
inline

Non-blocking parallel-for over [0, count): schedules the work across the pool's workers and returns immediately with a Future<void> that becomes ready once every index has been processed. The calling thread does NOT participate.

Unlike ParallelFor (which blocks and lets the caller help), this lets the caller schedule deformation/skinning work and continue with independent CPU work, then wait on the returned future only at the real dependency boundary. The functor is moved into a heap-resident ParallelForAsyncState shared by the pushed worker jobs; the last worker to finish satisfies the promise and frees the state. fn must be safe to invoke concurrently (same contract as ParallelFor); key any scratch by workerId, which is in [0, GetWorkerCount). With ExecutionPolicy::Seq, an empty range, or a worker-less pool the work runs inline on the calling thread and the returned future is already ready.

◆ ParallelForAsync() [3/4]

template<typename It , typename Fn >
requires std::random_access_iterator<It>
Future< void > Foundation::Core::ThreadPool::ParallelForAsync ( It  first,
It  last,
Fn &&  fn 
)
inline

Iterator-range async parallel-for defaulting to ExecutionPolicy::Par.

◆ ParallelForAsync() [4/4]

template<typename Fn >
Future< void > Foundation::Core::ThreadPool::ParallelForAsync ( size_t  count,
Fn &&  fn 
)
inline

Index async parallel-for defaulting to ExecutionPolicy::Par.

◆ PriorityIndex()

static constexpr size_t Foundation::Core::ThreadPool::PriorityIndex ( JobPriority  priority)
inline static constexpr private noexcept

◆ Push() [1/2]

template<typename Lambda , typename... Args>
auto Foundation::Core::ThreadPool::Push ( JobPriority  priority,
Lambda &&  func,
Args const &...  args 
)
inline

Push a lambda job to the thread pool.

Returns
Future<func ReturnType> that will be set when the job is completed.

◆ Push() [2/2]

template<typename Lambda , typename... Args>
auto Foundation::Core::ThreadPool::Push ( Lambda &&  func,
Args const &...  args 
)
inline

◆ PushAlloc() [1/2]

template<typename Lambda , typename... Args>
auto Foundation::Core::ThreadPool::PushAlloc ( Allocator *  jobAllocator,
Lambda &&  func,
Args const &...  args 
)
inline

Push a lambda job with an explicit allocator for the job object.

Parameters
jobAllocator — Optional allocator for the job object. If null, the thread pool allocator is used.

◆ PushAlloc() [2/2]

template<typename Lambda , typename... Args>
auto Foundation::Core::ThreadPool::PushAlloc ( JobPriority  priority,
Allocator *  jobAllocator,
Lambda &&  func,
Args const &...  args 
)
inline

◆ PushImpl() [1/2]

template<typename T , typename... Args>
requires std::is_base_of_v<Job, T>
T * Foundation::Core::ThreadPool::PushImpl ( Args &&...  args)
inline

◆ PushImpl() [2/2]

template<typename T , typename... Args>
requires std::is_base_of_v<Job, T>
T * Foundation::Core::ThreadPool::PushImpl ( JobPriority  priority,
Args &&...  args 
)
inline

Push a job implementing ThreadPoolJob to the thread pool.

Note
This by itself does not return a future or any way to get the result of the job. It's up to the implementation of the job to provide a way to get the result. See also ThreadPoolLambdaJob.
Returns
Stable pointer of the pushed job. Lifetime guaranteed until the job's completion.

◆ PushImplAlloc() [1/2]

template<typename T , typename... Args>
requires std::is_base_of_v<Job, T>
T * Foundation::Core::ThreadPool::PushImplAlloc ( Allocator *  jobAllocator,
Args &&...  args 
)
inline

Push a job with an explicit allocator for the job object.

Parameters
jobAllocator — Optional allocator for the job object. If null, the thread pool allocator is used.
Returns
Stable pointer of the pushed job. Lifetime guaranteed until the job's completion.

◆ PushImplAlloc() [2/2]

template<typename T , typename... Args>
requires std::is_base_of_v<Job, T>
T * Foundation::Core::ThreadPool::PushImplAlloc ( JobPriority  priority,
Allocator *  jobAllocator,
Args &&...  args 
)
inline

◆ PushImplInternal()

template<typename T , typename... Args>
requires std::is_base_of_v<Job, T>
T * Foundation::Core::ThreadPool::PushImplInternal ( JobPriority  priority,
Allocator *  jobAllocator,
Args &&...  args 
)
inline private

◆ PushLambdaInternal()

template<typename Lambda , typename... Args>
auto Foundation::Core::ThreadPool::PushLambdaInternal ( JobPriority  priority,
Allocator *  jobAllocator,
Lambda &&  func,
Args const &...  args 
)
inline private

◆ Shutdown()

void Foundation::Core::ThreadPool::Shutdown ( )

Shutdown the ThreadPool, potentially cancelling all pending jobs.

Note
This does not cancel running jobs, but prevents any new jobs from being run/scheduled.

◆ ThreadPoolWorker()

void Foundation::Core::ThreadPool::ThreadPoolWorker ( size_t  id)
private

Member Data Documentation

◆ mAllocator

Allocator* Foundation::Core::ThreadPool::mAllocator
private

◆ mComplete

Atomic<size_t> Foundation::Core::ThreadPool::mComplete {0}
private

◆ mJobs

JobQueues Foundation::Core::ThreadPool::mJobs
private

◆ mName

String Foundation::Core::ThreadPool::mName
private

◆ mShutdown

Atomic<bool> Foundation::Core::ThreadPool::mShutdown {false}
private

◆ mThreads

Vector<Thread> Foundation::Core::ThreadPool::mThreads
private

◆ mTotal

Atomic<size_t> Foundation::Core::ThreadPool::mTotal {0}
private

The documentation for this class was generated from the following files: