Atomic, lock-free Thread Pool implementation with fixed bounds. More...

#include <ThreadPool.hpp>

Public Member Functions
	ThreadPool (size_t numThreads, size_t maxTasks, Allocator *alloc, StringView name="ThreadPool")
	Construct a thread pool with the given number of worker threads.

template<typename T , typename... Args> requires std::is_base_of_v<Job, T>
T *	PushImpl (JobPriority priority, Args &&... args)
	Push a job implementing ThreadPoolJob to the thread pool.

template<typename T , typename... Args> requires std::is_base_of_v<Job, T>
T *	PushImpl (Args &&... args)

template<typename T , typename... Args> requires std::is_base_of_v<Job, T>
T *	PushImplAlloc (Allocator *jobAllocator, Args &&... args)
	Push a job with an explicit allocator for the job object.

template<typename T , typename... Args> requires std::is_base_of_v<Job, T>
T *	PushImplAlloc (JobPriority priority, Allocator *jobAllocator, Args &&... args)

template<typename Lambda , typename... Args>
auto	Push (JobPriority priority, Lambda &&func, Args const &... args)
	Push a lambda job to the thread pool.

template<typename Lambda , typename... Args>
auto	Push (Lambda &&func, Args const &... args)

template<typename Lambda , typename... Args>
auto	PushAlloc (Allocator *jobAllocator, Lambda &&func, Args const &... args)
	Push a lambda job with an explicit allocator for the job object.

template<typename Lambda , typename... Args>
auto	PushAlloc (JobPriority priority, Allocator *jobAllocator, Lambda &&func, Args const &... args)

void	CoInvoke (Job &job, size_t count, JobPriority priority=JobPriority::Normal)
	Enqueues `count` non-owning references to a single, externally-owned job so that up to `count` workers co-run it concurrently (each call to ThreadPoolJob::Execute receives that worker's id). The pool never destroys or frees `job` — the caller owns its lifetime and MUST keep it alive until all co-invocations complete (e.g. via a fork-join latch). Used by ParallelFor for a self-draining, zero-allocation job.

size_t	GetWorkerCount () const noexcept
	Number of worker threads. Worker ids passed to Execute are in [0, GetWorkerCount()).

size_t	GetParallelForConcurrency () const noexcept
	Number of distinct worker ids a ParallelFor functor may see (workers + the participating caller). Size per-worker scratch to this.

template<typename Fn >
void	ParallelFor (ExecutionPolicy policy, size_t count, Fn &&fn)
	Parallel-for over [0, `count`): invokes `fn` for every index as `fn(i)` or `fn(i, workerId)` (the worker id is passed only if the functor accepts it), blocking until all indices are processed.

template<typename Fn >
void	ParallelFor (size_t count, Fn &&fn)
	Index parallel-for defaulting to ExecutionPolicy::Par.

template<typename It , typename Fn > requires std::random_access_iterator<It>
void	ParallelFor (ExecutionPolicy policy, It first, It last, Fn &&fn)
	Iterator-range parallel-for, like a (policy-aware) parallel `std::for_each`: invokes `fn` for each element in [`first`, `last`) as `fn(elem)` or `fn(elem, workerId)`.

template<typename It , typename Fn > requires std::random_access_iterator<It>
void	ParallelFor (It first, It last, Fn &&fn)
	Iterator-range parallel-for defaulting to ExecutionPolicy::Par.

template<typename Fn >
Future< void >	ParallelForAsync (ExecutionPolicy policy, size_t count, Fn &&fn)
	Non-blocking parallel-for over [0, `count`): schedules the work across the pool's workers and returns immediately with a Future<void> that becomes ready once every index has been processed. The calling thread does NOT participate.

template<typename Fn >
Future< void >	ParallelForAsync (size_t count, Fn &&fn)
	Index async parallel-for defaulting to ExecutionPolicy::Par.

template<typename It , typename Fn > requires std::random_access_iterator<It>
Future< void >	ParallelForAsync (ExecutionPolicy policy, It first, It last, Fn &&fn)
	Iterator-range overload of ParallelForAsync (random-access iterators only).

template<typename It , typename Fn > requires std::random_access_iterator<It>
Future< void >	ParallelForAsync (It first, It last, Fn &&fn)
	Iterator-range async parallel-for defaulting to ExecutionPolicy::Par.

void	Shutdown ()
	Shut down the ThreadPool, potentially cancelling all pending jobs.

void	Join ()
	Wait for all scheduled jobs to complete.

	~ThreadPool ()
	Shut down without waiting for pending jobs.

size_t	GetPendingJobCount () const noexcept

size_t	GetCompletedJobCount () const noexcept

size_t	GetTotalJobCount () const noexcept

Static Public Member Functions
static Future< void >	MakeReadyFuture ()
	Returns an already-satisfied `Future<void>` (for trivial/inline async paths).

static const size_t	CalcTaskSize (size_t size)

Private Member Functions
void	ThreadPoolWorker (size_t id)

template<typename T , typename... Args> requires std::is_base_of_v<Job, T>
T *	PushImplInternal (JobPriority priority, Allocator *jobAllocator, Args &&... args)

template<typename Lambda , typename... Args>
auto	PushLambdaInternal (JobPriority priority, Allocator *jobAllocator, Lambda &&func, Args const &... args)

Static Private Member Functions
static constexpr size_t	PriorityIndex (JobPriority priority) noexcept

Private Attributes
Allocator *	mAllocator

String	mName

Atomic< bool >	mShutdown {false}

Atomic< size_t >	mComplete {0}

Atomic< size_t >	mTotal {0}

JobQueues	mJobs

Vector< Thread >	mThreads

Detailed Description

Atomic, lock-free Thread Pool implementation with fixed bounds.

Constructor & Destructor Documentation

◆ ThreadPool()

Foundation::Core::ThreadPool::ThreadPool	(	size_t	numThreads,
		size_t	maxTasks,
		Allocator *	alloc,
		StringView	name = `"ThreadPool"`
	)

Construct a thread pool with the given number of worker threads.

Parameters

numThreads	Number of worker threads to spawn.
maxTasks	Max number of tasks that can be queued per priority. Must be a power of two - see CalcTaskSize
alloc	Allocator to use for internal and job allocations
name	Prefix for worker thread names ("name@id")

◆ ~ThreadPool()

Foundation::Core::ThreadPool::~ThreadPool ( )

Shut down without waiting for pending jobs.

Member Function Documentation

◆ CalcTaskSize()

static const size_t Foundation::Core::ThreadPool::CalcTaskSize ( size_t size )

inline static

Rounds a number up to the nearest power of two so that it is a valid maxTasks size.

◆ CoInvoke()

void Foundation::Core::ThreadPool::CoInvoke	(	Job &	job,
		size_t	count,
		JobPriority	priority = `JobPriority::Normal`
	)

Enqueues count non-owning references to a single, externally-owned job so that up to count workers co-run it concurrently (each call to ThreadPoolJob::Execute receives that worker's id). The pool never destroys or frees job — the caller owns its lifetime and MUST keep it alive until all co-invocations complete (e.g. via a fork-join latch). Used by ParallelFor for a self-draining, zero-allocation job.

Note: count is typically <= GetWorkerCount; worker ids passed to Execute are in [0, GetWorkerCount). A participating caller should use id == GetWorkerCount.

◆ GetCompletedJobCount()

size_t Foundation::Core::ThreadPool::GetCompletedJobCount ( ) const

inline noexcept

◆ GetParallelForConcurrency()

size_t Foundation::Core::ThreadPool::GetParallelForConcurrency ( ) const

inline noexcept

Number of distinct worker ids a ParallelFor functor may see (workers + the participating caller). Size per-worker scratch to this.

◆ GetPendingJobCount()

size_t Foundation::Core::ThreadPool::GetPendingJobCount ( ) const

inline noexcept

◆ GetTotalJobCount()

size_t Foundation::Core::ThreadPool::GetTotalJobCount ( ) const

inline noexcept

◆ GetWorkerCount()

size_t Foundation::Core::ThreadPool::GetWorkerCount ( ) const

inline noexcept

Number of worker threads. Worker ids passed to Execute are in [0, GetWorkerCount()).

◆ Join()

void Foundation::Core::ThreadPool::Join ( )

Wait for all scheduled jobs to complete.

Note: This MUST be called if you'd like all submitted work to complete before destruction.

◆ MakeReadyFuture()

static Future< void > Foundation::Core::ThreadPool::MakeReadyFuture ( )

inline static

Returns an already-satisfied Future<void> (for trivial/inline async paths).

◆ ParallelFor() [1/4]

template<typename It , typename Fn >
requires std::random_access_iterator<It>

void Foundation::Core::ThreadPool::ParallelFor	(	ExecutionPolicy	policy,
		It	first,
		It	last,
		Fn &&	fn
	)

inline

Iterator-range parallel-for, like a (policy-aware) parallel std::for_each: invokes fn for each element in [first, last) as fn(elem) or fn(elem, workerId).

A thin wrapper over the index form (random-access iterators only): the element is passed by reference, so fn may mutate it (parallel transform). Same concurrency contract as the index form; policy selects serial vs parallel at runtime.

◆ ParallelFor() [2/4]

template<typename Fn >

void Foundation::Core::ThreadPool::ParallelFor	(	ExecutionPolicy	policy,
		size_t	count,
		Fn &&	fn
	)

inline

Parallel-for over [0, count): invokes fn for every index as fn(i) or fn(i, workerId) (the worker id is passed only if the functor accepts it), blocking until all indices are processed.

Main-thread-initiated and non-nesting. With ExecutionPolicy::Par it reuses the pool's workers and the calling thread (which participates with id == GetWorkerCount), pushing only non-owning references to a stack-resident ParallelForJob — zero per-call allocation, no futures. ExecutionPolicy::Seq (and a worker-less pool) runs inline in order. fn must be safe to invoke concurrently under Par; key any scratch by workerId. Granularity is one index per call: a job that wants coarser work batches itself by choosing count.

◆ ParallelFor() [3/4]

template<typename It , typename Fn >
requires std::random_access_iterator<It>

void Foundation::Core::ThreadPool::ParallelFor	(	It	first,
		It	last,
		Fn &&	fn
	)

inline

Iterator-range parallel-for defaulting to ExecutionPolicy::Par.

◆ ParallelFor() [4/4]

template<typename Fn >

void Foundation::Core::ThreadPool::ParallelFor	(	size_t	count,
		Fn &&	fn
	)

inline

Index parallel-for defaulting to ExecutionPolicy::Par.

◆ ParallelForAsync() [1/4]

template<typename It , typename Fn >
requires std::random_access_iterator<It>

Future< void > Foundation::Core::ThreadPool::ParallelForAsync	(	ExecutionPolicy	policy,
		It	first,
		It	last,
		Fn &&	fn
	)

inline

Iterator-range overload of ParallelForAsync (random-access iterators only).

Owns both first and fn in the scheduled functor (the caller's stack frame may unwind before the work runs), then forwards to the index form.

◆ ParallelForAsync() [2/4]

template<typename Fn >

Future< void > Foundation::Core::ThreadPool::ParallelForAsync	(	ExecutionPolicy	policy,
		size_t	count,
		Fn &&	fn
	)

inline

Non-blocking parallel-for over [0, count): schedules the work across the pool's workers and returns immediately with a Future<void> that becomes ready once every index has been processed. The calling thread does NOT participate.

Unlike ParallelFor (which blocks and lets the caller help), this lets the caller schedule deformation/skinning work and continue with independent CPU work, then wait on the returned future only at the real dependency boundary. The functor is moved into a heap-resident ParallelForAsyncState shared by the pushed worker jobs; the last worker to finish satisfies the promise and frees the state. fn must be safe to invoke concurrently (same contract as ParallelFor); key any scratch by workerId, which is in [0, GetWorkerCount). With ExecutionPolicy::Seq, an empty range, or a worker-less pool the work runs inline on the calling thread and the returned future is already ready.

◆ ParallelForAsync() [3/4]

template<typename It , typename Fn >
requires std::random_access_iterator<It>

Future< void > Foundation::Core::ThreadPool::ParallelForAsync	(	It	first,
		It	last,
		Fn &&	fn
	)

inline

Iterator-range async parallel-for defaulting to ExecutionPolicy::Par.

◆ ParallelForAsync() [4/4]

template<typename Fn >

Future< void > Foundation::Core::ThreadPool::ParallelForAsync	(	size_t	count,
		Fn &&	fn
	)

inline

Index async parallel-for defaulting to ExecutionPolicy::Par.

◆ PriorityIndex()

static constexpr size_t Foundation::Core::ThreadPool::PriorityIndex ( JobPriority priority )

inline static constexpr private noexcept

◆ Push() [1/2]

template<typename Lambda , typename... Args>

auto Foundation::Core::ThreadPool::Push	(	JobPriority	priority,
		Lambda &&	func,
		Args const &...	args
	)

inline

Push a lambda job to the thread pool.

Returns: A Future of func's return type that is set when the job completes.

◆ Push() [2/2]

template<typename Lambda , typename... Args>

auto Foundation::Core::ThreadPool::Push	(	Lambda &&	func,
		Args const &...	args
	)

inline

◆ PushAlloc() [1/2]

template<typename Lambda , typename... Args>

auto Foundation::Core::ThreadPool::PushAlloc	(	Allocator *	jobAllocator,
		Lambda &&	func,
		Args const &...	args
	)

inline

Push a lambda job with an explicit allocator for the job object.

Parameters

jobAllocator Optional allocator for the job object. If null, the thread pool allocator is used.

◆ PushAlloc() [2/2]

template<typename Lambda , typename... Args>

auto Foundation::Core::ThreadPool::PushAlloc	(	JobPriority	priority,
		Allocator *	jobAllocator,
		Lambda &&	func,
		Args const &...	args
	)

inline

◆ PushImpl() [1/2]

template<typename T , typename... Args>
requires std::is_base_of_v<Job, T>

T * Foundation::Core::ThreadPool::PushImpl ( Args &&... args )

inline

◆ PushImpl() [2/2]

template<typename T , typename... Args>
requires std::is_base_of_v<Job, T>

T * Foundation::Core::ThreadPool::PushImpl	(	JobPriority	priority,
		Args &&...	args
	)

inline

Push a job implementing ThreadPoolJob to the thread pool.

Note: This by itself does not return a future or any other way to get the result of the job. It's up to the implementation of the job to provide a way to get the result. See also ThreadPoolLambdaJob.

Returns: Stable pointer to the pushed job. Lifetime guaranteed until the job's completion.

◆ PushImplAlloc() [1/2]

template<typename T , typename... Args>
requires std::is_base_of_v<Job, T>

T * Foundation::Core::ThreadPool::PushImplAlloc	(	Allocator *	jobAllocator,
		Args &&...	args
	)

inline

Push a job with an explicit allocator for the job object.

Parameters

jobAllocator Optional allocator for the job object. If null, the thread pool allocator is used.

Returns: Stable pointer to the pushed job. Lifetime guaranteed until the job's completion.

◆ PushImplAlloc() [2/2]

template<typename T , typename... Args>
requires std::is_base_of_v<Job, T>

T * Foundation::Core::ThreadPool::PushImplAlloc	(	JobPriority	priority,
		Allocator *	jobAllocator,
		Args &&...	args
	)

inline

◆ PushImplInternal()

template<typename T , typename... Args>
requires std::is_base_of_v<Job, T>

T * Foundation::Core::ThreadPool::PushImplInternal	(	JobPriority	priority,
		Allocator *	jobAllocator,
		Args &&...	args
	)

inline private

◆ PushLambdaInternal()

template<typename Lambda , typename... Args>

auto Foundation::Core::ThreadPool::PushLambdaInternal	(	JobPriority	priority,
		Allocator *	jobAllocator,
		Lambda &&	func,
		Args const &...	args
	)

inline private

◆ Shutdown()

void Foundation::Core::ThreadPool::Shutdown ( )

Shut down the ThreadPool, potentially cancelling all pending jobs.

Note: This does not cancel running jobs, but prevents any new jobs from being run/scheduled.

◆ ThreadPoolWorker()

void Foundation::Core::ThreadPool::ThreadPoolWorker ( size_t id )

private

Member Data Documentation

◆ mAllocator

Allocator* Foundation::Core::ThreadPool::mAllocator

private

◆ mComplete

Atomic<size_t> Foundation::Core::ThreadPool::mComplete {0}

private

◆ mJobs

JobQueues Foundation::Core::ThreadPool::mJobs

private

◆ mName

String Foundation::Core::ThreadPool::mName

private

◆ mShutdown

Atomic<bool> Foundation::Core::ThreadPool::mShutdown {false}

private

◆ mThreads

Vector<Thread> Foundation::Core::ThreadPool::mThreads

private

◆ mTotal

Atomic<size_t> Foundation::Core::ThreadPool::mTotal {0}

private

The documentation for this class was generated from the following files:

Source/Core/ThreadPool.hpp
Source/Core/ThreadPool.cpp

Public Member Functions

Static Public Member Functions

Private Member Functions

Static Private Member Functions

Private Attributes

Detailed Description

Constructor & Destructor Documentation

◆ ThreadPool()

◆ ~ThreadPool()

Member Function Documentation

◆ CalcTaskSize()

◆ CoInvoke()

◆ GetCompletedJobCount()

◆ GetParallelForConcurrency()

◆ GetPendingJobCount()

◆ GetTotalJobCount()

◆ GetWorkerCount()

◆ Join()

◆ MakeReadyFuture()

◆ ParallelFor() [1/4]

◆ ParallelFor() [2/4]

◆ ParallelFor() [3/4]

◆ ParallelFor() [4/4]

◆ ParallelForAsync() [1/4]

◆ ParallelForAsync() [2/4]

◆ ParallelForAsync() [3/4]

◆ ParallelForAsync() [4/4]

◆ PriorityIndex()

◆ Push() [1/2]

◆ Push() [2/2]

◆ PushAlloc() [1/2]

◆ PushAlloc() [2/2]

◆ PushImpl() [1/2]

◆ PushImpl() [2/2]

◆ PushImplAlloc() [1/2]

◆ PushImplAlloc() [2/2]

◆ PushImplInternal()

◆ PushLambdaInternal()

◆ Shutdown()

◆ ThreadPoolWorker()

Member Data Documentation

◆ mAllocator

◆ mComplete

◆ mJobs

◆ mName

◆ mShutdown

◆ mThreads

◆ mTotal