#include <owl/common/owl-common.h>

#include <algorithm>
#include <cstddef>
#include <utility>

#include <tbb/parallel_for.h>
#include <tbb/task_arena.h>
#define OWL_HAVE_PARALLEL_FOR 1
/*! Calls taskFunction(i) for every i in [0, nTasks), in ascending order,
    on the calling thread.

    \param nTasks       number of indices to visit; a count that is not
                        greater than zero results in no calls at all
    \param taskFunction callable taking one INDEX_T argument; it is
                        invoked as an lvalue, once per index */
template<typename INDEX_T, typename TASK_T>
inline void serial_for(INDEX_T nTasks, TASK_T&& taskFunction)
{
  for (INDEX_T taskIndex = 0; taskIndex < nTasks; ++taskIndex) {
    taskFunction(taskIndex);
  }
}
#if OWL_HAVE_PARALLEL_FOR
/*! Calls taskFunction(i) for every i in [0, nTasks), handing the work to
    tbb::parallel_for. Calls may run concurrently and in any order, so
    taskFunction must be safe to invoke from several threads at once.

    \param nTasks       number of indices to visit; a count that is not
                        greater than zero results in no calls at all
    \param taskFunction callable taking one INDEX_T argument
    \param blockSize    number of consecutive indices that are processed
                        back-to-back by one scheduled task; 0 is treated
                        like 1 (every index is scheduled on its own) */
template<typename INDEX_T, typename TASK_T>
inline void parallel_for(INDEX_T nTasks, TASK_T&& taskFunction,
                         size_t blockSize=1)
{
  // Written as !(0 < n) so that a negative signed count is rejected too,
  // without tripping "comparison is always false" warnings for unsigned
  // index types.
  if (!(INDEX_T(0) < nTasks))
    return;

  if (nTasks == 1) {
    // A single task is run directly rather than going through the scheduler.
    taskFunction(INDEX_T(0));
  } else if (blockSize <= 1) {
    tbb::parallel_for(INDEX_T(0), nTasks, std::forward<TASK_T>(taskFunction));
  } else {
    const size_t numTasks = static_cast<size_t>(nTasks);
    // Round up without computing numTasks+blockSize-1, which could wrap.
    const size_t numBlocks
      = numTasks / blockSize + (numTasks % blockSize != 0 ? 1 : 0);
    // Capturing by reference relies on tbb::parallel_for returning only
    // after all iterations have finished.
    tbb::parallel_for(size_t(0), numBlocks, [&](size_t blockIdx) {
      const size_t begin = blockIdx * blockSize;
      // The last block may be shorter than blockSize.
      const size_t end = begin + std::min(blockSize, numTasks - begin);
      for (size_t i = begin; i < end; ++i)
        taskFunction(INDEX_T(i));
    });
  }
}
#else
# ifndef OWL_DISABLE_TBB
// TBB was not explicitly disabled, yet no parallel backend is available:
// make the silent loss of parallelism visible in the build log.
#  pragma message("warning: TBB not available, parallel_for will run as serial_for")
# endif
/*! Fallback used when no parallel backend is available: visits every
    index in [0, nTasks) in ascending order on the calling thread by
    delegating to serial_for.

    \param nTasks       number of indices to visit
    \param taskFunction callable taking one INDEX_T argument
    \param blockSize    accepted for interface compatibility with the
                        parallel version; ignored here */
template<typename INDEX_T, typename TASK_T>
inline void parallel_for(INDEX_T nTasks, TASK_T&& taskFunction,
                         size_t blockSize=1)
{
  static_cast<void>(blockSize);
  serial_for(nTasks, taskFunction);
}
#endif
/*! Splits [begin, end) into consecutive sub-ranges of at most blockSize
    elements and calls taskFunction(blockBegin, blockEnd) for each of them,
    in ascending order, on the calling thread. Every sub-range is half-open;
    the last one may be shorter than blockSize.

    \param begin        first index of the range
    \param end          one past the last index; nothing is called when
                        end <= begin
    \param blockSize    maximum length of a sub-range; 0 is treated like 1,
                        because a zero step would never advance
    \param taskFunction callable taking two size_t arguments (begin and end
                        of one sub-range) */
template<typename TASK_T>
void serial_for_blocked(size_t begin, size_t end, size_t blockSize,
                        TASK_T &&taskFunction)
{
  const size_t step = (blockSize == 0) ? size_t(1) : blockSize;
  for (size_t block_begin = begin; block_begin < end; ) {
    // Clamp via the remaining length instead of block_begin+step, which
    // could wrap around for ranges that end close to the size_t maximum.
    const size_t count = std::min(step, end - block_begin);
    taskFunction(block_begin, block_begin + count);
    block_begin += count;
  }
}
/*! Splits [begin, end) into consecutive sub-ranges of at most blockSize
    elements and calls taskFunction(blockBegin, blockEnd) once per
    sub-range via parallel_for, so the calls may run concurrently and in any
    order. Every sub-range is half-open; the last one may be shorter than
    blockSize.

    For debugging, serial_for_blocked(begin, end, blockSize, taskFunction)
    visits the same sub-ranges in ascending order on the calling thread
    (for a positive blockSize).

    \param begin        first index of the range
    \param end          one past the last index; nothing is called when
                        end <= begin
    \param blockSize    maximum length of a sub-range; values <= 0 are
                        treated like 1
    \param taskFunction callable taking two size_t arguments (begin and end
                        of one sub-range) */
template<typename TASK_T>
void parallel_for_blocked(size_t begin, size_t end, int blockSize,
                          const TASK_T &taskFunction)
{
  // Guard first: end-begin below is unsigned and would wrap to a huge
  // task count for an inverted range.
  if (end <= begin)
    return;

  // A non-positive block size would divide by zero or turn into an
  // enormous unsigned step.
  const size_t step = (blockSize > 0) ? static_cast<size_t>(blockSize)
                                      : size_t(1);
  const size_t numTasks = end - begin;
  // Round up without computing numTasks+step-1, which could wrap.
  const size_t numBlocks = numTasks / step + (numTasks % step != 0 ? 1 : 0);

  parallel_for(numBlocks, [&](size_t blockID) {
    const size_t offset = blockID * step;
    size_t block_begin = begin + offset;
    // The last block is clamped to the remaining length.
    taskFunction(block_begin, block_begin + std::min(step, numTasks - offset));
  });
}
/*! Overload of parallel_for_blocked for int ranges: splits [begin, end)
    into consecutive sub-ranges of at most blockSize elements and calls
    taskFunction(blockBegin, blockEnd) once per sub-range via parallel_for,
    so the calls may run concurrently and in any order. Every sub-range is
    half-open; the last one may be shorter than blockSize.

    \param begin        first index of the range (may be negative)
    \param end          one past the last index; nothing is called when
                        end <= begin
    \param blockSize    maximum length of a sub-range; values <= 0 are
                        treated like 1
    \param taskFunction callable taking two int arguments (begin and end of
                        one sub-range) */
template<typename TASK_T>
void parallel_for_blocked(int begin, int end, int blockSize,
                          const TASK_T& taskFunction)
{
  if (end <= begin)
    return;

  // All index arithmetic is done in long long: end-begin, the round-up and
  // block_begin+blockSize can each exceed the range of int.
  const long long step = (blockSize > 0) ? blockSize : 1;
  const long long numTasks = static_cast<long long>(end) - begin;
  const long long numBlocks = (numTasks + step - 1) / step;

  parallel_for(numBlocks, [&](long long blockID) {
    const long long first = begin + blockID * step;
    const long long last = std::min<long long>(first + step, end);
    // Both bounds lie within [begin, end], so narrowing back to int is safe.
    int block_begin = static_cast<int>(first);
    taskFunction(block_begin, static_cast<int>(last));
  });
}