1 #ifndef _COMPADRE_PARALLELMANAGER_HPP_ 2 #define _COMPADRE_PARALLELMANAGER_HPP_ 4 #include "Compadre_Config.h" 73 #ifdef COMPADRE_USE_CUDA 90 if (
const char* env_threads = std::getenv(
"THREADS")) {
93 if (
const char* env_vector_lanes = std::getenv(
"VECTORLANES")) {
114 template<
typename Tag,
class C>
116 const int vector_lanes_per_thread = -1)
const {
118 if (threads_per_team>0 && vector_lanes_per_thread>0) {
121 Kokkos::parallel_for(
122 Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
127 functor,
typeid(Tag).name());
130 Kokkos::parallel_for(
131 Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
135 functor,
typeid(Tag).name());
138 Kokkos::parallel_for(
139 Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
143 functor,
typeid(Tag).name());
146 Kokkos::parallel_for(
147 Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
150 functor,
typeid(Tag).name());
152 }
else if (threads_per_team>0) {
155 Kokkos::parallel_for(
161 functor,
typeid(Tag).name());
164 Kokkos::parallel_for(
169 functor,
typeid(Tag).name());
172 Kokkos::parallel_for(
177 functor,
typeid(Tag).name());
180 Kokkos::parallel_for(
184 functor,
typeid(Tag).name());
186 }
else if (vector_lanes_per_thread>0) {
189 Kokkos::parallel_for(
190 Kokkos::TeamPolicy<Tag>(batch_size,
_default_threads, vector_lanes_per_thread)
195 functor,
typeid(Tag).name());
198 Kokkos::parallel_for(
199 Kokkos::TeamPolicy<Tag>(batch_size,
_default_threads, vector_lanes_per_thread)
203 functor,
typeid(Tag).name());
206 Kokkos::parallel_for(
207 Kokkos::TeamPolicy<Tag>(batch_size,
_default_threads, vector_lanes_per_thread)
211 functor,
typeid(Tag).name());
214 Kokkos::parallel_for(
215 Kokkos::TeamPolicy<Tag>(batch_size,
_default_threads, vector_lanes_per_thread)
218 functor,
typeid(Tag).name());
223 Kokkos::parallel_for(
229 functor,
typeid(Tag).name());
232 Kokkos::parallel_for(
237 functor,
typeid(Tag).name());
240 Kokkos::parallel_for(
245 functor,
typeid(Tag).name());
248 Kokkos::parallel_for(
252 functor,
typeid(Tag).name());
261 const int vector_lanes_per_thread = -1, std::string functor_name =
typeid(C).name())
const {
263 if (threads_per_team>0 && vector_lanes_per_thread>0) {
266 Kokkos::parallel_for(
267 Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
272 functor, functor_name);
275 Kokkos::parallel_for(
276 Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
280 functor, functor_name);
283 Kokkos::parallel_for(
284 Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
288 functor, functor_name);
291 Kokkos::parallel_for(
292 Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
295 functor, functor_name);
297 }
else if (threads_per_team>0) {
300 Kokkos::parallel_for(
306 functor, functor_name);
309 Kokkos::parallel_for(
314 functor, functor_name);
317 Kokkos::parallel_for(
322 functor, functor_name);
325 Kokkos::parallel_for(
329 functor, functor_name);
331 }
else if (vector_lanes_per_thread>0) {
333 Kokkos::parallel_for(
339 functor, functor_name);
342 Kokkos::parallel_for(
347 functor, functor_name);
350 Kokkos::parallel_for(
355 functor, functor_name);
358 Kokkos::parallel_for(
362 functor, functor_name);
366 Kokkos::parallel_for(
372 functor, functor_name);
375 Kokkos::parallel_for(
380 functor, functor_name);
383 Kokkos::parallel_for(
388 functor, functor_name);
391 Kokkos::parallel_for(
395 functor, functor_name);
402 template<
typename Tag,
class C>
405 CallFunctorWithTeamThreadsAndVectors<Tag,C>(functor, batch_size,
_default_threads, 1);
413 CallFunctorWithTeamThreadsAndVectors<C>(functor, batch_size,
_default_threads, 1, functor_name);
416 KOKKOS_INLINE_FUNCTION
425 KOKKOS_INLINE_FUNCTION
434 KOKKOS_INLINE_FUNCTION
443 KOKKOS_INLINE_FUNCTION
std::size_t global_index_type
int _scratch_thread_level_b
Higher (slower) memory level used by Kokkos::parallel_for for thread-access memory
void CallFunctorWithTeamThreads(C functor, const global_index_type batch_size, std::string functor_name=typeid(C).name()) const
Calls a parallel_for. The parallel_for will break out over loops over teams, with each thread executing code...
void CallFunctorWithTeamThreads(C functor, const global_index_type batch_size) const
Calls a parallel_for. The parallel_for will break out over loops over teams, with each thread executing code...
int _thread_scratch_size_b
KOKKOS_INLINE_FUNCTION int getTeamScratchLevel(const int level) const
void CallFunctorWithTeamThreadsAndVectors(C functor, const global_index_type batch_size, const int threads_per_team=-1, const int vector_lanes_per_thread=-1) const
Calls a parallel_for. The parallel_for will break out over loops over teams, with each vector lane executing...
int _default_vector_lanes
int _thread_scratch_size_a
void setTeamScratchLevel(const int level, const int value)
KOKKOS_INLINE_FUNCTION int getThreadScratchLevel(const int level) const
void setThreadScratchLevel(const int level, const int value)
KOKKOS_INLINE_FUNCTION int getThreadScratchSize(const int level) const
int _default_threads
largest team size
int _scratch_team_level_b
Lowest memory level used by Kokkos::parallel_for for thread-access memory
void setTeamScratchSize(const int level, const int value)
void setThreadScratchSize(const int level, const int value)
int _scratch_thread_level_a
Higher (slower) memory level used by Kokkos::parallel_for for team-access memory
KOKKOS_INLINE_FUNCTION int getTeamScratchSize(const int level) const
int _scratch_team_level_a
Lowest memory level used by Kokkos::parallel_for for team-access memory
void CallFunctorWithTeamThreadsAndVectors(C functor, const global_index_type batch_size, const int threads_per_team=-1, const int vector_lanes_per_thread=-1, std::string functor_name=typeid(C).name()) const
Calls a parallel_for. The parallel_for will break out over loops over teams, with each vector lane executing...