Kokkos Core Kernels Package  Version of the Day
KokkosExp_MDRangePolicy.hpp
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 2.0
6 // Copyright (2014) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
39 //
40 // ************************************************************************
41 //@HEADER
42 */
43 
44 #ifndef KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
45 #define KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
46 
47 #include <initializer_list>
48 
49 #include<impl/KokkosExp_Host_IterateTile.hpp>
50 #include <Kokkos_ExecPolicy.hpp>
51 #include <Kokkos_Parallel.hpp>
52 
53 #if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
54 #include<Cuda/KokkosExp_Cuda_IterateTile.hpp>
55 #include <Cuda/KokkosExp_Cuda_IterateTile_Refactor.hpp>
56 #endif
57 
58 #if defined( __HCC__ ) && defined( KOKKOS_ENABLE_ROCM )
59 //#include<ROCm/KokkosExp_ROCm_IterateTile.hpp>
60 #include <ROCm/KokkosExp_ROCm_IterateTile_Refactor.hpp>
61 #endif
62 
63 namespace Kokkos {
64 
65 // ------------------------------------------------------------------ //
66 
/// Selects which index of a multi-dimensional range strides fastest.
enum class Iterate
{
  /// Defer to the default chosen for the device.
  Default,
  /// Left indices stride fastest.
  Left,
  /// Right indices stride fastest.
  Right
};
73 
74 template <typename ExecSpace>
75 struct default_outer_direction
76 {
77  using type = Iterate;
78  #if defined( KOKKOS_ENABLE_CUDA)||defined( KOKKOS_ENABLE_ROCM)
79  static constexpr Iterate value = Iterate::Left;
80  #else
81  static constexpr Iterate value = Iterate::Right;
82  #endif
83 };
84 
85 template <typename ExecSpace>
86 struct default_inner_direction
87 {
88  using type = Iterate;
89  #if defined( KOKKOS_ENABLE_CUDA)||defined( KOKKOS_ENABLE_ROCM)
90  static constexpr Iterate value = Iterate::Left;
91  #else
92  static constexpr Iterate value = Iterate::Right;
93  #endif
94 };
95 
96 
97 // Iteration Pattern
98 template < unsigned N
99  , Iterate OuterDir = Iterate::Default
100  , Iterate InnerDir = Iterate::Default
101  >
102 struct Rank
103 {
104  static_assert( N != 0u, "Kokkos Error: rank 0 undefined");
105  static_assert( N != 1u, "Kokkos Error: rank 1 is not a multi-dimensional range");
106  static_assert( N < 7u, "Kokkos Error: Unsupported rank...");
107 
108  using iteration_pattern = Rank<N, OuterDir, InnerDir>;
109 
110  static constexpr int rank = N;
111  static constexpr Iterate outer_direction = OuterDir;
112  static constexpr Iterate inner_direction = InnerDir;
113 };
114 
115 
116 // multi-dimensional iteration pattern
117 template <typename... Properties>
118 struct MDRangePolicy
119  : public Kokkos::Impl::PolicyTraits<Properties ...>
120 {
121  using traits = Kokkos::Impl::PolicyTraits<Properties ...>;
122  using range_policy = RangePolicy<Properties...>;
123 
124  using impl_range_policy = RangePolicy< typename traits::execution_space
125  , typename traits::schedule_type
126  , typename traits::index_type
127  > ;
128 
129  typedef MDRangePolicy execution_policy; // needed for is_execution_space interrogation
130 
131  static_assert( !std::is_same<typename traits::iteration_pattern,void>::value
132  , "Kokkos Error: MD iteration pattern not defined" );
133 
134  using iteration_pattern = typename traits::iteration_pattern;
135  using work_tag = typename traits::work_tag;
136  using launch_bounds = typename traits::launch_bounds;
137  using member_type = typename range_policy::member_type;
138 
139  enum { rank = static_cast<int>(iteration_pattern::rank) };
140 
141  using index_type = typename traits::index_type;
142  using array_index_type = long;
143  using point_type = Kokkos::Array<array_index_type,rank>; //was index_type
144  using tile_type = Kokkos::Array<array_index_type,rank>;
145  // If point_type or tile_type is not templated on a signed integral type (if it is unsigned),
146  // then if user passes in intializer_list of runtime-determined values of
147  // signed integral type that are not const will receive a compiler error due
148  // to an invalid case for implicit conversion -
149  // "conversion from integer or unscoped enumeration type to integer type that cannot represent all values of the original, except where source is a constant expression whose value can be stored exactly in the target type"
150  // This would require the user to either pass a matching index_type parameter
151  // as template parameter to the MDRangePolicy or static_cast the individual values
152 
153  point_type m_lower;
154  point_type m_upper;
155  tile_type m_tile;
156  point_type m_tile_end;
157  index_type m_num_tiles;
158  index_type m_prod_tile_dims;
159 
160 /*
161  // NDE enum impl definition alternative - replace static constexpr int ?
162  enum { outer_direction = static_cast<int> (
163  (iteration_pattern::outer_direction != Iterate::Default)
164  ? iteration_pattern::outer_direction
165  : default_outer_direction< typename traits::execution_space>::value ) };
166 
167  enum { inner_direction = static_cast<int> (
168  iteration_pattern::inner_direction != Iterate::Default
169  ? iteration_pattern::inner_direction
170  : default_inner_direction< typename traits::execution_space>::value ) };
171 
172  enum { Right = static_cast<int>( Iterate::Right ) };
173  enum { Left = static_cast<int>( Iterate::Left ) };
174 */
175  //static constexpr int rank = iteration_pattern::rank;
176 
177  static constexpr int outer_direction = static_cast<int> (
178  (iteration_pattern::outer_direction != Iterate::Default)
179  ? iteration_pattern::outer_direction
180  : default_outer_direction< typename traits::execution_space>::value );
181 
182  static constexpr int inner_direction = static_cast<int> (
183  iteration_pattern::inner_direction != Iterate::Default
184  ? iteration_pattern::inner_direction
185  : default_inner_direction< typename traits::execution_space>::value ) ;
186 
187  // Ugly ugly workaround intel 14 not handling scoped enum correctly
188  static constexpr int Right = static_cast<int>( Iterate::Right );
189  static constexpr int Left = static_cast<int>( Iterate::Left );
190 
191  MDRangePolicy( point_type const& lower, point_type const& upper, tile_type const& tile = tile_type{} )
192  : m_lower(lower)
193  , m_upper(upper)
194  , m_tile(tile)
195  , m_num_tiles(1)
196  , m_prod_tile_dims(1)
197  {
198  // Host
199  if ( true
200  #if defined(KOKKOS_ENABLE_CUDA)
201  && !std::is_same< typename traits::execution_space, Kokkos::Cuda >::value
202  #endif
203  #if defined(KOKKOS_ENABLE_ROCM)
204  && !std::is_same< typename traits::execution_space, Kokkos::Experimental::ROCm >::value
205  #endif
206  )
207  {
208  index_type span;
209  for (int i=0; i<rank; ++i) {
210  span = upper[i] - lower[i];
211  if ( m_tile[i] <= 0 ) {
212  if ( ((int)inner_direction == (int)Right && (i < rank-1))
213  || ((int)inner_direction == (int)Left && (i > 0)) )
214  {
215  m_tile[i] = 2;
216  }
217  else {
218  m_tile[i] = (span == 0 ? 1 : span);
219  }
220  }
221  m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
222  m_num_tiles *= m_tile_end[i];
223  m_prod_tile_dims *= m_tile[i];
224  }
225  }
226  #if defined(KOKKOS_ENABLE_CUDA)
227  else // Cuda
228  {
229  index_type span;
230  int increment = 1;
231  int rank_start = 0;
232  int rank_end = rank;
233  if((int)inner_direction == (int)Right) {
234  increment = -1;
235  rank_start = rank-1;
236  rank_end = -1;
237  }
238  for (int i=rank_start; i!=rank_end; i+=increment) {
239  span = m_upper[i] - m_lower[i];
240  if ( m_tile[i] <= 0 ) {
241  // TODO: determine what is a good default tile size for cuda
242  // may be rank dependent
243  if ( ((int)inner_direction == (int)Right && (i < rank-1))
244  || ((int)inner_direction == (int)Left && (i > 0)) )
245  {
246  if ( m_prod_tile_dims < 256 ) {
247  m_tile[i] = 2;
248  } else {
249  m_tile[i] = 1;
250  }
251  }
252  else {
253  m_tile[i] = 16;
254  }
255  }
256  m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
257  m_num_tiles *= m_tile_end[i];
258  m_prod_tile_dims *= m_tile[i];
259  }
260  if ( m_prod_tile_dims > 1024 ) { // Match Cuda restriction for ParallelReduce; 1024,1024,64 max per dim (Kepler), but product num_threads < 1024
261  printf(" Tile dimensions exceed Cuda limits\n");
262  Kokkos::abort(" Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
263  //Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
264  }
265  }
266  #endif
267  #if defined(KOKKOS_ENABLE_ROCM)
268  else // ROCm
269  {
270  index_type span;
271  int increment = 1;
272  int rank_start = 0;
273  int rank_end = rank;
274  if((int)inner_direction == (int)Right) {
275  increment = -1;
276  rank_start = rank-1;
277  rank_end = -1;
278  }
279  for (int i=rank_start; i!=rank_end; i+=increment) {
280  span = m_upper[i] - m_lower[i];
281  if ( m_tile[i] <= 0 ) {
282  // TODO: determine what is a good default tile size for rocm
283  // may be rank dependent
284  if ( ((int)inner_direction == (int)Right && (i < rank-1))
285  || ((int)inner_direction == (int)Left && (i > 0)) )
286  {
287  if ( m_prod_tile_dims < 256 ) {
288  m_tile[i] = 4;
289  } else {
290  m_tile[i] = 1;
291  }
292  }
293  else {
294  m_tile[i] = 16;
295  }
296  }
297  m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
298  m_num_tiles *= m_tile_end[i];
299  m_prod_tile_dims *= m_tile[i];
300  }
301  if ( m_prod_tile_dims > 1024 ) { //but product num_threads < 1024
302  printf(" Tile dimensions exceed ROCm limits\n");
303  Kokkos::abort(" ROCm ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
304  //Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
305  }
306  }
307  #endif
308  }
309 
310 
311  template < typename LT , typename UT , typename TT = array_index_type >
312  MDRangePolicy( std::initializer_list<LT> const& lower, std::initializer_list<UT> const& upper, std::initializer_list<TT> const& tile = {} )
313  {
314 
315  if(static_cast<int>(m_lower.size()) != rank || static_cast<int>(m_upper.size()) != rank)
316  Kokkos::abort("MDRangePolicy: Constructor initializer lists have wrong size");
317 
318  for ( auto i = 0; i < rank; ++i ) {
319  m_lower[i] = static_cast<array_index_type>(lower.begin()[i]);
320  m_upper[i] = static_cast<array_index_type>(upper.begin()[i]);
321  if(static_cast<int>(tile.size())==rank)
322  m_tile[i] = static_cast<array_index_type>(tile.begin()[i]);
323  else
324  m_tile[i] = 0;
325  }
326 
327  m_num_tiles = 1;
328  m_prod_tile_dims = 1;
329 
330  // Host
331  if ( true
332  #if defined(KOKKOS_ENABLE_CUDA)
333  && !std::is_same< typename traits::execution_space, Kokkos::Cuda >::value
334  #endif
335  #if defined(KOKKOS_ENABLE_ROCM)
336  && !std::is_same< typename traits::execution_space, Kokkos::Experimental::ROCm >::value
337  #endif
338  )
339  {
340  index_type span;
341  for (int i=0; i<rank; ++i) {
342  span = m_upper[i] - m_lower[i];
343  if ( m_tile[i] <= 0 ) {
344  if ( ((int)inner_direction == (int)Right && (i < rank-1))
345  || ((int)inner_direction == (int)Left && (i > 0)) )
346  {
347  m_tile[i] = 2;
348  }
349  else {
350  m_tile[i] = (span == 0 ? 1 : span);
351  }
352  }
353  m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
354  m_num_tiles *= m_tile_end[i];
355  m_prod_tile_dims *= m_tile[i];
356  }
357  }
358  #if defined(KOKKOS_ENABLE_CUDA)
359  else // Cuda
360  {
361  index_type span;
362  int increment = 1;
363  int rank_start = 0;
364  int rank_end = rank;
365  if((int)inner_direction == (int)Right) {
366  increment = -1;
367  rank_start = rank-1;
368  rank_end = -1;
369  }
370  for (int i=rank_start; i!=rank_end; i+=increment) {
371  span = m_upper[i] - m_lower[i];
372  if ( m_tile[i] <= 0 ) {
373  // TODO: determine what is a good default tile size for cuda
374  // may be rank dependent
375  if ( ((int)inner_direction == (int)Right && (i < rank-1))
376  || ((int)inner_direction == (int)Left && (i > 0)) )
377  {
378  if ( m_prod_tile_dims < 256 ) {
379  m_tile[i] = 2;
380  } else {
381  m_tile[i] = 1;
382  }
383  }
384  else {
385  m_tile[i] = 16;
386  }
387  }
388  m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
389  m_num_tiles *= m_tile_end[i];
390  m_prod_tile_dims *= m_tile[i];
391  }
392  if ( m_prod_tile_dims > 1024 ) { // Match Cuda restriction for ParallelReduce; 1024,1024,64 max per dim (Kepler), but product num_threads < 1024
393  printf(" Tile dimensions exceed Cuda limits\n");
394  Kokkos::abort(" Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
395  //Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
396  }
397  }
398  #endif
399  #if defined(KOKKOS_ENABLE_ROCM)
400  else // ROCm
401  {
402  index_type span;
403  int increment = 1;
404  int rank_start = 0;
405  int rank_end = rank;
406  if((int)inner_direction == (int)Right) {
407  increment = -1;
408  rank_start = rank-1;
409  rank_end = -1;
410  }
411  for (int i=rank_start; i!=rank_end; i+=increment) {
412  span = m_upper[i] - m_lower[i];
413  if ( m_tile[i] <= 0 ) {
414  // TODO: determine what is a good default tile size for cuda
415  // may be rank dependent
416  if ( ((int)inner_direction == (int)Right && (i < rank-1))
417  || ((int)inner_direction == (int)Left && (i > 0)) )
418  {
419  if ( m_prod_tile_dims < 256 ) {
420  m_tile[i] = 2;
421  } else {
422  m_tile[i] = 1;
423  }
424  }
425  else {
426  m_tile[i] = 16;
427  }
428  }
429  m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]);
430  m_num_tiles *= m_tile_end[i];
431  m_prod_tile_dims *= m_tile[i];
432  }
433  if ( m_prod_tile_dims > 1024 ) { // Match ROCm restriction for ParallelReduce; 1024,1024,1024 max per dim , but product num_threads < 1024
434  printf(" Tile dimensions exceed ROCm limits\n");
435  Kokkos::abort(" ROCm ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
436  //Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
437  }
438  }
439  #endif
440  }
441 
442 };
443 
444 } // namespace Kokkos
445 
446 // For backward compatibility
447 namespace Kokkos { namespace Experimental {
448  using Kokkos::MDRangePolicy;
449  using Kokkos::Rank;
450  using Kokkos::Iterate;
451 } } // end Kokkos::Experimental
452 // ------------------------------------------------------------------ //
453 
454 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
455 // ------------------------------------------------------------------ //
// md_parallel_for - deprecated; use parallel_for instead
457 // ------------------------------------------------------------------ //
458 
459 namespace Kokkos { namespace Experimental {
460 
461 template <typename MDRange, typename Functor, typename Enable = void>
462 void md_parallel_for( MDRange const& range
463  , Functor const& f
464  , const std::string& str = ""
465  , typename std::enable_if<( true
466  #if defined( KOKKOS_ENABLE_CUDA)
467  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
468  #endif
469  #if defined( KOKKOS_ENABLE_ROCM)
470  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Experimental::ROCm>::value
471  #endif
472  ) >::type* = 0
473  )
474 {
475  Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, void> g(range, f);
476 
477  using range_policy = typename MDRange::impl_range_policy;
478 
479  Kokkos::parallel_for( range_policy(0, range.m_num_tiles).set_chunk_size(1), g, str );
480 }
481 
482 template <typename MDRange, typename Functor>
483 void md_parallel_for( const std::string& str
484  , MDRange const& range
485  , Functor const& f
486  , typename std::enable_if<( true
487  #if defined( KOKKOS_ENABLE_CUDA)
488  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
489  #endif
490  #if defined( KOKKOS_ENABLE_ROCM)
491  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Experimental::ROCm>::value
492  #endif
493  ) >::type* = 0
494  )
495 {
496  Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, void> g(range, f);
497 
498  using range_policy = typename MDRange::impl_range_policy;
499 
500  Kokkos::parallel_for( range_policy(0, range.m_num_tiles).set_chunk_size(1), g, str );
501 }
502 
503 // Cuda specialization
504 #if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
505 template <typename MDRange, typename Functor>
506 void md_parallel_for( const std::string& str
507  , MDRange const& range
508  , Functor const& f
509  , typename std::enable_if<( true
510  #if defined( KOKKOS_ENABLE_CUDA)
511  && std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
512  #endif
513  ) >::type* = 0
514  )
515 {
516  Kokkos::Impl::DeviceIterateTile<MDRange, Functor, typename MDRange::work_tag> closure(range, f);
517  closure.execute();
518 }
519 
520 template <typename MDRange, typename Functor>
521 void md_parallel_for( MDRange const& range
522  , Functor const& f
523  , const std::string& str = ""
524  , typename std::enable_if<( true
525  #if defined( KOKKOS_ENABLE_CUDA)
526  && std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
527  #endif
528  ) >::type* = 0
529  )
530 {
531  Kokkos::Impl::DeviceIterateTile<MDRange, Functor, typename MDRange::work_tag> closure(range, f);
532  closure.execute();
533 }
534 #endif
535 // ------------------------------------------------------------------ //
536 
537 // ------------------------------------------------------------------ //
// md_parallel_reduce - deprecated; use parallel_reduce instead
539 // ------------------------------------------------------------------ //
540 template <typename MDRange, typename Functor, typename ValueType>
541 void md_parallel_reduce( MDRange const& range
542  , Functor const& f
543  , ValueType & v
544  , const std::string& str = ""
545  , typename std::enable_if<( true
546  #if defined( KOKKOS_ENABLE_CUDA)
547  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
548  #endif
549  #if defined( KOKKOS_ENABLE_ROCM)
550  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Experimental::ROCm>::value
551  #endif
552  ) >::type* = 0
553  )
554 {
555  Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, ValueType> g(range, f);
556 
557  using range_policy = typename MDRange::impl_range_policy;
558  Kokkos::parallel_reduce( str, range_policy(0, range.m_num_tiles).set_chunk_size(1), g, v );
559 }
560 
561 template <typename MDRange, typename Functor, typename ValueType>
562 void md_parallel_reduce( const std::string& str
563  , MDRange const& range
564  , Functor const& f
565  , ValueType & v
566  , typename std::enable_if<( true
567  #if defined( KOKKOS_ENABLE_CUDA)
568  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
569  #endif
570  #if defined( KOKKOS_ENABLE_ROCM)
571  && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Experimental::ROCm>::value
572  #endif
573  ) >::type* = 0
574  )
575 {
576  Kokkos::Impl::Experimental::MDFunctor<MDRange, Functor, ValueType> g(range, f);
577 
578  using range_policy = typename MDRange::impl_range_policy;
579 
580  Kokkos::parallel_reduce( str, range_policy(0, range.m_num_tiles).set_chunk_size(1), g, v );
581 }
582 
583 // Cuda - md_parallel_reduce not implemented - use parallel_reduce
584 
585 } } // namespace Kokkos::Experimental
586 #endif
587 
588 #endif //KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
589 
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
Execute functor in parallel according to the execution policy.
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=0)
Parallel reduction.
Declaration of parallel operators.
KOKKOS_INLINE_FUNCTION constexpr unsigned rank(const View< D, P... > &V)
Temporary free function rank() until rank() is implemented in the View.