Kokkos Core Kernels Package
Version of the Day
core
src
Kokkos_Macros.hpp
1
/*
2
//@HEADER
3
// ************************************************************************
4
//
5
// Kokkos v. 2.0
6
// Copyright (2014) Sandia Corporation
7
//
8
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9
// the U.S. Government retains certain rights in this software.
10
//
11
// Redistribution and use in source and binary forms, with or without
12
// modification, are permitted provided that the following conditions are
13
// met:
14
//
15
// 1. Redistributions of source code must retain the above copyright
16
// notice, this list of conditions and the following disclaimer.
17
//
18
// 2. Redistributions in binary form must reproduce the above copyright
19
// notice, this list of conditions and the following disclaimer in the
20
// documentation and/or other materials provided with the distribution.
21
//
22
// 3. Neither the name of the Corporation nor the names of the
23
// contributors may be used to endorse or promote products derived from
24
// this software without specific prior written permission.
25
//
26
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37
//
38
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
39
//
40
// ************************************************************************
41
//@HEADER
42
*/
43
44
#ifndef KOKKOS_MACROS_HPP
45
#define KOKKOS_MACROS_HPP
46
47
//----------------------------------------------------------------------------
61
#ifndef KOKKOS_DONT_INCLUDE_CORE_CONFIG_H
62
#include <KokkosCore_config.h>
63
#endif
64
65
#include <impl/Kokkos_OldMacros.hpp>
66
67
//----------------------------------------------------------------------------
97
//----------------------------------------------------------------------------
98
99
// Record whether at least one backend other than CUDA has been enabled.
#if defined( KOKKOS_ENABLE_SERIAL )   || \
    defined( KOKKOS_ENABLE_THREADS )  || \
    defined( KOKKOS_ENABLE_OPENMP )   || \
    defined( KOKKOS_ENABLE_QTHREADS ) || \
    defined( KOKKOS_ENABLE_ROCM )     || \
    defined( KOKKOS_ENABLE_OPENMPTARGET )
#  define KOKKOS_INTERNAL_ENABLE_NON_CUDA_BACKEND
#endif

// Record that none of the THREADS, CUDA, OPENMP, QTHREADS, ROCM or
// OPENMPTARGET backends has been enabled.
#if !( defined( KOKKOS_ENABLE_THREADS )  || \
       defined( KOKKOS_ENABLE_CUDA )     || \
       defined( KOKKOS_ENABLE_OPENMP )   || \
       defined( KOKKOS_ENABLE_QTHREADS ) || \
       defined( KOKKOS_ENABLE_ROCM )     || \
       defined( KOKKOS_ENABLE_OPENMPTARGET ) )
#  define KOKKOS_INTERNAL_NOT_PARALLEL
#endif

// Lambda dispatch starts out enabled; the CUDA configuration later in this
// header removes the flag again for setups that cannot support it.
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
112
113
#if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ )
// A CUDA compiler is processing this translation unit.
//
// <cuda.h> provides CUDA_VERSION, encoded as
//   CUDA_VERSION = ( MAJOR_VERSION * 1000 ) + ( MINOR_VERSION * 10 )
//
// While device code is being generated, __CUDA_ARCH__ is defined as
//   __CUDA_ARCH__ = ( MAJOR_CAPABILITY * 100 ) + ( MINOR_CAPABILITY * 10 )

#  include <cuda_runtime.h>
#  include <cuda.h>

#  ifndef CUDA_VERSION
#    error "#include <cuda.h> did not define CUDA_VERSION."
#  endif

// C++11 in device code (including 'auto' and device-internal lambdas)
// is supported by CUDA starting with version 7.0.
#  if CUDA_VERSION < 7000
#    error "Cuda version 7.0 or greater required."
#  endif

// Only checked while device code is being compiled.
#  if defined( __CUDA_ARCH__ ) && ( __CUDA_ARCH__ < 300 )
#    error "Cuda device capability >= 3.0 is required."
#  endif

#  if !defined( KOKKOS_ENABLE_CUDA_LAMBDA )
     // Without CUDA lambda support, lambda dispatch is switched off.
#    undef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
#  else
     // Lambdas created in host code and handed to the device are supported
     // starting with CUDA 7.5. The release candidate (7.5.6) still
     // identifies itself as 7.0.
#    if CUDA_VERSION < 7050
#      error "Cuda version 7.5 or greater required for host-to-device Lambda support."
#    endif

#    if defined( __NVCC__ ) && ( CUDA_VERSION < 8000 )
       // nvcc before CUDA 8: the lambda is marked __device__ only, so lambda
       // dispatch is withdrawn when a non-CUDA backend is enabled as well.
#      define KOKKOS_LAMBDA [=]__device__
#      ifdef KOKKOS_INTERNAL_ENABLE_NON_CUDA_BACKEND
#        undef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
#      endif
#    else
#      define KOKKOS_LAMBDA [=]__host__ __device__
#      ifdef KOKKOS_ENABLE_CXX1Z
#        define KOKKOS_CLASS_LAMBDA [=,*this] __host__ __device__
#      endif
#    endif

#    ifdef __NVCC__
#      define KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER
#    endif
#  endif // !defined( KOKKOS_ENABLE_CUDA_LAMBDA )

// CUDA 9 introduced an incorrect warning,
// see https://github.com/kokkos/kokkos/issues/1470
#  if ( CUDA_VERSION >= 9000 ) && ( CUDA_VERSION < 10000 )
#    define KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND
#  endif
#endif // defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ )
174
175
//----------------------------------------------------------------------------
176
// Language info: C++, CUDA, OPENMP
177
178
#if defined( KOKKOS_ENABLE_CUDA )
// The CUDA backend is enabled: Kokkos functions are marked as callable
// from both host and device code.
#  define KOKKOS_FUNCTION             __device__ __host__
#  define KOKKOS_INLINE_FUNCTION      __device__ __host__ inline
#  define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__
#endif // defined( KOKKOS_ENABLE_CUDA )
185
186
#if defined( __HCC__ ) && defined( KOKKOS_ENABLE_ROCM )
// ROCm backend compiled with HCC: functions and lambdas carry the
// 'amp' and 'cpu' attributes.
#  define KOKKOS_FUNCTION             __attribute__((amp,cpu))
#  define KOKKOS_INLINE_FUNCTION      __attribute__((amp,cpu)) inline
#  define KOKKOS_FORCEINLINE_FUNCTION __attribute__((amp,cpu)) inline
#  define KOKKOS_LAMBDA               [=] __attribute__((amp,cpu))
#endif

#ifdef _OPENMP
// Compiling with OpenMP.
// _OPENMP holds an integer of the form YYYYMM, where YYYY and MM are the
// year and month designation of the supported OpenMP API version.
// Nothing is configured here.
#endif // _OPENMP
200
201
//----------------------------------------------------------------------------
202
// Mapping compiler built-ins to KOKKOS_COMPILER_*** macros
203
204
#ifdef __NVCC__
// The NVIDIA compiler driver is in use.
// The source is parsed and split into host and device code:
// host code is compiled again by another compiler,
// device code is compiled to 'ptx'.
#  define KOKKOS_COMPILER_NVCC __NVCC__
#endif // __NVCC__
211
212
// Fallback capture clauses, used when no backend-specific definition
// was provided earlier.
#ifndef KOKKOS_LAMBDA
#  define KOKKOS_LAMBDA [=]
#endif

// The class lambda (captures *this by value) is only offered when
// KOKKOS_ENABLE_CXX1Z is set.
#ifdef KOKKOS_ENABLE_CXX1Z
#  ifndef KOKKOS_CLASS_LAMBDA
#    define KOKKOS_CLASS_LAMBDA [=,*this]
#  endif
#endif
219
220
//#if !defined( __CUDA_ARCH__ ) // Not compiling Cuda code to 'ptx'.
221
222
// Intel compiler for host code.
223
224
// Record the Intel compiler version from the first predefined macro that
// is available: __INTEL_COMPILER, then __ICC, then __ECC.
#ifdef __INTEL_COMPILER
#  define KOKKOS_COMPILER_INTEL __INTEL_COMPILER
#elif defined( __ICC )  // Old define
#  define KOKKOS_COMPILER_INTEL __ICC
#elif defined( __ECC )  // Very old define
#  define KOKKOS_COMPILER_INTEL __ECC
#endif
233
234
// Cray compiler for host code.
#ifdef _CRAYC
#  define KOKKOS_COMPILER_CRAYC _CRAYC
#endif

// IBM compilers: use the C++ version macro when present, otherwise the
// C version macro.
#ifdef __IBMCPP__
#  define KOKKOS_COMPILER_IBM __IBMCPP__
#elif defined( __IBMC__ )
#  define KOKKOS_COMPILER_IBM __IBMC__
#endif

// Apple compiler.
#ifdef __APPLE_CC__
#  define KOKKOS_COMPILER_APPLECC __APPLE_CC__
#endif
249
250
// Clang (excluding the Intel compiler, which is identified separately).
// The version is encoded as major*100 + minor*10 + patchlevel.
// The replacement list is parenthesized so the macro behaves as a single
// value inside larger expressions (e.g. KOKKOS_COMPILER_CLANG / 100).
#if defined( __clang__ ) && !defined( KOKKOS_COMPILER_INTEL )
  #define KOKKOS_COMPILER_CLANG ( __clang_major__*100 + __clang_minor__*10 + __clang_patchlevel__ )
#endif
253
254
// GCC proper: __GNUC__ is set, but neither Clang nor the Intel compiler
// (both of which may also define __GNUC__) is in use.
// The version is encoded as major*100 + minor*10 + patchlevel.
// The replacement list is parenthesized so the macro behaves as a single
// value inside larger expressions (e.g. KOKKOS_COMPILER_GNU / 100).
#if !defined( __clang__ ) && !defined( KOKKOS_COMPILER_INTEL ) && defined( __GNUC__ )
  #define KOKKOS_COMPILER_GNU ( __GNUC__*100 + __GNUC_MINOR__*10 + __GNUC_PATCHLEVEL__ )

  #if ( 472 > KOKKOS_COMPILER_GNU )
    #error "Compiling with GCC version earlier than 4.7.2 is not supported."
  #endif
#endif
261
262
// PGI compiler.
// The version is encoded as major*100 + minor*10 + patchlevel.
// The replacement list is parenthesized so the macro behaves as a single
// value inside larger expressions (e.g. KOKKOS_COMPILER_PGI / 100).
#if defined( __PGIC__ )
  #define KOKKOS_COMPILER_PGI ( __PGIC__*100 + __PGIC_MINOR__*10 + __PGIC_PATCHLEVEL__ )

  #if ( 1540 > KOKKOS_COMPILER_PGI )
    #error "Compiling with PGI version earlier than 15.4 is not supported."
  #endif
#endif
269
270
//#endif // #if !defined( __CUDA_ARCH__ )
271
272
//----------------------------------------------------------------------------
273
// Intel compiler macros
274
275
#if defined( KOKKOS_COMPILER_INTEL )
  // Loop pragmas enabled for the Intel compiler.
  #define KOKKOS_ENABLE_PRAGMA_UNROLL 1
  #define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
  #define KOKKOS_ENABLE_PRAGMA_VECTOR 1

  // The SIMD pragma is only enabled for versions below 1800.
  #if ( 1800 > KOKKOS_COMPILER_INTEL )
    #define KOKKOS_ENABLE_PRAGMA_SIMD 1
  #endif

  // The IVDEP pragma is only enabled for versions above 1400.
  // KOKKOS_COMPILER_INTEL is tested, like every other version check in this
  // section, so the result is the same whichever predefined macro
  // (__INTEL_COMPILER, __ICC or __ECC) supplied the version.
  #if ( KOKKOS_COMPILER_INTEL > 1400 )
    #define KOKKOS_ENABLE_PRAGMA_IVDEP 1
  #endif

  #if ! defined( KOKKOS_MEMORY_ALIGNMENT )
    #define KOKKOS_MEMORY_ALIGNMENT 64
  #endif

  #define KOKKOS_RESTRICT __restrict__

  #ifndef KOKKOS_IMPL_ALIGN_PTR
    #define KOKKOS_IMPL_ALIGN_PTR(size) __attribute__((align_value(size)))
  #endif

  // Versions below 13.0 are rejected; 13.x is accepted with a warning.
  #if ( 1400 > KOKKOS_COMPILER_INTEL )
    #if ( 1300 > KOKKOS_COMPILER_INTEL )
      #error "Compiling with Intel version earlier than 13.0 is not supported. Official minimal version is 14.0."
    #else
      #warning "Compiling with Intel version 13.x probably works but is not officially supported. Official minimal version is 14.0."
    #endif
  #endif

  // Inline assembly is enabled by default except on Windows.
  #if !defined( KOKKOS_ENABLE_ASM ) && !defined( _WIN32 )
    #define KOKKOS_ENABLE_ASM 1
  #endif

  // Force-inline marker, unless one was already defined earlier.
  // On Windows a plain 'inline' is used instead of the attribute.
  #if !defined( KOKKOS_FORCEINLINE_FUNCTION )
    #if !defined( _WIN32 )
      #define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
    #else
      #define KOKKOS_FORCEINLINE_FUNCTION inline
    #endif
  #endif

  #if defined( KOKKOS_ARCH_AVX512MIC )
    #define KOKKOS_ENABLE_RFO_PREFETCH 1
  #endif

  #if defined( __MIC__ )
    // Compiling for Xeon Phi
  #endif
#endif
325
326
//----------------------------------------------------------------------------
327
// Cray compiler macros
328
329
#ifdef KOKKOS_COMPILER_CRAYC
// No Cray-specific settings.
#endif

//----------------------------------------------------------------------------
// IBM Compiler macros

#ifdef KOKKOS_COMPILER_IBM
// Only the unroll pragma is enabled; the IVDEP, LOOPCOUNT, VECTOR and
// SIMD pragmas are left disabled for this compiler.
#  define KOKKOS_ENABLE_PRAGMA_UNROLL 1

#  ifndef KOKKOS_ENABLE_ASM
#    define KOKKOS_ENABLE_ASM 1
#  endif
#endif
346
347
//----------------------------------------------------------------------------
348
// CLANG compiler macros
349
350
#ifdef KOKKOS_COMPILER_CLANG
// None of the UNROLL, IVDEP, LOOPCOUNT, VECTOR or SIMD pragmas is enabled
// for Clang.

// Force-inline marker, unless one was already defined earlier.
#  ifndef KOKKOS_FORCEINLINE_FUNCTION
#    define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
#  endif

// Pointer alignment annotation, unless one was already defined earlier.
#  ifndef KOKKOS_IMPL_ALIGN_PTR
#    define KOKKOS_IMPL_ALIGN_PTR(size) __attribute__((aligned(size)))
#  endif

#endif
366
367
//----------------------------------------------------------------------------
368
// GNU Compiler macros
369
370
#ifdef KOKKOS_COMPILER_GNU
// None of the UNROLL, IVDEP, LOOPCOUNT, VECTOR or SIMD pragmas is enabled
// for GCC.

#  ifdef KOKKOS_ARCH_AVX512MIC
#    define KOKKOS_ENABLE_RFO_PREFETCH 1
#  endif

// Force-inline marker, unless one was already defined earlier.
#  ifndef KOKKOS_FORCEINLINE_FUNCTION
#    define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
#  endif

// Inline assembly is enabled by default on x86-64 and PPC64 targets,
// but never when the PGI compiler macro is present.
#  if !( defined( KOKKOS_ENABLE_ASM ) || defined( __PGIC__ ) )
#    if defined( __amd64 )  || defined( __amd64__ )  || \
        defined( __x86_64 ) || defined( __x86_64__ ) || \
        defined( __PPC64__ )
#      define KOKKOS_ENABLE_ASM 1
#    endif
#  endif
#endif
392
393
//----------------------------------------------------------------------------
394
395
#ifdef KOKKOS_COMPILER_PGI
// PGI: the UNROLL, IVDEP and VECTOR pragmas are enabled;
// LOOPCOUNT and SIMD are left disabled.
#  define KOKKOS_ENABLE_PRAGMA_UNROLL 1
#  define KOKKOS_ENABLE_PRAGMA_IVDEP  1
#  define KOKKOS_ENABLE_PRAGMA_VECTOR 1
#endif
402
403
//----------------------------------------------------------------------------
404
405
// With nvcc, the unroll pragma is enabled only while __CUDA_ARCH__ is
// defined.
#if defined( KOKKOS_COMPILER_NVCC ) && defined( __CUDA_ARCH__ )
#  define KOKKOS_ENABLE_PRAGMA_UNROLL 1
#endif
410
411
//----------------------------------------------------------------------------
412
// Define function marking macros if compiler specific macros are undefined:
413
414
// Plain C++ fallbacks for every marker that no compiler- or
// backend-specific section defined.
#ifndef KOKKOS_FORCEINLINE_FUNCTION
#  define KOKKOS_FORCEINLINE_FUNCTION inline
#endif

#ifndef KOKKOS_INLINE_FUNCTION
#  define KOKKOS_INLINE_FUNCTION inline
#endif

// An ordinary function needs no marker at all.
#ifndef KOKKOS_FUNCTION
#  define KOKKOS_FUNCTION
#endif
425
426
//----------------------------------------------------------------------------
427
// Define empty macro for restrict if necessary:
428
429
// No restrict qualifier is available: expand to nothing.
#ifndef KOKKOS_RESTRICT
#  define KOKKOS_RESTRICT
#endif
432
433
//----------------------------------------------------------------------------
434
// Define Macro for alignment:
435
436
// Default memory alignment.
#ifndef KOKKOS_MEMORY_ALIGNMENT
#  define KOKKOS_MEMORY_ALIGNMENT 64
#endif

// Default alignment threshold.
#ifndef KOKKOS_MEMORY_ALIGNMENT_THRESHOLD
#  define KOKKOS_MEMORY_ALIGNMENT_THRESHOLD 1
#endif

// Without a compiler-specific pointer alignment annotation the macro
// expands to nothing.
#ifndef KOKKOS_IMPL_ALIGN_PTR
#  define KOKKOS_IMPL_ALIGN_PTR(size) /* */
#endif
447
448
//----------------------------------------------------------------------------
449
// Determine the default execution space for parallel dispatch.
450
// There is zero or one default execution space specified.
451
452
// Reject configurations that request more than one default execution space.
// Each defined() term contributes 1 when its macro is set and 0 otherwise.
#if ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA )         + \
      defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM )         + \
      defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET ) + \
      defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )       + \
      defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )      + \
      defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS )     + \
      defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) ) > 1
#  error "More than one KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_* specified."
#endif

// If no default was requested explicitly, choose one from the enabled
// execution spaces.
// Priority: CUDA, ROCM, OPENMPTARGET, OPENMP, THREADS, SERIAL
// An explicit QTHREADS default is not treated as a choice here, and
// QTHREADS is never selected automatically.
#if !( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA )         || \
       defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM )         || \
       defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET ) || \
       defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )       || \
       defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )      || \
       defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) )
#  if defined( KOKKOS_ENABLE_CUDA )
#    define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA
#  elif defined( KOKKOS_ENABLE_ROCM )
#    define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM
#  elif defined( KOKKOS_ENABLE_OPENMPTARGET )
#    define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET
#  elif defined( KOKKOS_ENABLE_OPENMP )
#    define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP
#  elif defined( KOKKOS_ENABLE_THREADS )
#    define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS
#  else
#    define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL
#  endif
#endif
486
487
//----------------------------------------------------------------------------
488
// Determine for what space the code is being compiled:
489
490
// Exactly one KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_* macro is defined:
// CUDA when the CUDA backend is enabled and both __CUDACC__ and
// __CUDA_ARCH__ are set, ROCM_GPU under the analogous HCC conditions,
// and HOST in every other case.
#if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ ) && defined( __CUDA_ARCH__ )
#  define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA
#elif defined( KOKKOS_ENABLE_ROCM ) && defined( __HCC__ ) && defined( __HCC_ACCELERATOR__ )
#  define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_ROCM_GPU
#else
#  define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
#endif
497
498
//----------------------------------------------------------------------------
499
500
// posix_memalign use is enabled only when it was requested through
// KOKKOS_ENABLE_PERFORMANCE_POSIX_MEMALIGN and the feature-test macros
// report _POSIX_C_SOURCE >= 200112L or _XOPEN_SOURCE >= 600.
#if defined( KOKKOS_ENABLE_PERFORMANCE_POSIX_MEMALIGN ) && \
    ( ( defined( _POSIX_C_SOURCE ) && _POSIX_C_SOURCE >= 200112L ) || \
      ( defined( _XOPEN_SOURCE )   && _XOPEN_SOURCE >= 600 ) )
#  define KOKKOS_ENABLE_POSIX_MEMALIGN 1
#endif
506
507
//----------------------------------------------------------------------------
508
// The task policy is always enabled when the CUDA backend is off.
// With the CUDA backend it requires CUDA 8 or newer together with
// relocatable device code.
// nvcc relocatable device code option: --relocatable-device-code=true

#if !defined( KOKKOS_ENABLE_CUDA )
#  define KOKKOS_ENABLE_TASKDAG
#elif defined( KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE ) && ( CUDA_VERSION >= 8000 )
#  define KOKKOS_ENABLE_TASKDAG
#endif
519
520
521
// Flag builds that use the CUDA backend with CUDA 9 or newer.
#ifdef KOKKOS_ENABLE_CUDA
#  if CUDA_VERSION >= 9000
#    define KOKKOS_IMPL_CUDA_VERSION_9_WORKAROUND
#  endif
#endif
526
527
// Sentinel index: a std::size_t with every bit set.
#define KOKKOS_INVALID_INDEX (~std::size_t(0))

// Default constructor argument: the sentinel index, or 0 when deprecated
// code is enabled.
#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
#  define KOKKOS_IMPL_CTOR_DEFAULT_ARG KOKKOS_INVALID_INDEX
#else
#  define KOKKOS_IMPL_CTOR_DEFAULT_ARG 0
#endif
534
535
536
537
#endif // #ifndef KOKKOS_MACROS_HPP
538
Generated by
1.8.14