// Driver entry point: registers command-line options, parses them, initializes
// Kokkos, and calls performance_test_driver<Device>() for each Kokkos device
// whose KOKKOS_ENABLE_* section is compiled in.
// NOTE(review): this listing is an excerpt with lines missing (no braces,
// #endif lines, or declarations of nGrid, nIter, print, check, num_cores,
// device_id, vtune, verbose, success are visible here) -- confirm against the
// complete file before relying on these comments.
48 #include "Kokkos_Core.hpp" 51 #include "Teuchos_CommandLineProcessor.hpp" 52 #include "Teuchos_StandardCatchMacros.hpp" 53 #ifdef KOKKOS_ENABLE_CUDA 54 #include "cuda_runtime_api.h" 58 #include <sys/types.h> 61 int main(
int argc,
char *argv[])
// Command-line processor that all options below are registered on.
68 Teuchos::CommandLineProcessor CLP;
// Description text; the call that consumes this string is not visible in this
// excerpt (presumably the processor's documentation string -- TODO confirm).
70 "This test performance of MP::Vector FEM assembly.\n");
// --n: mesh points per direction; per its help text, zero selects the range
// given by --n-begin / --n-end / --n-step.
72 CLP.setOption(
"n", &nGrid,
"Number of mesh points in each direction. Set to zero to use a range");
// --n-begin / --n-end / --n-step: bounds and increment of the mesh-size range.
74 CLP.setOption(
"n-begin", &nGridBegin,
"Beginning number of mesh points in each direction.");
76 CLP.setOption(
"n-end", &nGridEnd,
"Ending number of mesh points in each direction.");
78 CLP.setOption(
"n-step", &nGridStep,
"Increment in number of mesh points in each direction.");
// --ni: number of assembly iterations.
80 CLP.setOption(
"ni", &nIter,
"Number of assembly iterations");
// --print / --no-print: boolean toggle for debugging output.
82 CLP.setOption(
"print",
"no-print", &print,
"Print debugging output");
// --check / --no-check: boolean toggle for correctness checking.
84 CLP.setOption(
"check",
"no-check", &
check,
"Check correctness");
// --quadratic / --linear: basis-function order; linear (false) is the default.
85 bool quadratic =
false;
86 CLP.setOption(
"quadratic",
"linear", &quadratic,
"Use quadratic basis functions");
// --cores: CPU core count. NOTE(review): num_cores is not passed to any call
// visible in this excerpt -- confirm where it is consumed.
88 CLP.setOption(
"cores", &num_cores,
89 "Number of CPU cores to use (defaults to all)");
// --hyperthreads: hyper threads per core; -1 is the initial value (the help
// text describes the default as "all").
90 int num_hyper_threads = -1;
91 CLP.setOption(
"hyperthreads", &num_hyper_threads,
92 "Number of hyper threads per core to use (defaults to all)");
// Per-backend enable toggles, each registered only when the corresponding
// KOKKOS_ENABLE_* macro is defined.
93 #ifdef KOKKOS_ENABLE_THREADS 95 CLP.setOption(
"threads",
"no-threads", &threads,
"Enable Threads device");
97 #ifdef KOKKOS_ENABLE_OPENMP 99 CLP.setOption(
"openmp",
"no-openmp", &openmp,
"Enable OpenMP device");
101 #ifdef KOKKOS_ENABLE_CUDA 103 CLP.setOption(
"cuda",
"no-cuda", &cuda,
"Enable Cuda device");
// --device: CUDA device ID, later passed to cudaGetDeviceProperties().
105 CLP.setOption(
"device", &device_id,
"CUDA device ID.");
// --vtune / --no-vtune: toggle for attaching the VTune profiler.
108 CLP.setOption(
"vtune",
"no-vtune", &vtune,
"connect to vtune");
// Parse argv into the variables registered above.
109 CLP.parse( argc, argv );
// Build a shell command that targets this process's PID, echo it to stdout,
// then run it in the background (trailing " &") via system().
// The initializer of vtune_loc and the start of the stream expression are not
// visible in this excerpt; presumably this section is guarded by the vtune
// flag -- TODO confirm.
// NOTE(review): the return value of system() is ignored, so a failed launch
// goes unnoticed.
118 std::stringstream cmd;
119 pid_t my_os_pid=getpid();
120 const std::string vtune_loc =
122 const std::string output_dir =
"./vtune/vtune.0";
124 <<
" -collect hotspots -result-dir " << output_dir
125 <<
" -target-pid " << my_os_pid <<
" &";
126 std::cout << cmd.str() << std::endl;
127 system(cmd.str().c_str());
// Initialize Kokkos, forwarding the command-line arguments.
131 Kokkos::initialize(argc,argv);
// Threads backend: print its concurrency, then run the driver over the mesh
// range (nGridBegin, nGridEnd, nGridStep) with the parsed settings.
132 #ifdef KOKKOS_ENABLE_THREADS 134 typedef Kokkos::Threads Device;
136 std::cout << std::endl
137 <<
"Threads performance with " << Kokkos::Threads::concurrency()
138 <<
" threads:" << std::endl;
140 performance_test_driver<Device>(
141 print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic,
check);
// OpenMP backend: same sequence as the Threads section above.
145 #ifdef KOKKOS_ENABLE_OPENMP 147 typedef Kokkos::OpenMP Device;
149 std::cout << std::endl
150 <<
"OpenMP performance with " << Kokkos::OpenMP::concurrency()
151 <<
" threads:" << std::endl;
153 performance_test_driver<Device>(
154 print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic,
check);
// CUDA backend: query the properties of device_id so its name can be printed,
// then run the driver.
// NOTE(review): the cudaGetDeviceProperties() return code is not checked, and
// the message below repeats the word "performance".
159 #ifdef KOKKOS_ENABLE_CUDA 161 typedef Kokkos::Cuda Device;
163 cudaDeviceProp deviceProp;
164 cudaGetDeviceProperties(&deviceProp, device_id);
165 std::cout << std::endl
166 <<
"CUDA performance performance with device " << device_id
168 << deviceProp.name <<
"):" 171 performance_test_driver<Device>(
172 print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic,
check);
// Teuchos standard catch macro, given verbose, std::cerr and success; the
// matching try block is not visible in this excerpt.
178 TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);
int main(int argc, char *argv[])
int check(Epetra_CrsGraph &A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int *MyGlobalElements, bool verbose)