46 #ifndef MUELU_AMGXOPERATOR_DECL_HPP 47 #define MUELU_AMGXOPERATOR_DECL_HPP 49 #if defined (HAVE_MUELU_AMGX) 50 #include <Teuchos_ParameterList.hpp> 52 #include <Tpetra_Operator.hpp> 53 #include <Tpetra_CrsMatrix.hpp> 54 #include <Tpetra_MultiVector.hpp> 55 #include <Tpetra_Distributor.hpp> 56 #include <Tpetra_HashTable.hpp> 57 #include <Tpetra_Import.hpp> 58 #include <Tpetra_Import_Util.hpp> 62 #include "MueLu_TpetraOperator.hpp" 65 #include <cuda_runtime.h> 81 class AMGXOperator :
public TpetraOperator<Scalar, LocalOrdinal, GlobalOrdinal, Node>,
public BaseClass {
88 typedef Tpetra::Map<LO,GO,NO>
Map;
97 AMGXOperator(
const Teuchos::RCP<Tpetra::CrsMatrix<SC,LO,GO,NO> > &InA, Teuchos::ParameterList ¶mListIn) { }
120 Scalar alpha = Teuchos::ScalarTraits<Scalar>::one(),
Scalar beta = Teuchos::ScalarTraits<Scalar>::zero())
const {
150 typedef Tpetra::Map<LO,GO,NO>
Map;
155 const int* nbrs,
const Map& map,
const std::string& label) {
156 for (
int p = 0; p < comm->getSize(); p++) {
157 if (comm->getRank() == p) {
158 std::cout <<
"========\n" << label <<
", lid (gid), PID " << p <<
"\n========" << std::endl;
160 for (
size_t i = 0; i < vec.size(); ++i) {
161 std::cout <<
" neighbor " << nbrs[i] <<
" :";
162 for (
size_t j = 0; j < vec[i].size(); ++j)
163 std::cout <<
" " << vec[i][j] <<
" (" << map.getGlobalElement(perm[vec[i][j]]) <<
")";
164 std::cout << std::endl;
166 std::cout << std::endl;
178 AMGXOperator(
const Teuchos::RCP<Tpetra::CrsMatrix<SC,LO,GO,NO> > &inA, Teuchos::ParameterList ¶mListIn) {
179 RCP<const Teuchos::Comm<int> > comm = inA->getRowMap()->getComm();
180 int numProcs = comm->getSize();
181 int myRank = comm->getRank();
184 RCP<Teuchos::Time> amgxTimer = Teuchos::TimeMonitor::getNewTimer(
"MueLu: AMGX: initialize");
193 AMGX_SAFE_CALL(AMGX_install_signal_handler());
194 Teuchos::ParameterList configs = paramListIn.sublist(
"amgx:params",
true);
195 if (configs.isParameter(
"json file")) {
196 AMGX_SAFE_CALL(AMGX_config_create_from_file(&Config_, (
const char *) &configs.get<std::string>(
"json file")[0]));
198 std::ostringstream oss;
200 ParameterList::ConstIterator itr;
201 for (itr = configs.begin(); itr != configs.end(); ++itr) {
202 const std::string& name = configs.name(itr);
203 const ParameterEntry& entry = configs.entry(itr);
204 oss << name <<
"=" << filterValueToString(entry) <<
", ";
207 std::string configString = oss.str();
208 if (configString ==
"") {
212 AMGX_SAFE_CALL(AMGX_config_create(&Config_, configString.c_str()));
224 RCP<const Teuchos::MpiComm<int> > tmpic = Teuchos::rcp_dynamic_cast<
const Teuchos::MpiComm<int> >(comm->duplicate());
227 RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();
228 MPI_Comm mpiComm = *rawMpiComm;
233 AMGX_resources_create_simple(&Resources_, Config_);
237 cudaGetDeviceCount(&numGPUDevices);
238 int device[] = {(comm->getRank() % numGPUDevices)};
240 AMGX_config_add_parameters(&Config_,
"communicator=MPI");
242 AMGX_resources_create(&Resources_, Config_, &mpiComm, 1, device);
244 AMGX_resources_create(&Resources_, Config_, MPI_COMM_WORLD, 1, device);
248 AMGX_Mode mode = AMGX_mode_dDDI;
249 AMGX_solver_create(&Solver_, Resources_, mode, Config_);
250 AMGX_matrix_create(&A_, Resources_, mode);
251 AMGX_vector_create(&X_, Resources_, mode);
252 AMGX_vector_create(&Y_, Resources_, mode);
255 amgxTimer->incrementNumCalls();
257 std::vector<int> amgx2muelu;
261 RCP<const Tpetra::Import<LO,GO,NO> > importer = inA->getCrsGraph()->getImporter();
265 Tpetra::Distributor distributor = importer->getDistributor();
267 Array<int> sendRanks = distributor.getProcsTo();
268 Array<int> recvRanks = distributor.getProcsFrom();
270 std::sort(sendRanks.begin(), sendRanks.end());
271 std::sort(recvRanks.begin(), recvRanks.end());
274 if (sendRanks.size() != recvRanks.size()) {
277 for (
int i = 0; i < sendRanks.size(); i++) {
278 if (recvRanks[i] != sendRanks[i])
283 TEUCHOS_TEST_FOR_EXCEPTION(!match,
MueLu::Exceptions::RuntimeError,
"AMGX requires that the processors that we send to and receive from are the same. " 284 "This is not the case: we send to {" << sendRanks <<
"} and receive from {" << recvRanks <<
"}");
286 int num_neighbors = sendRanks.size();
287 const int* neighbors = &sendRanks[0];
293 Tpetra::Details::HashTable<int,int> hashTable(3*num_neighbors);
294 for (
int i = 0; i < num_neighbors; i++)
295 hashTable.add(neighbors[i], i);
298 ArrayView<const int> exportLIDs = importer->getExportLIDs();
299 ArrayView<const int> exportPIDs = importer->getExportPIDs();
300 Array<int> importPIDs;
301 Tpetra::Import_Util::getPids(*importer, importPIDs,
true);
304 RCP<const Map> rowMap = inA->getRowMap();
305 RCP<const Map> colMap = inA->getColMap();
307 int N = rowMap->getNodeNumElements(), Nc = colMap->getNodeNumElements();
308 muelu2amgx_.resize(Nc, -1);
310 int numUniqExports = 0;
311 for (
int i = 0; i < exportLIDs.size(); i++)
312 if (muelu2amgx_[exportLIDs[i]] == -1) {
314 muelu2amgx_[exportLIDs[i]] = -2;
317 int localOffset = 0, exportOffset = N - numUniqExports;
319 for (
int i = 0; i < exportLIDs.size(); i++)
320 if (muelu2amgx_[exportLIDs[i]] < 0)
321 muelu2amgx_[exportLIDs[i]] = exportOffset++;
323 for (
int i = 0; i < N; i++)
324 if (muelu2amgx_[i] == -1)
325 muelu2amgx_[i] = localOffset++;
327 int importOffset = N;
328 for (
int k = 0; k < num_neighbors; k++)
329 for (
int i = 0; i < importPIDs.size(); i++)
330 if (importPIDs[i] != -1 && hashTable.get(importPIDs[i]) == k)
331 muelu2amgx_[i] = importOffset++;
333 amgx2muelu.resize(muelu2amgx_.size());
334 for (
int i = 0; i < (int)muelu2amgx_.size(); i++)
335 amgx2muelu[muelu2amgx_[i]] = i;
338 std::vector<std::vector<int> > sendDatas (num_neighbors);
339 std::vector<int> send_sizes(num_neighbors, 0);
340 for (
int i = 0; i < exportPIDs.size(); i++) {
341 int index = hashTable.get(exportPIDs[i]);
342 sendDatas [index].push_back(muelu2amgx_[exportLIDs[i]]);
347 std::vector<const int*> send_maps(num_neighbors);
348 for (
int i = 0; i < num_neighbors; i++)
349 send_maps[i] = &(sendDatas[i][0]);
355 std::vector<std::vector<int> > recvDatas (num_neighbors);
356 std::vector<int> recv_sizes(num_neighbors, 0);
357 for (
int i = 0; i < importPIDs.size(); i++)
358 if (importPIDs[i] != -1) {
359 int index = hashTable.get(importPIDs[i]);
360 recvDatas [index].push_back(muelu2amgx_[i]);
365 std::vector<const int*> recv_maps(num_neighbors);
366 for (
int i = 0; i < num_neighbors; i++)
367 recv_maps[i] = &(recvDatas[i][0]);
372 AMGX_SAFE_CALL(AMGX_matrix_comm_from_maps_one_ring(A_, 1, num_neighbors, neighbors, &send_sizes[0], &send_maps[0], &recv_sizes[0], &recv_maps[0]));
374 AMGX_vector_bind(X_, A_);
375 AMGX_vector_bind(Y_, A_);
378 RCP<Teuchos::Time> matrixTransformTimer = Teuchos::TimeMonitor::getNewTimer(
"MueLu: AMGX: transform matrix");
379 matrixTransformTimer->start();
381 ArrayRCP<const size_t> ia_s;
382 ArrayRCP<const int> ja;
383 ArrayRCP<const double> a;
384 inA->getAllValues(ia_s, ja, a);
386 ArrayRCP<int> ia(ia_s.size());
387 for (
int i = 0; i < ia.size(); i++)
388 ia[i] = Teuchos::as<int>(ia_s[i]);
390 N_ = inA->getNodeNumRows();
391 int nnz = inA->getNodeNumEntries();
393 matrixTransformTimer->stop();
394 matrixTransformTimer->incrementNumCalls();
399 RCP<Teuchos::Time> matrixTimer = Teuchos::TimeMonitor::getNewTimer(
"MueLu: AMGX: transfer matrix CPU->GPU");
400 matrixTimer->start();
402 AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia[0], &ja[0], &a[0], NULL);
406 std::vector<int> ia_new(ia.size());
407 std::vector<int> ja_new(ja.size());
408 std::vector<double> a_new (a.size());
411 for (
int i = 0; i < N_; i++) {
412 int oldRow = amgx2muelu[i];
414 ia_new[i+1] = ia_new[i] + (ia[oldRow+1] - ia[oldRow]);
416 for (
int j = ia[oldRow]; j < ia[oldRow+1]; j++) {
417 int offset = j - ia[oldRow];
418 ja_new[ia_new[i] + offset] = muelu2amgx_[ja[j]];
419 a_new [ia_new[i] + offset] = a[j];
427 for (
int j = ia_new[i]; j < ia_new[i+1]-1; j++)
428 if (ja_new[j] > ja_new[j+1]) {
429 std::swap(ja_new[j], ja_new[j+1]);
430 std::swap(a_new [j], a_new [j+1]);
433 }
while (swapped ==
true);
436 AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia_new[0], &ja_new[0], &a_new[0], NULL);
439 matrixTimer->incrementNumCalls();
441 domainMap_ = inA->getDomainMap();
442 rangeMap_ = inA->getRangeMap();
444 RCP<Teuchos::Time> realSetupTimer = Teuchos::TimeMonitor::getNewTimer(
"MueLu: AMGX: setup (total)");
445 realSetupTimer->start();
446 AMGX_solver_setup(Solver_, A_);
447 realSetupTimer->stop();
448 realSetupTimer->incrementNumCalls();
450 vectorTimer1_ = Teuchos::TimeMonitor::getNewTimer(
"MueLu: AMGX: transfer vectors CPU->GPU");
451 vectorTimer2_ = Teuchos::TimeMonitor::getNewTimer(
"MueLu: AMGX: transfer vector GPU->CPU");
452 solverTimer_ = Teuchos::TimeMonitor::getNewTimer(
"MueLu: AMGX: Solve (total)");
459 AMGX_SAFE_CALL(AMGX_solver_destroy(Solver_));
460 AMGX_SAFE_CALL(AMGX_vector_destroy(X_));
461 AMGX_SAFE_CALL(AMGX_vector_destroy(Y_));
462 AMGX_SAFE_CALL(AMGX_matrix_destroy(A_));
463 AMGX_SAFE_CALL(AMGX_resources_destroy(Resources_));
464 AMGX_SAFE_CALL(AMGX_config_destroy(Config_));
480 SC alpha = Teuchos::ScalarTraits<SC>::one(),
SC beta = Teuchos::ScalarTraits<SC>::zero())
const;
490 return ( entry.isList() ? std::string(
"...") :
toString(entry.getAny()) );
495 AMGX_matrix_get_size(A_, &n, &sizeX, &sizeY);
501 AMGX_solver_get_iterations_number(Solver_, &it);
506 AMGX_SOLVE_STATUS status;
507 AMGX_solver_get_status(Solver_, &status);
516 AMGX_matrix_handle
A_;
517 AMGX_vector_handle
X_;
518 AMGX_vector_handle
Y_;
533 #endif //HAVE_MUELU_AMGX 534 #endif // MUELU_AMGXOPERATOR_DECL_HPP RCP< MueLu::Hierarchy< SC, LO, GO, NO > > GetHierarchy() const
RCP< const Map > domainMap_
bool hasTransposeApply() const
Indicates whether this operator supports applying the adjoint operator.
virtual ~AMGXOperator()
Destructor.
MueLu::DefaultLocalOrdinal LocalOrdinal
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
AMGXOperator(const Teuchos::RCP< Tpetra::CrsMatrix< SC, LO, GO, NO > > &inA, Teuchos::ParameterList ¶mListIn)
Tpetra::Map< LO, GO, NO > Map
AMGX_config_handle Config_
Teuchos::RCP< const Map > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
void printMaps(Teuchos::RCP< const Teuchos::Comm< int > > &comm, const std::vector< std::vector< int > > &vec, const std::vector< int > &perm, const int *nbrs, const Map &map, const std::string &label)
AMGX_solver_handle Solver_
Namespace for MueLu classes and methods.
std::vector< int > muelu2amgx_
virtual ~AMGXOperator()
Destructor.
Tpetra::Map< LO, GO, NO > Map
RCP< const Map > rangeMap_
RCP< Teuchos::Time > solverTimer_
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
RCP< MueLu::Hierarchy< SC, LO, GO, NO > > GetHierarchy() const
Teuchos::RCP< const Map > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
std::string filterValueToString(const Teuchos::ParameterEntry &entry)
AMGXOperator(const Teuchos::RCP< Tpetra::CrsMatrix< SC, LO, GO, NO > > &InA, Teuchos::ParameterList ¶mListIn)
Constructor.
AMGX_resources_handle Resources_
void apply(const MultiVector &X, MultiVector &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Returns a solution for the linear system AX=Y in the Tpetra::MultiVector X.
Wraps an existing MueLu::Hierarchy as a Tpetra::Operator.
Tpetra::MultiVector< SC, LO, GO, NO > MultiVector
Exception throws to report errors in the internal logical of the program.
Tpetra::MultiVector< SC, LO, GO, NO > MultiVector
RCP< Teuchos::Time > vectorTimer1_
RCP< Teuchos::Time > vectorTimer2_
AMGX_SOLVE_STATUS getStatus()
Adapter for AmgX library from Nvidia.