46 #ifndef MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_ 47 #define MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_ 49 #include <Xpetra_Map.hpp> 50 #include <Xpetra_Vector.hpp> 51 #include <Xpetra_MultiVectorFactory.hpp> 52 #include <Xpetra_MapFactory.hpp> 53 #include <Xpetra_VectorFactory.hpp> 55 #include "KokkosKernels_Handle.hpp" 56 #include "KokkosSparse_spgemm.hpp" 57 #include "KokkosSparse_spmv.hpp" 61 #include "MueLu_Aggregates.hpp" 67 #include "MueLu_Utilities.hpp" 69 #if defined(HAVE_MUELU_KOKKOS_REFACTOR) 70 #include "MueLu_Utilities_kokkos.hpp" 75 namespace NotayUtils {
76 template <
class LocalOrdinal>
78 return min + as<LocalOrdinal>((max-min+1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
81 template <
class LocalOrdinal>
84 LO n = Teuchos::as<LO>(list.size());
85 for(LO i = 0; i < n-1; i++)
90 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
92 RCP<ParameterList> validParamList = rcp(
new ParameterList());
94 typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
96 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) 102 #undef SET_VALID_ENTRY 105 validParamList->set< RCP<const FactoryBase> >(
"A", null,
"Generating factory of the matrix");
106 validParamList->set< RCP<const FactoryBase> >(
"Graph", null,
"Generating factory of the graph");
107 validParamList->set< RCP<const FactoryBase> >(
"DofsPerNode", null,
"Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
108 validParamList->set< RCP<const FactoryBase> >(
"AggregateQualities", null,
"Generating factory for variable \'AggregateQualities\'");
111 return validParamList;
114 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
116 const ParameterList& pL = GetParameterList();
118 Input(currentLevel,
"A");
119 Input(currentLevel,
"Graph");
120 Input(currentLevel,
"DofsPerNode");
121 if (pL.get<
bool>(
"aggregation: compute aggregate qualities")) {
122 Input(currentLevel,
"AggregateQualities");
129 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
132 using STS = Teuchos::ScalarTraits<Scalar>;
133 using MT =
typename STS::magnitudeType;
135 const MT MT_TWO = Teuchos::ScalarTraits<MT>::one() + Teuchos::ScalarTraits<MT>::one();
137 RCP<Teuchos::FancyOStream> out;
138 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
139 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
140 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
142 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
145 const ParameterList& pL = GetParameterList();
147 const MT kappa =
static_cast<MT
>(pL.get<
double>(
"aggregation: Dirichlet threshold"));
148 TEUCHOS_TEST_FOR_EXCEPTION(kappa <= MT_TWO,
150 "Pairwise requires kappa > 2" 151 " otherwise all rows are considered as Dirichlet rows.");
155 if (pL.isParameter(
"aggregation: pairwise: size"))
156 maxNumIter = pL.get<
int>(
"aggregation: pairwise: size");
157 TEUCHOS_TEST_FOR_EXCEPTION(maxNumIter < 1,
159 "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\"" 160 " must be a strictly positive integer");
163 RCP<const GraphBase> graph = Get< RCP<GraphBase> >(currentLevel,
"Graph");
164 RCP<const Matrix> A = Get< RCP<Matrix> >(currentLevel,
"A");
168 RCP<Aggregates> aggregates = rcp(
new Aggregates(*graph));
169 aggregates->setObjectLabel(
"PW");
171 const LO numRows = graph->GetNodeNumVertices();
174 std::vector<unsigned> aggStat(numRows,
READY);
177 const int DofsPerNode = Get<int>(currentLevel,
"DofsPerNode");
179 "Pairwise only supports one dof per node");
186 std::string orderingStr = pL.get<std::string>(
"aggregation: ordering");
193 ordering = O_NATURAL;
194 if (orderingStr ==
"random" ) ordering = O_RANDOM;
195 else if(orderingStr ==
"natural") {}
196 #if defined(HAVE_MUELU_KOKKOS_REFACTOR) 197 else if(orderingStr ==
"cuthill-mckee" || orderingStr ==
"cm") ordering = O_CUTHILL_MCKEE;
207 Array<LO> orderingVector(numRows);
208 for (LO i = 0; i < numRows; i++)
209 orderingVector[i] = i;
210 if (ordering == O_RANDOM)
212 #if defined(HAVE_MUELU_KOKKOS_REFACTOR) 213 else if (ordering == O_CUTHILL_MCKEE) {
215 auto localVector = rcmVector->getData(0);
216 for (LO i = 0; i < numRows; i++)
217 orderingVector[i] = localVector[i];
222 LO numNonAggregatedNodes = numRows, numDirichletNodes = 0;
223 BuildInitialAggregates(pL, A, orderingVector(), kappa,
224 *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes);
226 "Initial pairwise aggregation failed to aggregate all nodes");
227 LO numLocalAggregates = aggregates->GetNumAggregates();
228 GetOStream(
Statistics0) <<
"Init : " << numLocalAggregates <<
" - " 229 << A->getNodeNumRows() / numLocalAggregates << std::endl;
238 LO numLocalDirichletNodes = numDirichletNodes;
239 Array<LO> localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows));
240 BuildOnRankLocalMatrix(A->getLocalMatrix(), coarseLocalA);
241 for(LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) {
243 BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates,
244 localVertex2AggId(), intermediateP);
247 BuildCoarseLocalMatrix(intermediateP, coarseLocalA);
252 std::vector<std::vector<LO> > agg2vertex(numLocalAggregates);
253 auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0);
254 for(LO i=0; i<(LO)numRows; i++) {
257 LO agg=vertex2AggId[i];
258 agg2vertex[agg].push_back(i);
261 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
262 for(LO i = 0; i < numRows; i++) {
266 int agg = vertex2AggId[i];
267 std::vector<LO> & myagg = agg2vertex[agg];
269 size_t nnz = A->getNumEntriesInLocalRow(i);
270 ArrayView<const LO> indices;
271 ArrayView<const SC> vals;
272 A->getLocalRowView(i, indices, vals);
274 SC mysum = Teuchos::ScalarTraits<Scalar>::zero();
275 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
277 if(indices[colidx] < numRows) {
278 for(LO j=0; j<(LO)myagg.size(); j++)
279 if (vertex2AggId[indices[colidx]] == agg)
283 *out <<
"- ADDING col "<<indices[colidx]<<
" = "<<vals[colidx] << std::endl;
284 mysum += vals[colidx];
287 *out <<
"- NOT ADDING col "<<indices[colidx]<<
" = "<<vals[colidx] << std::endl;
291 rowSum_h[agg] = mysum;
297 Array<LO> localOrderingVector(numRows);
298 for (LO i = 0; i < numRows; i++)
299 localOrderingVector[i] = i;
300 if (ordering == O_RANDOM)
302 #if defined(HAVE_MUELU_KOKKOS_REFACTOR) 303 else if (ordering == O_CUTHILL_MCKEE) {
305 auto localVector = rcmVector->getData(0);
306 for (LO i = 0; i < numRows; i++)
307 localOrderingVector[i] = localVector[i];
312 numLocalAggregates = 0;
313 numNonAggregatedNodes =
static_cast<LO
>(coarseLocalA.numRows());
314 std::vector<LO> localAggStat(numNonAggregatedNodes,
READY);
315 localVertex2AggId.resize(numNonAggregatedNodes, -1);
316 BuildFurtherAggregates(pL, A, localOrderingVector, coarseLocalA, kappa, rowSum,
317 localAggStat, localVertex2AggId,
318 numLocalAggregates, numNonAggregatedNodes);
322 numLocalDirichletNodes = 0;
325 RCP<LOMultiVector> vertex2AggIdMV = aggregates->GetVertex2AggId();
326 ArrayRCP<LO> vertex2AggId = vertex2AggIdMV->getDataNonConst(0);
327 for(
size_t vertexIdx = 0; vertexIdx < A->getNodeNumRows(); ++vertexIdx) {
328 LO oldAggIdx = vertex2AggId[vertexIdx];
329 if(oldAggIdx != Teuchos::OrdinalTraits<LO>::invalid()) {
330 vertex2AggId[vertexIdx] = localVertex2AggId[oldAggIdx];
335 GetOStream(
Statistics0) <<
"Iter " << aggregationIter <<
": " << numLocalAggregates <<
" - " 336 << A->getNodeNumRows() / numLocalAggregates << std::endl;
338 aggregates->SetNumAggregates(numLocalAggregates);
339 aggregates->AggregatesCrossProcessors(
false);
340 aggregates->ComputeAggregateSizes(
true);
343 Set(currentLevel,
"Aggregates", aggregates);
344 GetOStream(
Statistics0) << aggregates->description() << std::endl;
348 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
351 const RCP<const Matrix>& A,
352 const Teuchos::ArrayView<const LO> & orderingVector,
353 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
355 std::vector<unsigned>& aggStat,
356 LO& numNonAggregatedNodes,
357 LO& numDirichletNodes)
const {
359 Monitor m(*
this,
"BuildInitialAggregates");
360 using STS = Teuchos::ScalarTraits<Scalar>;
361 using MT =
typename STS::magnitudeType;
362 using RealValuedVector = Xpetra::Vector<MT,LocalOrdinal,GlobalOrdinal,Node>;
364 RCP<Teuchos::FancyOStream> out;
365 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
366 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
367 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
369 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
373 const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
374 const MT MT_ZERO = Teuchos::ScalarTraits<MT>::zero();
375 const MT MT_ONE = Teuchos::ScalarTraits<MT>::one();
376 const MT MT_TWO = MT_ONE + MT_ONE;
377 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
378 const LO LO_ZERO = Teuchos::OrdinalTraits<LO>::zero();
380 const MT kappa_init = kappa / (kappa - MT_TWO);
381 const LO numRows = aggStat.size();
382 const int myRank = A->getMap()->getComm()->getRank();
386 double tie_criterion = params.get<
double>(
"aggregation: pairwise: tie threshold");
387 double tie_less = 1.0 - tie_criterion;
388 double tie_more = 1.0 + tie_criterion;
399 const ArrayRCP<const SC> D = ghostedDiag->getData(0);
400 const ArrayRCP<const SC> S = ghostedRowSum->getData(0);
401 const ArrayRCP<const MT> AbsRs = ghostedAbsRowSum->getData(0);
404 ArrayRCP<LO> vertex2AggId_rcp = aggregates.
GetVertex2AggId()->getDataNonConst(0);
405 ArrayRCP<LO> procWinner_rcp = aggregates.
GetProcWinner() ->getDataNonConst(0);
406 ArrayView<LO> vertex2AggId = vertex2AggId_rcp();
407 ArrayView<LO> procWinner = procWinner_rcp();
413 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getNodeNumElements()); ++row) {
414 MT aii = STS::magnitude(D[row]);
415 MT rowsum = AbsRs[row];
417 if(aii >= kappa_init * rowsum) {
418 *out <<
"Flagging index " << row <<
" as dirichlet " 419 "aii >= kappa*rowsum = " << aii <<
" >= " << kappa_init <<
" " << rowsum << std::endl;
421 --numNonAggregatedNodes;
428 LO aggIndex = LO_ZERO;
429 for(LO i = 0; i < numRows; i++) {
430 LO current_idx = orderingVector[i];
432 if(aggStat[current_idx] !=
READY)
435 MT best_mu = MT_ZERO;
436 LO best_idx = LO_INVALID;
438 size_t nnz = A->getNumEntriesInLocalRow(current_idx);
439 ArrayView<const LO> indices;
440 ArrayView<const SC> vals;
441 A->getLocalRowView(current_idx, indices, vals);
443 MT aii = STS::real(D[current_idx]);
444 MT si = STS::real(S[current_idx]);
445 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
447 LO col = indices[colidx];
448 SC val = vals[colidx];
449 if(current_idx == col || aggStat[col] !=
READY || col > numRows || val == SC_ZERO)
452 MT aij = STS::real(val);
453 MT ajj = STS::real(D[col]);
454 MT sj = - STS::real(S[col]);
455 if(aii - si + ajj - sj >= MT_ZERO) {
457 MT mu_top = MT_TWO / ( MT_ONE / aii + MT_ONE / ajj);
458 MT mu_bottom = - aij + MT_ONE / ( MT_ONE / (aii - si) + MT_ONE / (ajj - sj) );
459 MT mu = mu_top / mu_bottom;
462 if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less ||
463 (mu < best_mu*tie_more && orderingVector[col] < orderingVector[best_idx]))) {
466 *out <<
"[" << current_idx <<
"] Column UPDATED " << col <<
": " 467 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
468 <<
" = " << aii - si + ajj - sj<<
", aij = "<<aij <<
", mu = " << mu << std::endl;
471 *out <<
"[" << current_idx <<
"] Column NOT UPDATED " << col <<
": " 472 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
473 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
", mu = " << mu << std::endl;
477 *out <<
"[" << current_idx <<
"] Column FAILED " << col <<
": " 478 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
479 <<
" = " << aii - si + ajj - sj << std::endl;
483 if(best_idx == LO_INVALID) {
484 *out <<
"No node buddy found for index " << current_idx
485 <<
" [agg " << aggIndex <<
"]\n" << std::endl;
490 aggStat[current_idx] =
ONEPT;
491 vertex2AggId[current_idx] = aggIndex;
492 procWinner[current_idx] = myRank;
493 numNonAggregatedNodes--;
498 if(best_mu <= kappa) {
499 *out <<
"Node buddies (" << current_idx <<
"," << best_idx <<
") [agg " << aggIndex <<
"]" << std::endl;
503 vertex2AggId[current_idx] = aggIndex;
504 vertex2AggId[best_idx] = aggIndex;
505 procWinner[current_idx] = myRank;
506 procWinner[best_idx] = myRank;
507 numNonAggregatedNodes-=2;
511 *out <<
"No buddy found for index " << current_idx <<
"," 512 " but aggregating as singleton [agg " << aggIndex <<
"]" << std::endl;
514 aggStat[current_idx] =
ONEPT;
515 vertex2AggId[current_idx] = aggIndex;
516 procWinner[current_idx] = myRank;
517 numNonAggregatedNodes--;
523 *out <<
"vertex2aggid :";
524 for(
int i = 0; i < static_cast<int>(vertex2AggId.size()); ++i) {
525 *out << i <<
"(" << vertex2AggId[i] <<
")";
533 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
536 const RCP<const Matrix>& A,
537 const Teuchos::ArrayView<const LO> & orderingVector,
538 const typename Matrix::local_matrix_type& coarseA,
539 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
540 const Kokkos::View<
typename Kokkos::ArithTraits<Scalar>::val_type*,
542 typename Matrix::local_matrix_type::device_type>& rowSum,
543 std::vector<LocalOrdinal>& localAggStat,
544 Teuchos::Array<LocalOrdinal>& localVertex2AggID,
545 LO& numLocalAggregates,
546 LO& numNonAggregatedNodes)
const {
547 Monitor m(*
this,
"BuildFurtherAggregates");
550 RCP<Teuchos::FancyOStream> out;
551 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
552 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
553 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
555 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
558 using value_type =
typename local_matrix_type::value_type;
559 const value_type KAT_zero = Kokkos::ArithTraits<value_type>::zero();
560 const magnitude_type MT_zero = Teuchos::ScalarTraits<magnitude_type>::zero();
561 const magnitude_type MT_one = Teuchos::ScalarTraits<magnitude_type>::one();
563 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid() ;
567 double tie_criterion = params.get<
double>(
"aggregation: pairwise: tie threshold");
568 double tie_less = 1.0 - tie_criterion;
569 double tie_more = 1.0 + tie_criterion;
571 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
577 const LO numRows =
static_cast<LO
>(coarseA.numRows());
578 typename local_matrix_type::values_type::HostMirror diagA_h(
"diagA host", numRows);
579 typename local_matrix_type::row_map_type::HostMirror row_map_h
580 = Kokkos::create_mirror_view(coarseA.graph.row_map);
582 typename local_matrix_type::index_type::HostMirror entries_h
583 = Kokkos::create_mirror_view(coarseA.graph.entries);
585 typename local_matrix_type::values_type::HostMirror values_h
586 = Kokkos::create_mirror_view(coarseA.values);
588 for(LO rowIdx = 0; rowIdx < numRows; ++rowIdx) {
589 for(LO entryIdx = static_cast<LO>(row_map_h(rowIdx));
590 entryIdx < static_cast<LO>(row_map_h(rowIdx + 1));
592 if(rowIdx == static_cast<LO>(entries_h(entryIdx))) {
593 diagA_h(rowIdx) = values_h(entryIdx);
598 for(LO currentIdx = 0; currentIdx < numRows; ++currentIdx) {
599 if(localAggStat[currentIdx] !=
READY) {
603 LO bestIdx = Teuchos::OrdinalTraits<LO>::invalid();
604 magnitude_type best_mu = Teuchos::ScalarTraits<magnitude_type>::zero();
605 const magnitude_type aii = Teuchos::ScalarTraits<value_type>::real(diagA_h(currentIdx));
606 const magnitude_type si = Teuchos::ScalarTraits<value_type>::real(rowSum_h(currentIdx));
607 for(
auto entryIdx = row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) {
608 const LO colIdx =
static_cast<LO
>(entries_h(entryIdx));
609 if(currentIdx == colIdx || localAggStat[colIdx] !=
READY || values_h(entryIdx) == KAT_zero || colIdx > numRows) {
613 const magnitude_type aij = Teuchos::ScalarTraits<value_type>::real(values_h(entryIdx));
614 const magnitude_type ajj = Teuchos::ScalarTraits<value_type>::real(diagA_h(colIdx));
615 const magnitude_type sj = - Teuchos::ScalarTraits<value_type>::real(rowSum_h(colIdx));
616 if(aii - si + ajj - sj >= MT_zero) {
617 const magnitude_type mu_top = MT_two / ( MT_one/aii + MT_one/ajj );
618 const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj));
622 if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less ||
623 (mu < best_mu*tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) {
626 *out <<
"[" << currentIdx <<
"] Column UPDATED " << colIdx <<
": " 627 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
628 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
" mu = " << mu << std::endl;
631 *out <<
"[" << currentIdx <<
"] Column NOT UPDATED " << colIdx <<
": " 632 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
633 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
", mu = " << mu << std::endl;
636 *out <<
"[" << currentIdx <<
"] Column FAILED " << colIdx <<
": " 637 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
638 <<
" = " << aii - si + ajj - sj << std::endl;
642 if(bestIdx == Teuchos::OrdinalTraits<LO>::invalid()) {
643 localAggStat[currentIdx] =
ONEPT;
644 localVertex2AggID[currentIdx] = numLocalAggregates;
645 --numNonAggregatedNodes;
646 ++numLocalAggregates;
648 if(best_mu <= kappa) {
649 *out <<
"Node buddies (" << currentIdx <<
"," << bestIdx <<
") [agg " << numLocalAggregates <<
"]" << std::endl;
652 localVertex2AggID[currentIdx] = numLocalAggregates;
653 --numNonAggregatedNodes;
656 localVertex2AggID[bestIdx] = numLocalAggregates;
657 --numNonAggregatedNodes;
659 ++numLocalAggregates;
661 *out <<
"No buddy found for index " << currentIdx <<
"," 662 " but aggregating as singleton [agg " << numLocalAggregates <<
"]" << std::endl;
664 localAggStat[currentIdx] =
ONEPT;
665 localVertex2AggID[currentIdx] = numLocalAggregates;
666 --numNonAggregatedNodes;
667 ++numLocalAggregates;
674 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
677 typename Matrix::local_matrix_type& onrankA)
const {
678 Monitor m(*
this,
"BuildOnRankLocalMatrix");
681 RCP<Teuchos::FancyOStream> out;
682 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
683 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
684 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
686 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
689 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
690 using values_type =
typename local_matrix_type::values_type;
691 using size_type =
typename local_graph_type::size_type;
692 using col_index_type =
typename local_graph_type::data_type;
693 using array_layout =
typename local_graph_type::array_layout;
694 using memory_traits =
typename local_graph_type::memory_traits;
701 const int numRows =
static_cast<int>(localA.numRows());
702 row_pointer_type rowPtr(
"onrankA row pointer", numRows + 1);
703 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
704 typename local_graph_type::row_map_type::HostMirror origRowPtr_h
705 = Kokkos::create_mirror_view(localA.graph.row_map);
706 typename local_graph_type::entries_type::HostMirror origColind_h
707 = Kokkos::create_mirror_view(localA.graph.entries);
708 typename values_type::HostMirror origValues_h
709 = Kokkos::create_mirror_view(localA.values);
716 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
717 for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
718 if(origColind_h(entryIdx) < numRows) {rowPtr_h(rowIdx + 1) += 1;}
720 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx);
724 const LO nnzOnrankA = rowPtr_h(numRows);
727 col_indices_type colInd(
"onrankA column indices", rowPtr_h(numRows));
728 values_type values(
"onrankA values", rowPtr_h(numRows));
729 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
730 typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
732 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
734 for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
735 if(origColind_h(entryIdx) < numRows) {
736 colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx);
737 values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx);
746 nnzOnrankA, values, rowPtr, colInd);
749 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
754 const Teuchos::ArrayView<const LocalOrdinal>& localVertex2AggID,
755 typename Matrix::local_matrix_type& intermediateP)
const {
756 Monitor m(*
this,
"BuildIntermediateProlongator");
759 RCP<Teuchos::FancyOStream> out;
760 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
761 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
762 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
764 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
767 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
768 using values_type =
typename local_matrix_type::values_type;
769 using size_type =
typename local_graph_type::size_type;
770 using col_index_type =
typename local_graph_type::data_type;
771 using array_layout =
typename local_graph_type::array_layout;
772 using memory_traits =
typename local_graph_type::memory_traits;
776 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
778 const int intermediatePnnz = numRows - numDirichletNodes;
779 row_pointer_type rowPtr(
"intermediateP row pointer", numRows + 1);
780 col_indices_type colInd(
"intermediateP column indices", intermediatePnnz);
781 values_type values(
"intermediateP values", intermediatePnnz);
782 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
783 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
786 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
788 if(localVertex2AggID[rowIdx] == LO_INVALID) {
789 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx);
791 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1;
792 colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx];
798 Kokkos::deep_copy(values, Kokkos::ArithTraits<typename values_type::value_type>::one());
801 numRows, numLocalAggregates, intermediatePnnz,
802 values, rowPtr, colInd);
805 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
808 typename Matrix::local_matrix_type& coarseA)
const {
809 Monitor m(*
this,
"BuildCoarseLocalMatrix");
811 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
812 using values_type =
typename local_matrix_type::values_type;
813 using size_type =
typename local_graph_type::size_type;
814 using col_index_type =
typename local_graph_type::data_type;
815 using array_layout =
typename local_graph_type::array_layout;
816 using memory_traits =
typename local_graph_type::memory_traits;
821 localSpGEMM(coarseA, intermediateP,
"AP", AP);
834 row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt row pointer"),
835 intermediateP.numCols() + 1);
836 col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt column indices"),
837 intermediateP.nnz());
838 values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt values"),
839 intermediateP.nnz());
841 typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt);
843 for(size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) {
844 rowPtrPt_h(intermediateP.graph.entries(entryIdx) + 1) += 1;
846 for(LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) {
847 rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx);
851 typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map);
853 typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
855 typename values_type::HostMirror valuesP_h = Kokkos::create_mirror_view(intermediateP.values);
857 typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt);
858 typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt);
859 const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits<col_index_type>::invalid();
862 col_index_type colIdx = 0;
863 for(LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) {
864 for(size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) {
865 colIdx = intermediateP.graph.entries(entryIdxP);
866 for(size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) {
867 if(colIndPt_h(entryIdxPt) == invalidColumnIndex) {
868 colIndPt_h(entryIdxPt) = rowIdx;
869 valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP);
880 intermediateP.numCols(),
881 intermediateP.numRows(),
883 valuesPt, rowPtrPt, colIndPt);
886 localSpGEMM(intermediatePt, AP,
"coarseA", coarseA);
889 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
892 const typename Matrix::local_matrix_type& B,
893 const std::string matrixLabel,
894 typename Matrix::local_matrix_type& C)
const {
896 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
897 using values_type =
typename local_matrix_type::values_type;
898 using size_type =
typename local_graph_type::size_type;
899 using col_index_type =
typename local_graph_type::data_type;
900 using array_layout =
typename local_graph_type::array_layout;
901 using memory_space =
typename device_type::memory_space;
902 using memory_traits =
typename local_graph_type::memory_traits;
907 int team_work_size = 16;
908 std::string myalg(
"SPGEMM_KK_MEMORY");
909 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
910 KokkosKernels::Experimental::KokkosKernelsHandle<
typename row_pointer_type::const_value_type,
911 typename col_indices_type::const_value_type,
912 typename values_type::const_value_type,
916 kh.create_spgemm_handle(alg_enum);
917 kh.set_team_work_size(team_work_size);
920 row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing(
"C row pointer"),
922 col_indices_type colIndC;
926 KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(),
927 B.numRows(), B.numCols(),
928 A.graph.row_map, A.graph.entries,
false,
929 B.graph.row_map, B.graph.entries,
false,
933 size_t nnzC = kh.get_spgemm_handle()->get_c_nnz();
935 colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing(
"C column inds"), nnzC);
936 valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing(
"C values"), nnzC);
940 KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(),
941 B.numRows(), B.numCols(),
942 A.graph.row_map, A.graph.entries, A.values,
false,
943 B.graph.row_map, B.graph.entries, B.values,
false,
944 rowPtrC, colIndC, valuesC);
945 kh.destroy_spgemm_handle();
947 C =
local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC);
void localSpGEMM(const local_matrix_type &A, const local_matrix_type &B, const std::string matrixLabel, local_matrix_type &C) const
Wrapper for kokkos-kernels' spgemm that takes in CrsMatrix.
LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max)
void BuildIntermediateProlongator(const LO numRows, const LO numDirichletNodes, const LO numLocalAggregates, const ArrayView< const LO > &localVertex2AggID, local_matrix_type &intermediateP) const
Construction of a local prolongator with values equal to 1.0.
MueLu::DefaultLocalOrdinal LocalOrdinal
Container class for aggregation information.
typename Matrix::local_matrix_type local_matrix_type
typename Teuchos::ScalarTraits< Scalar >::magnitudeType magnitude_type
Timer to be used in factories. Similar to Monitor but with additional timers.
static RCP< Vector > GetMatrixOverlappedDeletedRowsum(const Matrix &A)
Extract Overlapped Matrix Deleted Rowsum.
Teuchos::RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op)
Namespace for MueLu classes and methods.
void BuildOnRankLocalMatrix(const local_matrix_type &localA, local_matrix_type &onRankA) const
Print statistics that do not involve significant additional computation.
void Build(Level ¤tLevel) const
Build aggregates.
static RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void BuildInitialAggregates(const Teuchos::ParameterList ¶ms, const RCP< const Matrix > &A, const ArrayView< const LO > &orderingVector, const magnitude_type kappa, Aggregates &aggregates, std::vector< unsigned > &aggStat, LO &numNonAggregatedNodes, LO &numDirichletNodes) const
Initial aggregation phase.
Class that holds all level-specific information.
typename device_type::execution_space execution_space
void BuildCoarseLocalMatrix(const local_matrix_type &intermediateP, local_matrix_type &coarseA) const
Implementation of a local Galerkin projection called inside BuildFurtherAggregates.
static RCP< Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Timer to be used in non-factories.
void RandomReorder(Teuchos::Array< LocalOrdinal > &list)
#define SET_VALID_ENTRY(name)
void DeclareInput(Level ¤tLevel) const
Input.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
Exception throws to report errors in the internal logical of the program.
void deep_copy(const View< DT, DP... > &dst, typename ViewTraits< DT, DP... >::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP... >::specialize, void >::value >::type *=nullptr)
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
typename Kokkos::View< impl_scalar_type *, Kokkos::LayoutLeft, device_type > row_sum_type
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.
void BuildFurtherAggregates(const Teuchos::ParameterList ¶ms, const RCP< const Matrix > &A, const Teuchos::ArrayView< const LO > &orderingVector, const local_matrix_type &coarseA, const magnitude_type kappa, const row_sum_type &rowSum, std::vector< LO > &localAggStat, Array< LO > &localVertex2AggID, LO &numLocalAggregates, LO &numNonAggregatedNodes) const
Further aggregation phase increases coarsening rate by a factor of ~2 per iteration.