43 #ifndef IFPACK2_RELAXATION_DEF_HPP 44 #define IFPACK2_RELAXATION_DEF_HPP 46 #include "Teuchos_StandardParameterEntryValidators.hpp" 47 #include "Teuchos_TimeMonitor.hpp" 48 #include "Tpetra_CrsMatrix.hpp" 49 #include "Tpetra_Experimental_BlockCrsMatrix.hpp" 51 #include "Ifpack2_Relaxation_decl.hpp" 52 #include "MatrixMarket_Tpetra.hpp" 55 #include "KokkosSparse_gauss_seidel.hpp" 66 class NonnegativeIntValidator :
public Teuchos::ParameterEntryValidator {
69 NonnegativeIntValidator () {}
72 Teuchos::ParameterEntryValidator::ValidStringsList validStringValues ()
const {
78 validate (
const Teuchos::ParameterEntry& entry,
79 const std::string& paramName,
80 const std::string& sublistName)
const 83 Teuchos::any anyVal = entry.getAny (
true);
84 const std::string entryName = entry.getAny (
false).typeName ();
86 TEUCHOS_TEST_FOR_EXCEPTION(
87 anyVal.type () !=
typeid (int),
88 Teuchos::Exceptions::InvalidParameterType,
89 "Parameter \"" << paramName <<
"\" in sublist \"" << sublistName
90 <<
"\" has the wrong type." << endl <<
"Parameter: " << paramName
91 << endl <<
"Type specified: " << entryName << endl
92 <<
"Type required: int" << endl);
94 const int val = Teuchos::any_cast<
int> (anyVal);
95 TEUCHOS_TEST_FOR_EXCEPTION(
96 val < 0, Teuchos::Exceptions::InvalidParameterValue,
97 "Parameter \"" << paramName <<
"\" in sublist \"" << sublistName
98 <<
"\" is negative." << endl <<
"Parameter: " << paramName
99 << endl <<
"Value specified: " << val << endl
100 <<
"Required range: [0, INT_MAX]" << endl);
104 const std::string getXMLTypeName ()
const {
105 return "NonnegativeIntValidator";
110 printDoc (
const std::string& docString,
111 std::ostream &out)
const 113 Teuchos::StrUtils::printLines (out,
"# ", docString);
114 out <<
"#\tValidator Used: " << std::endl;
115 out <<
"#\t\tNonnegativeIntValidator" << std::endl;
122 template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
126 static const Scalar eps ();
130 template<
class Scalar>
131 class SmallTraits<Scalar, true> {
133 static const Scalar eps () {
134 return Teuchos::ScalarTraits<Scalar>::one ();
139 template<
class Scalar>
140 class SmallTraits<Scalar, false> {
142 static const Scalar eps () {
143 return Teuchos::ScalarTraits<Scalar>::eps ();
153 template<
class MatrixType>
154 void Relaxation<MatrixType>::updateCachedMultiVector(
const Teuchos::RCP<
const Tpetra::Map<local_ordinal_type,global_ordinal_type,node_type> > & map,
size_t numVecs)
const{
157 if(cachedMV_.is_null() || &*map != &*cachedMV_->getMap() || cachedMV_->getNumVectors() !=numVecs)
158 cachedMV_ = Teuchos::rcp(
new Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>(map, numVecs,
false));
162 template<
class MatrixType>
164 setMatrix (
const Teuchos::RCP<const row_matrix_type>& A)
166 if (A.getRawPtr () != A_.getRawPtr ()) {
167 Importer_ = Teuchos::null;
168 Diagonal_ = Teuchos::null;
169 isInitialized_ =
false;
171 diagOffsets_ = Kokkos::View<size_t*, typename node_type::device_type> ();
172 savedDiagOffsets_ =
false;
173 hasBlockCrsMatrix_ =
false;
174 if (! A.is_null ()) {
175 IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
182 template<
class MatrixType>
188 DampingFactor_ (STS::one ()),
189 IsParallel_ ((A.is_null () || A->getRowMap ().is_null () || A->getRowMap ()->getComm ().is_null ()) ?
191 A->getRowMap ()->getComm ()->getSize () > 1),
192 ZeroStartingSolution_ (true),
193 DoBackwardGS_ (false),
196 MinDiagonalValue_ (STS::zero ()),
197 fixTinyDiagEntries_ (false),
198 checkDiagEntries_ (false),
199 is_matrix_structurally_symmetric_ (false),
200 ifpack2_dump_matrix_(false),
201 isInitialized_ (false),
206 InitializeTime_ (0.0),
211 globalMinMagDiagEntryMag_ (STM::zero ()),
212 globalMaxMagDiagEntryMag_ (STM::zero ()),
213 globalNumSmallDiagEntries_ (0),
214 globalNumZeroDiagEntries_ (0),
215 globalNumNegDiagEntries_ (0),
217 savedDiagOffsets_ (false),
218 hasBlockCrsMatrix_ (false)
220 this->setObjectLabel (
"Ifpack2::Relaxation");
224 template<
class MatrixType>
228 template<
class MatrixType>
229 Teuchos::RCP<const Teuchos::ParameterList>
232 using Teuchos::Array;
233 using Teuchos::ParameterList;
234 using Teuchos::parameterList;
237 using Teuchos::rcp_const_cast;
238 using Teuchos::rcp_implicit_cast;
239 using Teuchos::setStringToIntegralParameter;
240 typedef Teuchos::ParameterEntryValidator PEV;
242 if (validParams_.is_null ()) {
243 RCP<ParameterList> pl = parameterList (
"Ifpack2::Relaxation");
247 Array<std::string> precTypes (5);
248 precTypes[0] =
"Jacobi";
249 precTypes[1] =
"Gauss-Seidel";
250 precTypes[2] =
"Symmetric Gauss-Seidel";
251 precTypes[3] =
"MT Gauss-Seidel";
252 precTypes[4] =
"MT Symmetric Gauss-Seidel";
253 Array<Details::RelaxationType> precTypeEnums (5);
254 precTypeEnums[0] = Details::JACOBI;
255 precTypeEnums[1] = Details::GS;
256 precTypeEnums[2] = Details::SGS;
257 precTypeEnums[3] = Details::MTGS;
258 precTypeEnums[4] = Details::MTSGS;
259 const std::string defaultPrecType (
"Jacobi");
260 setStringToIntegralParameter<Details::RelaxationType> (
"relaxation: type",
261 defaultPrecType,
"Relaxation method", precTypes (), precTypeEnums (),
264 const int numSweeps = 1;
265 RCP<PEV> numSweepsValidator =
266 rcp_implicit_cast<PEV> (rcp (
new NonnegativeIntValidator));
267 pl->set (
"relaxation: sweeps", numSweeps,
"Number of relaxation sweeps",
268 rcp_const_cast<const PEV> (numSweepsValidator));
271 pl->set (
"relaxation: damping factor", dampingFactor);
273 const bool zeroStartingSolution =
true;
274 pl->set (
"relaxation: zero starting solution", zeroStartingSolution);
276 const bool doBackwardGS =
false;
277 pl->set (
"relaxation: backward mode", doBackwardGS);
279 const bool doL1Method =
false;
280 pl->set (
"relaxation: use l1", doL1Method);
282 const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
283 (STM::one() + STM::one());
284 pl->set (
"relaxation: l1 eta", l1eta);
287 pl->set (
"relaxation: min diagonal value", minDiagonalValue);
289 const bool fixTinyDiagEntries =
false;
290 pl->set (
"relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
292 const bool checkDiagEntries =
false;
293 pl->set (
"relaxation: check diagonal entries", checkDiagEntries);
295 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
296 pl->set(
"relaxation: local smoothing indices", localSmoothingIndices);
298 const bool is_matrix_structurally_symmetric =
false;
299 pl->set(
"relaxation: symmetric matrix structure", is_matrix_structurally_symmetric);
301 const bool ifpack2_dump_matrix =
false;
302 pl->set(
"relaxation: ifpack2 dump matrix", ifpack2_dump_matrix);
304 validParams_ = rcp_const_cast<
const ParameterList> (pl);
310 template<
class MatrixType>
313 using Teuchos::getIntegralValue;
314 using Teuchos::ParameterList;
316 typedef scalar_type ST;
318 if (pl.isType<
double>(
"relaxation: damping factor")) {
321 ST df = pl.get<
double>(
"relaxation: damping factor");
322 pl.remove(
"relaxation: damping factor");
323 pl.set(
"relaxation: damping factor",df);
328 const Details::RelaxationType precType =
329 getIntegralValue<Details::RelaxationType> (pl,
"relaxation: type");
330 const int numSweeps = pl.get<
int> (
"relaxation: sweeps");
331 const ST dampingFactor = pl.get<ST> (
"relaxation: damping factor");
332 const bool zeroStartSol = pl.get<
bool> (
"relaxation: zero starting solution");
333 const bool doBackwardGS = pl.get<
bool> (
"relaxation: backward mode");
334 const bool doL1Method = pl.get<
bool> (
"relaxation: use l1");
335 const magnitude_type l1Eta = pl.get<magnitude_type> (
"relaxation: l1 eta");
336 const ST minDiagonalValue = pl.get<ST> (
"relaxation: min diagonal value");
337 const bool fixTinyDiagEntries = pl.get<
bool> (
"relaxation: fix tiny diagonal entries");
338 const bool checkDiagEntries = pl.get<
bool> (
"relaxation: check diagonal entries");
339 const bool is_matrix_structurally_symmetric = pl.get<
bool> (
"relaxation: symmetric matrix structure");
340 const bool ifpack2_dump_matrix = pl.get<
bool> (
"relaxation: ifpack2 dump matrix");
342 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type> >(
"relaxation: local smoothing indices");
346 PrecType_ = precType;
347 NumSweeps_ = numSweeps;
348 DampingFactor_ = dampingFactor;
349 ZeroStartingSolution_ = zeroStartSol;
350 DoBackwardGS_ = doBackwardGS;
351 DoL1Method_ = doL1Method;
353 MinDiagonalValue_ = minDiagonalValue;
354 fixTinyDiagEntries_ = fixTinyDiagEntries;
355 checkDiagEntries_ = checkDiagEntries;
356 is_matrix_structurally_symmetric_ = is_matrix_structurally_symmetric;
357 ifpack2_dump_matrix_ = ifpack2_dump_matrix;
358 localSmoothingIndices_ = localSmoothingIndices;
362 template<
class MatrixType>
367 this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
371 template<
class MatrixType>
372 Teuchos::RCP<const Teuchos::Comm<int> >
374 TEUCHOS_TEST_FOR_EXCEPTION(
375 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getComm: " 376 "The input matrix A is null. Please call setMatrix() with a nonnull " 377 "input matrix before calling this method.");
378 return A_->getRowMap ()->getComm ();
382 template<
class MatrixType>
383 Teuchos::RCP<const typename Relaxation<MatrixType>::row_matrix_type>
389 template<
class MatrixType>
390 Teuchos::RCP<
const Tpetra::Map<
typename MatrixType::local_ordinal_type,
391 typename MatrixType::global_ordinal_type,
392 typename MatrixType::node_type> >
394 TEUCHOS_TEST_FOR_EXCEPTION(
395 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getDomainMap: " 396 "The input matrix A is null. Please call setMatrix() with a nonnull " 397 "input matrix before calling this method.");
398 return A_->getDomainMap ();
402 template<
class MatrixType>
403 Teuchos::RCP<
const Tpetra::Map<
typename MatrixType::local_ordinal_type,
404 typename MatrixType::global_ordinal_type,
405 typename MatrixType::node_type> >
407 TEUCHOS_TEST_FOR_EXCEPTION(
408 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getRangeMap: " 409 "The input matrix A is null. Please call setMatrix() with a nonnull " 410 "input matrix before calling this method.");
411 return A_->getRangeMap ();
415 template<
class MatrixType>
421 template<
class MatrixType>
423 return(NumInitialize_);
427 template<
class MatrixType>
433 template<
class MatrixType>
439 template<
class MatrixType>
441 return(InitializeTime_);
445 template<
class MatrixType>
447 return(ComputeTime_);
451 template<
class MatrixType>
457 template<
class MatrixType>
459 return(ComputeFlops_);
463 template<
class MatrixType>
470 template<
class MatrixType>
472 TEUCHOS_TEST_FOR_EXCEPTION(
473 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getNodeSmootherComplexity: " 474 "The input matrix A is null. Please call setMatrix() with a nonnull " 475 "input matrix, then call compute(), before calling this method.");
477 return A_->getNodeNumRows() + A_->getNodeNumEntries();
481 template<
class MatrixType>
484 apply (
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
485 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
493 using Teuchos::rcpFromRef;
496 TEUCHOS_TEST_FOR_EXCEPTION(
497 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::apply: " 498 "The input matrix A is null. Please call setMatrix() with a nonnull " 499 "input matrix, then call compute(), before calling this method.");
500 TEUCHOS_TEST_FOR_EXCEPTION(
503 "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 " 504 "preconditioner instance before you may call apply(). You may call " 505 "isComputed() to find out if compute() has been called already.");
506 TEUCHOS_TEST_FOR_EXCEPTION(
507 X.getNumVectors() != Y.getNumVectors(),
509 "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. " 510 "X has " << X.getNumVectors() <<
" columns, but Y has " 511 << Y.getNumVectors() <<
" columns.");
512 TEUCHOS_TEST_FOR_EXCEPTION(
513 beta != STS::zero (), std::logic_error,
514 "Ifpack2::Relaxation::apply: beta = " << beta <<
" != 0 case not " 517 const std::string timerName (
"Ifpack2::Relaxation::apply");
518 Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter (timerName);
519 if (timer.is_null ()) {
520 timer = Teuchos::TimeMonitor::getNewCounter (timerName);
524 Teuchos::TimeMonitor timeMon (*timer);
526 if (alpha == STS::zero ()) {
528 Y.putScalar (STS::zero ());
536 auto X_lcl_host = X.template getLocalView<Kokkos::HostSpace> ();
537 auto Y_lcl_host = Y.template getLocalView<Kokkos::HostSpace> ();
538 if (X_lcl_host.data () == Y_lcl_host.data ()) {
539 Xcopy = rcp (
new MV (X, Teuchos::Copy));
541 Xcopy = rcpFromRef (X);
549 case Ifpack2::Details::JACOBI:
550 ApplyInverseJacobi(*Xcopy,Y);
552 case Ifpack2::Details::GS:
553 ApplyInverseGS(*Xcopy,Y);
555 case Ifpack2::Details::SGS:
556 ApplyInverseSGS(*Xcopy,Y);
558 case Ifpack2::Details::MTSGS:
559 ApplyInverseMTSGS_CrsMatrix(*Xcopy,Y);
561 case Ifpack2::Details::MTGS:
562 ApplyInverseMTGS_CrsMatrix(*Xcopy,Y);
566 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error,
567 "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value " 568 << PrecType_ <<
". Please report this bug to the Ifpack2 developers.");
570 if (alpha != STS::one ()) {
572 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
573 const double numVectors = as<double> (Y.getNumVectors ());
574 ApplyFlops_ += numGlobalRows * numVectors;
578 ApplyTime_ += timer->totalElapsedTime ();
583 template<
class MatrixType>
586 applyMat (
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
587 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
588 Teuchos::ETransp mode)
const 590 TEUCHOS_TEST_FOR_EXCEPTION(
591 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::applyMat: " 592 "The input matrix A is null. Please call setMatrix() with a nonnull " 593 "input matrix, then call compute(), before calling this method.");
594 TEUCHOS_TEST_FOR_EXCEPTION(
595 ! isComputed (), std::runtime_error,
"Ifpack2::Relaxation::applyMat: " 596 "isComputed() must be true before you may call applyMat(). " 597 "Please call compute() before calling this method.");
598 TEUCHOS_TEST_FOR_EXCEPTION(
599 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
600 "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
601 <<
" != Y.getNumVectors() = " << Y.getNumVectors () <<
".");
602 A_->apply (X, Y, mode);
606 template<
class MatrixType>
609 TEUCHOS_TEST_FOR_EXCEPTION
610 (A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::initialize: " 611 "The input matrix A is null. Please call setMatrix() with a nonnull " 612 "input matrix before calling this method.");
613 const std::string timerName (
"Ifpack2::Relaxation::initialize");
614 Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter (timerName);
615 if (timer.is_null ()) {
616 timer = Teuchos::TimeMonitor::getNewCounter (timerName);
620 hasBlockCrsMatrix_ =
false;
623 Teuchos::RCP<const block_crs_matrix_type> A_bcrs =
624 Teuchos::rcp_dynamic_cast<
const block_crs_matrix_type> (A_);
625 if (A_bcrs.is_null ()) {
626 hasBlockCrsMatrix_ =
false;
629 hasBlockCrsMatrix_ =
true;
633 if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
634 const crs_matrix_type* crsMat =
dynamic_cast<const crs_matrix_type*
> (&(*A_));
635 TEUCHOS_TEST_FOR_EXCEPTION
636 (crsMat == NULL, std::logic_error,
"Ifpack2::Relaxation::initialize: " 637 "Multithreaded Gauss-Seidel methods currently only work when the input " 638 "matrix is a Tpetra::CrsMatrix.");
640 if(this->ifpack2_dump_matrix_){
641 int random_integer = rand();
642 std::stringstream ss;
643 ss << random_integer;
644 std::string str = ss.str();
645 Tpetra::MatrixMarket::Writer<crs_matrix_type> crs_writer;
646 std::string file_name = str +
"_Ifpack2_MT_GS.mtx";
647 Teuchos::RCP<const crs_matrix_type> rcp_crs_mat = Teuchos::rcp_dynamic_cast<
const crs_matrix_type> (A_);
648 crs_writer.writeSparseFile(file_name, rcp_crs_mat);
651 this->mtKernelHandle_ = Teuchos::rcp (
new mt_kernel_handle_type ());
652 if (mtKernelHandle_->get_gs_handle () == NULL) {
653 mtKernelHandle_->create_gs_handle ();
655 local_matrix_type kcsr = crsMat->getLocalMatrix ();
657 bool is_symmetric = (PrecType_ == Ifpack2::Details::MTSGS);
658 is_symmetric = is_symmetric || is_matrix_structurally_symmetric_;
660 using KokkosSparse::Experimental::gauss_seidel_symbolic;
661 gauss_seidel_symbolic<mt_kernel_handle_type,
663 lno_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
664 A_->getNodeNumRows (),
665 A_->getNodeNumCols (),
672 InitializeTime_ += timer->totalElapsedTime ();
674 isInitialized_ =
true;
678 template <
typename BlockDiagView>
679 struct InvertDiagBlocks {
680 typedef int value_type;
681 typedef typename BlockDiagView::size_type Size;
684 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
685 template <
typename View>
686 using UnmanagedView = Kokkos::View<
typename View::data_type,
typename View::array_layout,
687 typename View::device_type, Unmanaged>;
689 typedef typename BlockDiagView::non_const_value_type Scalar;
690 typedef typename BlockDiagView::device_type Device;
691 typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
692 typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
694 UnmanagedView<BlockDiagView> block_diag_;
698 UnmanagedView<RWrk> rwrk_;
700 UnmanagedView<IWrk> iwrk_;
703 InvertDiagBlocks (BlockDiagView& block_diag)
704 : block_diag_(block_diag)
706 const auto blksz = block_diag.extent(1);
707 Kokkos::resize(rwrk_buf_, block_diag_.extent(0), blksz);
709 Kokkos::resize(iwrk_buf_, block_diag_.extent(0), blksz);
713 KOKKOS_INLINE_FUNCTION
714 void operator() (
const Size i,
int& jinfo)
const {
715 auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
716 auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
717 auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
719 Tpetra::Experimental::GETF2(D_cur, ipiv, info);
724 Tpetra::Experimental::GETRI(D_cur, ipiv, work, info);
729 KOKKOS_INLINE_FUNCTION
730 void join (
volatile value_type& dst,
volatile value_type
const& src)
const { dst += src; }
734 template<
class MatrixType>
735 void Relaxation<MatrixType>::computeBlockCrs ()
738 using Teuchos::Array;
739 using Teuchos::ArrayRCP;
740 using Teuchos::ArrayView;
745 using Teuchos::REDUCE_MAX;
746 using Teuchos::REDUCE_MIN;
747 using Teuchos::REDUCE_SUM;
748 using Teuchos::rcp_dynamic_cast;
749 using Teuchos::reduceAll;
750 typedef local_ordinal_type LO;
751 typedef typename node_type::device_type device_type;
753 const std::string timerName (
"Ifpack2::Relaxation::computeBlockCrs");
754 Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter (timerName);
755 if (timer.is_null ()) {
756 timer = Teuchos::TimeMonitor::getNewCounter (timerName);
759 TEUCHOS_TEST_FOR_EXCEPTION(
760 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::" 761 "computeBlockCrs: The input matrix A is null. Please call setMatrix() " 762 "with a nonnull input matrix, then call initialize(), before calling " 764 const block_crs_matrix_type* blockCrsA =
765 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
766 TEUCHOS_TEST_FOR_EXCEPTION(
767 blockCrsA == NULL, std::logic_error,
"Ifpack2::Relaxation::" 768 "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we " 769 "got this far. Please report this bug to the Ifpack2 developers.");
771 const scalar_type one = STS::one ();
776 const LO lclNumMeshRows =
777 blockCrsA->getCrsGraph ().getNodeNumRows ();
778 const LO blockSize = blockCrsA->getBlockSize ();
780 if (! savedDiagOffsets_) {
781 blockDiag_ = block_diag_type ();
782 blockDiag_ = block_diag_type (
"Ifpack2::Relaxation::blockDiag_",
783 lclNumMeshRows, blockSize, blockSize);
784 if (Teuchos::as<LO>(diagOffsets_.extent (0) ) < lclNumMeshRows) {
787 diagOffsets_ = Kokkos::View<size_t*, device_type> ();
788 diagOffsets_ = Kokkos::View<size_t*, device_type> (
"offsets", lclNumMeshRows);
790 blockCrsA->getCrsGraph ().getLocalDiagOffsets (diagOffsets_);
791 TEUCHOS_TEST_FOR_EXCEPTION
792 (static_cast<size_t> (diagOffsets_.extent (0)) !=
793 static_cast<size_t> (blockDiag_.extent (0)),
794 std::logic_error,
"diagOffsets_.extent(0) = " <<
795 diagOffsets_.extent (0) <<
" != blockDiag_.extent(0) = " 796 << blockDiag_.extent (0) <<
797 ". Please report this bug to the Ifpack2 developers.");
798 savedDiagOffsets_ =
true;
800 blockCrsA->getLocalDiagCopy (blockDiag_, diagOffsets_);
807 unmanaged_block_diag_type blockDiag = blockDiag_;
809 if (DoL1Method_ && IsParallel_) {
810 const scalar_type two = one + one;
811 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
812 Array<LO> indices (maxLength);
813 Array<scalar_type> values (maxLength * blockSize * blockSize);
814 size_t numEntries = 0;
816 for (LO i = 0; i < lclNumMeshRows; ++i) {
818 blockCrsA->getLocalRowCopy (i, indices (), values (), numEntries);
820 auto diagBlock = Kokkos::subview (blockDiag, i, ALL (), ALL ());
821 for (LO subRow = 0; subRow < blockSize; ++subRow) {
822 magnitude_type diagonal_boost = STM::zero ();
823 for (
size_t k = 0 ; k < numEntries ; ++k) {
824 if (indices[k] > lclNumMeshRows) {
825 const size_t offset = blockSize*blockSize*k + subRow*blockSize;
826 for (LO subCol = 0; subCol < blockSize; ++subCol) {
827 diagonal_boost += STS::magnitude (values[offset+subCol] / two);
831 if (STS::magnitude (diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
832 diagBlock(subRow, subRow) += diagonal_boost;
840 Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
841 typedef typename unmanaged_block_diag_type::execution_space exec_space;
842 typedef Kokkos::RangePolicy<exec_space, LO> range_type;
844 Kokkos::parallel_reduce (range_type (0, lclNumMeshRows), idb, info);
849 TEUCHOS_TEST_FOR_EXCEPTION
850 (info > 0, std::runtime_error,
"GETF2 or GETRI failed on = " << info
851 <<
" diagonal blocks.");
856 #ifdef HAVE_IFPACK2_DEBUG 857 const int numResults = 2;
859 int lclResults[2], gblResults[2];
860 lclResults[0] = info;
861 lclResults[1] = -info;
864 reduceAll<int, int> (* (A_->getGraph ()->getComm ()), REDUCE_MIN,
865 numResults, lclResults, gblResults);
866 TEUCHOS_TEST_FOR_EXCEPTION
867 (gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
868 "Ifpack2::Relaxation::compute: When processing the input " 869 "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations " 870 "failed on one or more (MPI) processes.");
871 #endif // HAVE_IFPACK2_DEBUG 873 Importer_ = A_->getGraph ()->getImporter ();
876 ComputeTime_ += timer->totalElapsedTime ();
881 template<
class MatrixType>
884 using Teuchos::Array;
885 using Teuchos::ArrayRCP;
886 using Teuchos::ArrayView;
891 using Teuchos::REDUCE_MAX;
892 using Teuchos::REDUCE_MIN;
893 using Teuchos::REDUCE_SUM;
894 using Teuchos::rcp_dynamic_cast;
895 using Teuchos::reduceAll;
898 typedef typename vector_type::device_type device_type;
903 if (! isInitialized ()) {
907 if (hasBlockCrsMatrix_) {
913 const std::string timerName (
"Ifpack2::Relaxation::compute");
914 Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter (timerName);
915 if (timer.is_null ()) {
916 timer = Teuchos::TimeMonitor::getNewCounter (timerName);
922 TEUCHOS_TEST_FOR_EXCEPTION(
923 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::compute: " 924 "The input matrix A is null. Please call setMatrix() with a nonnull " 925 "input matrix, then call initialize(), before calling this method.");
930 TEUCHOS_TEST_FOR_EXCEPTION(
931 NumSweeps_ < 0, std::logic_error,
932 "Ifpack2::Relaxation::compute: NumSweeps_ = " << NumSweeps_ <<
" < 0. " 933 "Please report this bug to the Ifpack2 developers.");
935 Diagonal_ = rcp (
new vector_type (A_->getRowMap ()));
953 const crs_matrix_type* crsMat =
954 dynamic_cast<const crs_matrix_type*
> (A_.getRawPtr ());
955 if (crsMat == NULL || ! crsMat->isStaticGraph ()) {
956 A_->getLocalDiagCopy (*Diagonal_);
958 if (! savedDiagOffsets_) {
959 const size_t lclNumRows = A_->getRowMap ()->getNodeNumElements ();
960 if (diagOffsets_.extent (0) < lclNumRows) {
961 typedef typename node_type::device_type DT;
962 diagOffsets_ = Kokkos::View<size_t*, DT> ();
963 diagOffsets_ = Kokkos::View<size_t*, DT> (
"offsets", lclNumRows);
965 crsMat->getCrsGraph ()->getLocalDiagOffsets (diagOffsets_);
966 savedDiagOffsets_ =
true;
968 crsMat->getLocalDiagCopy (*Diagonal_, diagOffsets_);
969 #ifdef HAVE_IFPACK2_DEBUG 971 vector_type D_copy (A_->getRowMap ());
972 A_->getLocalDiagCopy (D_copy);
973 D_copy.update (STS::one (), *Diagonal_, -STS::one ());
977 TEUCHOS_TEST_FOR_EXCEPTION(
978 err != STM::zero(), std::logic_error,
"Ifpack2::Relaxation::compute: " 979 "\"fast-path\" diagonal computation failed. \\|D1 - D2\\|_inf = " 981 #endif // HAVE_IFPACK2_DEBUG 987 RCP<vector_type> origDiag;
988 if (checkDiagEntries_) {
989 origDiag = rcp (
new vector_type (A_->getRowMap ()));
990 Tpetra::deep_copy (*origDiag, *Diagonal_);
993 const size_t numMyRows = A_->getNodeNumRows ();
996 Diagonal_->template sync<Kokkos::HostSpace> ();
997 Diagonal_->template modify<Kokkos::HostSpace> ();
998 auto diag_2d = Diagonal_->template getLocalView<Kokkos::HostSpace> ();
999 auto diag_1d = Kokkos::subview (diag_2d, Kokkos::ALL (), 0);
1010 if (DoL1Method_ && IsParallel_) {
1012 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
1013 Array<local_ordinal_type> indices (maxLength);
1014 Array<scalar_type> values (maxLength);
1017 for (
size_t i = 0; i < numMyRows; ++i) {
1018 A_->getLocalRowCopy (i, indices (), values (), numEntries);
1020 for (
size_t k = 0 ; k < numEntries ; ++k) {
1021 if (static_cast<size_t> (indices[k]) > numMyRows) {
1022 diagonal_boost += STS::magnitude (values[k] / two);
1025 if (STS::magnitude (diag[i]) < L1Eta_ * diagonal_boost) {
1026 diag[i] += diagonal_boost;
1042 const scalar_type oneOverMinDiagVal = (MinDiagonalValue_ == zero) ?
1043 one / SmallTraits<scalar_type>::eps () :
1044 one / MinDiagonalValue_;
1046 const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
1048 if (checkDiagEntries_) {
1054 size_t numSmallDiagEntries = 0;
1055 size_t numZeroDiagEntries = 0;
1056 size_t numNegDiagEntries = 0;
1067 if (numMyRows > 0) {
1072 minMagDiagEntryMag = d_0_mag;
1073 maxMagDiagEntryMag = d_0_mag;
1081 for (
size_t i = 0 ; i < numMyRows; ++i) {
1088 if (d_i_real < STM::zero ()) {
1089 ++numNegDiagEntries;
1091 if (d_i_mag < minMagDiagEntryMag) {
1093 minMagDiagEntryMag = d_i_mag;
1095 if (d_i_mag > maxMagDiagEntryMag) {
1097 maxMagDiagEntryMag = d_i_mag;
1100 if (fixTinyDiagEntries_) {
1102 if (d_i_mag <= minDiagValMag) {
1103 ++numSmallDiagEntries;
1104 if (d_i_mag == STM::zero ()) {
1105 ++numZeroDiagEntries;
1107 diag[i] = oneOverMinDiagVal;
1109 diag[i] = one / d_i;
1114 if (d_i_mag <= minDiagValMag) {
1115 ++numSmallDiagEntries;
1116 if (d_i_mag == STM::zero ()) {
1117 ++numZeroDiagEntries;
1120 diag[i] = one / d_i;
1127 if (STS::isComplex) {
1128 ComputeFlops_ += 4.0 * numMyRows;
1130 ComputeFlops_ += numMyRows;
1147 const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
1150 Array<magnitude_type> localVals (2);
1151 localVals[0] = minMagDiagEntryMag;
1153 localVals[1] = -maxMagDiagEntryMag;
1154 Array<magnitude_type> globalVals (2);
1155 reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
1156 localVals.getRawPtr (),
1157 globalVals.getRawPtr ());
1158 globalMinMagDiagEntryMag_ = globalVals[0];
1159 globalMaxMagDiagEntryMag_ = -globalVals[1];
1161 Array<size_t> localCounts (3);
1162 localCounts[0] = numSmallDiagEntries;
1163 localCounts[1] = numZeroDiagEntries;
1164 localCounts[2] = numNegDiagEntries;
1165 Array<size_t> globalCounts (3);
1166 reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
1167 localCounts.getRawPtr (),
1168 globalCounts.getRawPtr ());
1169 globalNumSmallDiagEntries_ = globalCounts[0];
1170 globalNumZeroDiagEntries_ = globalCounts[1];
1171 globalNumNegDiagEntries_ = globalCounts[2];
1183 vector_type diff (A_->getRowMap ());
1184 diff.reciprocal (*origDiag);
1185 Diagonal_->template sync<device_type> ();
1186 diff.update (-one, *Diagonal_, one);
1187 globalDiagNormDiff_ = diff.norm2 ();
1190 if (fixTinyDiagEntries_) {
1194 for (
size_t i = 0 ; i < numMyRows; ++i) {
1199 if (d_i_mag <= minDiagValMag) {
1200 diag[i] = oneOverMinDiagVal;
1202 diag[i] = one / d_i;
1207 for (
size_t i = 0 ; i < numMyRows; ++i) {
1208 diag[i] = one / diag[i];
1211 if (STS::isComplex) {
1212 ComputeFlops_ += 4.0 * numMyRows;
1214 ComputeFlops_ += numMyRows;
1218 if (IsParallel_ && (PrecType_ == Ifpack2::Details::GS ||
1219 PrecType_ == Ifpack2::Details::SGS)) {
1223 Importer_ = A_->getGraph ()->getImporter ();
1224 Diagonal_->template sync<device_type> ();
1227 if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
1228 const crs_matrix_type* crsMat =
dynamic_cast<const crs_matrix_type*
> (&(*A_));
1229 TEUCHOS_TEST_FOR_EXCEPTION
1230 (crsMat == NULL, std::logic_error,
"Ifpack2::Relaxation::compute: " 1231 "Multithreaded Gauss-Seidel methods currently only work when the input " 1232 "matrix is a Tpetra::CrsMatrix.");
1233 local_matrix_type kcsr = crsMat->getLocalMatrix ();
1235 const bool is_symmetric = (PrecType_ == Ifpack2::Details::MTSGS);
1236 using KokkosSparse::Experimental::gauss_seidel_numeric;
1237 gauss_seidel_numeric<mt_kernel_handle_type,
1240 scalar_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
1241 A_->getNodeNumRows (), A_->getNodeNumCols (),
1249 ComputeTime_ += timer->totalElapsedTime ();
1255 template<
class MatrixType>
1258 ApplyInverseJacobi (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1259 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 1262 if (hasBlockCrsMatrix_) {
1263 ApplyInverseJacobi_BlockCrsMatrix (X, Y);
1267 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1268 const double numVectors = as<double> (X.getNumVectors ());
1269 if (ZeroStartingSolution_) {
1274 Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
1280 double flopUpdate = 0.0;
1281 if (DampingFactor_ == STS::one ()) {
1283 flopUpdate = numGlobalRows * numVectors;
1287 flopUpdate = 2.0 * numGlobalRows * numVectors;
1289 ApplyFlops_ += flopUpdate;
1290 if (NumSweeps_ == 1) {
1296 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1299 updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1301 for (
int j = startSweep; j < NumSweeps_; ++j) {
1303 applyMat (Y, *cachedMV_);
1304 cachedMV_->update (STS::one (), X, -STS::one ());
1305 Y.elementWiseMultiply (DampingFactor_, *Diagonal_, *cachedMV_, STS::one ());
1319 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1320 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1321 ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1322 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1325 template<
class MatrixType>
1327 Relaxation<MatrixType>::
1328 ApplyInverseJacobi_BlockCrsMatrix (
const Tpetra::MultiVector<scalar_type,
1330 global_ordinal_type,
1332 Tpetra::MultiVector<scalar_type,
1334 global_ordinal_type,
1335 node_type>& Y)
const 1337 typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
1338 local_ordinal_type, global_ordinal_type, node_type> BMV;
1340 const block_crs_matrix_type* blockMatConst =
1341 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
1342 TEUCHOS_TEST_FOR_EXCEPTION
1343 (blockMatConst == NULL, std::logic_error,
"This method should never be " 1344 "called if the matrix A_ is not a BlockCrsMatrix. Please report this " 1345 "bug to the Ifpack2 developers.");
1350 block_crs_matrix_type* blockMat =
1351 const_cast<block_crs_matrix_type*
> (blockMatConst);
1353 auto meshRowMap = blockMat->getRowMap ();
1354 auto meshColMap = blockMat->getColMap ();
1355 const local_ordinal_type blockSize = blockMat->getBlockSize ();
1357 BMV X_blk (X, *meshColMap, blockSize);
1358 BMV Y_blk (Y, *meshRowMap, blockSize);
1360 if (ZeroStartingSolution_) {
1364 Y_blk.blockWiseMultiply (DampingFactor_, blockDiag_, X_blk);
1365 if (NumSweeps_ == 1) {
1370 auto pointRowMap = Y.getMap ();
1371 const size_t numVecs = X.getNumVectors ();
1375 BMV A_times_Y (*meshRowMap, *pointRowMap, blockSize, numVecs);
1379 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1381 for (
int j = startSweep; j < NumSweeps_; ++j) {
1382 blockMat->applyBlock (Y_blk, A_times_Y);
1384 Y_blk.blockJacobiUpdate (DampingFactor_, blockDiag_,
1385 X_blk, A_times_Y, STS::one ());
1389 template<
class MatrixType>
1391 Relaxation<MatrixType>::
1392 ApplyInverseGS (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1393 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 1395 typedef Relaxation<MatrixType> this_type;
1405 const block_crs_matrix_type* blockCrsMat =
1406 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
1407 const crs_matrix_type* crsMat =
1408 dynamic_cast<const crs_matrix_type*
> (A_.getRawPtr ());
1409 if (blockCrsMat != NULL) {
1410 const_cast<this_type*
> (
this)->ApplyInverseGS_BlockCrsMatrix (*blockCrsMat, X, Y);
1411 }
else if (crsMat != NULL) {
1412 ApplyInverseGS_CrsMatrix (*crsMat, X, Y);
1414 ApplyInverseGS_RowMatrix (X, Y);
1419 template<
class MatrixType>
1421 Relaxation<MatrixType>::
1422 ApplyInverseGS_RowMatrix (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1423 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 1425 using Teuchos::Array;
1426 using Teuchos::ArrayRCP;
1427 using Teuchos::ArrayView;
1431 using Teuchos::rcpFromRef;
1432 typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
1437 if (ZeroStartingSolution_) {
1438 Y.putScalar (STS::zero ());
1441 const size_t NumVectors = X.getNumVectors ();
1442 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
1443 Array<local_ordinal_type> Indices (maxLength);
1444 Array<scalar_type> Values (maxLength);
1447 const size_t numMyRows = A_->getNodeNumRows();
1448 const local_ordinal_type* rowInd = 0;
1449 size_t numActive = numMyRows;
1450 bool do_local = ! localSmoothingIndices_.is_null ();
1452 rowInd = localSmoothingIndices_.getRawPtr ();
1453 numActive = localSmoothingIndices_.size ();
1458 if (Importer_.is_null ()) {
1459 updateCachedMultiVector(Y.getMap(),NumVectors);
1461 updateCachedMultiVector(Importer_->getTargetMap(),NumVectors);
1466 Y2 = rcpFromRef (Y);
1470 ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView ();
1471 ArrayView<const scalar_type> d_ptr = d_rcp();
1474 bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
1476 if (constant_stride) {
1478 size_t x_stride = X.getStride();
1479 size_t y2_stride = Y2->getStride();
1480 ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
1481 ArrayRCP<const scalar_type> x_rcp = X.get1dView();
1482 ArrayView<scalar_type> y2_ptr = y2_rcp();
1483 ArrayView<const scalar_type> x_ptr = x_rcp();
1484 Array<scalar_type> dtemp(NumVectors,STS::zero());
1486 for (
int j = 0; j < NumSweeps_; ++j) {
1489 if (Importer_.is_null ()) {
1492 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1496 if (! DoBackwardGS_) {
1497 for (
size_t ii = 0; ii < numActive; ++ii) {
1498 local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1500 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1501 dtemp.assign(NumVectors,STS::zero());
1503 for (
size_t k = 0; k < NumEntries; ++k) {
1504 const local_ordinal_type col = Indices[k];
1505 for (
size_t m = 0; m < NumVectors; ++m) {
1506 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1510 for (
size_t m = 0; m < NumVectors; ++m) {
1511 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1518 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1519 local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1521 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1522 dtemp.assign (NumVectors, STS::zero ());
1524 for (
size_t k = 0; k < NumEntries; ++k) {
1525 const local_ordinal_type col = Indices[k];
1526 for (
size_t m = 0; m < NumVectors; ++m) {
1527 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1531 for (
size_t m = 0; m < NumVectors; ++m) {
1532 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1538 Tpetra::deep_copy (Y, *Y2);
1544 ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
1545 ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
1547 for (
int j = 0; j < NumSweeps_; ++j) {
1550 if (Importer_.is_null ()) {
1553 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1557 if (! DoBackwardGS_) {
1558 for (
size_t ii = 0; ii < numActive; ++ii) {
1559 local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1561 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1563 for (
size_t m = 0; m < NumVectors; ++m) {
1564 scalar_type dtemp = STS::zero ();
1565 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1566 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1568 for (
size_t k = 0; k < NumEntries; ++k) {
1569 const local_ordinal_type col = Indices[k];
1570 dtemp += Values[k] * y2_local[col];
1572 y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1579 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1580 local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1583 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1585 for (
size_t m = 0; m < NumVectors; ++m) {
1586 scalar_type dtemp = STS::zero ();
1587 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1588 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1590 for (
size_t k = 0; k < NumEntries; ++k) {
1591 const local_ordinal_type col = Indices[k];
1592 dtemp += Values[k] * y2_local[col];
1594 y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1601 Tpetra::deep_copy (Y, *Y2);
1608 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1609 const double numVectors = as<double> (X.getNumVectors ());
1610 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1611 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1612 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1613 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1617 template<
class MatrixType>
1619 Relaxation<MatrixType>::
1620 ApplyInverseGS_CrsMatrix (
const crs_matrix_type& A,
1621 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1622 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 1625 const Tpetra::ESweepDirection direction =
1626 DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1627 if (localSmoothingIndices_.is_null ()) {
1628 A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
1629 NumSweeps_, ZeroStartingSolution_);
1632 A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
1633 DampingFactor_, direction,
1634 NumSweeps_, ZeroStartingSolution_);
1648 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1649 const double numVectors = as<double> (X.getNumVectors ());
1650 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1651 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1652 ApplyFlops_ += NumSweeps_ * numVectors *
1653 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1656 template<
class MatrixType>
1658 Relaxation<MatrixType>::
1659 ApplyInverseGS_BlockCrsMatrix (
const block_crs_matrix_type& A,
1660 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1661 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
1665 using Teuchos::rcpFromRef;
1666 typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
1667 local_ordinal_type, global_ordinal_type, node_type> BMV;
1668 typedef Tpetra::MultiVector<scalar_type,
1669 local_ordinal_type, global_ordinal_type, node_type> MV;
1678 BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
1679 const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
1681 bool performImport =
false;
1683 if (Importer_.is_null ()) {
1684 yBlockCol = rcpFromRef (yBlock);
1687 if (yBlockColumnPointMap_.is_null () ||
1688 yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
1689 yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
1690 yBlockColumnPointMap_ =
1691 rcp (
new BMV (* (A.getColMap ()), A.getBlockSize (),
1692 static_cast<local_ordinal_type
> (yBlock.getNumVectors ())));
1694 yBlockCol = yBlockColumnPointMap_;
1695 performImport =
true;
1698 if (ZeroStartingSolution_) {
1699 yBlockCol->putScalar (STS::zero ());
1701 else if (performImport) {
1702 yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
1705 const Tpetra::ESweepDirection direction =
1706 DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1708 for (
int sweep = 0; sweep < NumSweeps_; ++sweep) {
1709 if (performImport && sweep > 0) {
1710 yBlockCol->doImport(yBlock, *Importer_, Tpetra::INSERT);
1712 A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_,
1713 DampingFactor_, direction);
1714 if (performImport) {
1715 RCP<const MV> yBlockColPointDomain =
1716 yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
1717 Tpetra::deep_copy (Y, *yBlockColPointDomain);
1725 template<
class MatrixType>
1727 Relaxation<MatrixType>::
1728 MTGaussSeidel (
const crs_matrix_type* crsMat,
1729 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1730 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1731 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& ,
1732 const scalar_type& ,
1733 const Tpetra::ESweepDirection direction,
1734 const int numSweeps,
1735 const bool zeroInitialGuess)
const 1737 using Teuchos::null;
1740 using Teuchos::rcpFromRef;
1741 using Teuchos::rcp_const_cast;
1743 typedef scalar_type Scalar;
1744 typedef local_ordinal_type LocalOrdinal;
1745 typedef global_ordinal_type GlobalOrdinal;
1746 typedef node_type Node;
1748 const char prefix[] =
"Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1749 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1751 TEUCHOS_TEST_FOR_EXCEPTION
1752 (crsMat == NULL, std::logic_error, prefix <<
"The matrix is NULL. This " 1753 "should never happen. Please report this bug to the Ifpack2 developers.");
1754 TEUCHOS_TEST_FOR_EXCEPTION
1755 (! crsMat->isFillComplete (), std::runtime_error, prefix <<
"The input " 1756 "CrsMatrix is not fill complete. Please call fillComplete on the matrix," 1757 " then do setup again, before calling apply(). \"Do setup\" means that " 1758 "if only the matrix's values have changed since last setup, you need only" 1759 " call compute(). If the matrix's structure may also have changed, you " 1760 "must first call initialize(), then call compute(). If you have not set" 1761 " up this preconditioner for this matrix before, you must first call " 1762 "initialize(), then call compute().");
1763 TEUCHOS_TEST_FOR_EXCEPTION
1764 (numSweeps < 0, std::invalid_argument, prefix <<
"The number of sweeps " 1765 "must be nonnegative, but you provided numSweeps = " << numSweeps <<
1767 if (numSweeps == 0) {
1771 typedef typename Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> MV;
1772 typedef typename crs_matrix_type::import_type import_type;
1773 typedef typename crs_matrix_type::export_type export_type;
1774 typedef typename crs_matrix_type::map_type map_type;
1776 RCP<const import_type> importer = crsMat->getGraph ()->getImporter ();
1777 RCP<const export_type> exporter = crsMat->getGraph ()->getExporter ();
1778 TEUCHOS_TEST_FOR_EXCEPTION(
1779 ! exporter.is_null (), std::runtime_error,
1780 "This method's implementation currently requires that the matrix's row, " 1781 "domain, and range Maps be the same. This cannot be the case, because " 1782 "the matrix has a nontrivial Export object.");
1784 RCP<const map_type> domainMap = crsMat->getDomainMap ();
1785 RCP<const map_type> rangeMap = crsMat->getRangeMap ();
1786 RCP<const map_type> rowMap = crsMat->getGraph ()->getRowMap ();
1787 RCP<const map_type> colMap = crsMat->getGraph ()->getColMap ();
1789 #ifdef HAVE_IFPACK2_DEBUG 1794 TEUCHOS_TEST_FOR_EXCEPTION(
1795 ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1796 "Ifpack2::Relaxation::MTGaussSeidel requires that the input " 1797 "multivector X be in the domain Map of the matrix.");
1798 TEUCHOS_TEST_FOR_EXCEPTION(
1799 ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1800 "Ifpack2::Relaxation::MTGaussSeidel requires that the input " 1801 "B be in the range Map of the matrix.");
1802 TEUCHOS_TEST_FOR_EXCEPTION(
1803 ! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
1804 "Ifpack2::Relaxation::MTGaussSeidel requires that the input " 1805 "D be in the row Map of the matrix.");
1806 TEUCHOS_TEST_FOR_EXCEPTION(
1807 ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1808 "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the " 1809 "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1810 TEUCHOS_TEST_FOR_EXCEPTION(
1811 ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1812 "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and " 1813 "the range Map of the matrix be the same.");
1819 #endif // HAVE_IFPACK2_DEBUG 1830 RCP<MV> X_domainMap;
1831 bool copyBackOutput =
false;
1832 if (importer.is_null ()) {
1833 if (X.isConstantStride ()) {
1834 X_colMap = rcpFromRef (X);
1835 X_domainMap = rcpFromRef (X);
1842 if (zeroInitialGuess) {
1843 X_colMap->putScalar (ZERO);
1853 X_colMap = rcp (
new MV (colMap, X.getNumVectors ()));
1857 X_domainMap = X_colMap;
1858 if (! zeroInitialGuess) {
1860 deep_copy (*X_domainMap , X);
1861 }
catch (std::exception& e) {
1862 std::ostringstream os;
1863 os <<
"Ifpack2::Relaxation::MTGaussSeidel: " 1864 "deep_copy(*X_domainMap, X) threw an exception: " 1865 << e.what () <<
".";
1866 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, e.what ());
1869 copyBackOutput =
true;
1884 X_colMap = rcp (
new MV (colMap, X.getNumVectors ()));
1886 X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
1888 #ifdef HAVE_IFPACK2_DEBUG 1889 auto X_colMap_host_view = X_colMap->template getLocalView<Kokkos::HostSpace> ();
1890 auto X_domainMap_host_view = X_domainMap->template getLocalView<Kokkos::HostSpace> ();
1892 if (X_colMap->getLocalLength () != 0 && X_domainMap->getLocalLength ()) {
1893 TEUCHOS_TEST_FOR_EXCEPTION(
1894 X_colMap_host_view.data () != X_domainMap_host_view.data (),
1895 std::logic_error,
"Ifpack2::Relaxation::MTGaussSeidel: " 1896 "Pointer to start of column Map view of X is not equal to pointer to " 1897 "start of (domain Map view of) X. This may mean that " 1898 "Tpetra::MultiVector::offsetViewNonConst is broken. " 1899 "Please report this bug to the Tpetra developers.");
1902 TEUCHOS_TEST_FOR_EXCEPTION(
1903 X_colMap_host_view.extent (0) < X_domainMap_host_view.extent (0) ||
1904 X_colMap->getLocalLength () < X_domainMap->getLocalLength (),
1905 std::logic_error,
"Ifpack2::Relaxation::MTGaussSeidel: " 1906 "X_colMap has fewer local rows than X_domainMap. " 1907 "X_colMap_host_view.extent(0) = " << X_colMap_host_view.extent (0)
1908 <<
", X_domainMap_host_view.extent(0) = " 1909 << X_domainMap_host_view.extent (0)
1910 <<
", X_colMap->getLocalLength() = " << X_colMap->getLocalLength ()
1911 <<
", and X_domainMap->getLocalLength() = " 1912 << X_domainMap->getLocalLength ()
1913 <<
". This means that Tpetra::MultiVector::offsetViewNonConst " 1914 "is broken. Please report this bug to the Tpetra developers.");
1916 TEUCHOS_TEST_FOR_EXCEPTION(
1917 X_colMap->getNumVectors () != X_domainMap->getNumVectors (),
1918 std::logic_error,
"Ifpack2::Relaxation::MTGaussSeidel: " 1919 "X_colMap has a different number of columns than X_domainMap. " 1920 "X_colMap->getNumVectors() = " << X_colMap->getNumVectors ()
1921 <<
" != X_domainMap->getNumVectors() = " 1922 << X_domainMap->getNumVectors ()
1923 <<
". This means that Tpetra::MultiVector::offsetViewNonConst " 1924 "is broken. Please report this bug to the Tpetra developers.");
1925 #endif // HAVE_IFPACK2_DEBUG 1927 if (zeroInitialGuess) {
1929 X_colMap->putScalar (ZERO);
1939 X_colMap->doImport (X, *importer, Tpetra::CombineMode::INSERT);
1941 copyBackOutput =
true;
1948 if (B.isConstantStride ()) {
1949 B_in = rcpFromRef (B);
1956 RCP<MV> B_in_nonconst = rcp (
new MV (rowMap, B.getNumVectors()));
1958 deep_copy (*B_in_nonconst, B);
1959 }
catch (std::exception& e) {
1960 std::ostringstream os;
1961 os <<
"Ifpack2::Relaxation::MTGaussSeidel: " 1962 "deep_copy(*B_in_nonconst, B) threw an exception: " 1963 << e.what () <<
".";
1964 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, e.what ());
1966 B_in = rcp_const_cast<
const MV> (B_in_nonconst);
1980 local_matrix_type kcsr = crsMat->getLocalMatrix ();
1981 const size_t NumVectors = X.getNumVectors ();
1983 bool update_y_vector =
true;
1985 bool zero_x_vector =
false;
1987 for (
int sweep = 0; sweep < numSweeps; ++sweep) {
1988 if (! importer.is_null () && sweep > 0) {
1991 X_colMap->doImport (*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
1994 for (
size_t indVec = 0; indVec < NumVectors; ++indVec) {
1995 if (direction == Tpetra::Symmetric) {
1996 KokkosSparse::Experimental::symmetric_gauss_seidel_apply
1997 (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
1998 kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
1999 Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2000 Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2001 zero_x_vector, update_y_vector);
2003 else if (direction == Tpetra::Forward) {
2004 KokkosSparse::Experimental::forward_sweep_gauss_seidel_apply
2005 (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2006 kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2007 Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec ),
2008 Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2009 zero_x_vector, update_y_vector);
2011 else if (direction == Tpetra::Backward) {
2012 KokkosSparse::Experimental::backward_sweep_gauss_seidel_apply
2013 (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2014 kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2015 Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec ),
2016 Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2017 zero_x_vector, update_y_vector);
2020 TEUCHOS_TEST_FOR_EXCEPTION(
2021 true, std::invalid_argument,
2022 prefix <<
"The 'direction' enum does not have any of its valid " 2023 "values: Forward, Backward, or Symmetric.");
2027 if (NumVectors > 1){
2028 update_y_vector =
true;
2031 update_y_vector =
false;
2035 if (copyBackOutput) {
2037 deep_copy (X , *X_domainMap);
2038 }
catch (std::exception& e) {
2039 TEUCHOS_TEST_FOR_EXCEPTION(
2040 true, std::runtime_error, prefix <<
"deep_copy(X, *X_domainMap) " 2041 "threw an exception: " << e.what ());
2047 template<
class MatrixType>
2049 Relaxation<MatrixType>::
2050 ApplyInverseMTSGS_CrsMatrix (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
2051 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X)
const 2053 const crs_matrix_type* crsMat =
dynamic_cast<const crs_matrix_type*
> (&(*A_));
2054 TEUCHOS_TEST_FOR_EXCEPTION
2055 (crsMat == NULL, std::logic_error,
"Ifpack2::Relaxation::apply: " 2056 "Multithreaded Gauss-Seidel methods currently only work when the input " 2057 "matrix is a Tpetra::CrsMatrix.");
2060 const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2063 TEUCHOS_TEST_FOR_EXCEPTION
2064 (! localSmoothingIndices_.is_null (), std::logic_error,
2065 "Our implementation of Multithreaded Gauss-Seidel does not implement the " 2066 "use case where the user supplies an iteration order. " 2067 "This error used to appear as \"MT GaussSeidel ignores the given " 2069 "I tried to add more explanation, but I didn't implement \"MT " 2070 "GaussSeidel\" [sic]. " 2071 "You'll have to ask the person who did.");
2072 this->MTGaussSeidel (crsMat, X, B, *Diagonal_, DampingFactor_, direction,
2073 NumSweeps_, ZeroStartingSolution_);
2075 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
2076 const double numVectors = as<double> (X.getNumVectors ());
2077 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2078 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2079 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2080 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2084 template<
class MatrixType>
2085 void Relaxation<MatrixType>::ApplyInverseMTGS_CrsMatrix (
2086 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
2087 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X)
const {
2089 const crs_matrix_type* crsMat =
dynamic_cast<const crs_matrix_type*
> (&(*A_));
2090 TEUCHOS_TEST_FOR_EXCEPTION(
2091 crsMat == NULL, std::runtime_error,
"Ifpack2::Relaxation::compute: " 2092 "MT methods works for CRSMatrix Only.");
2095 const Tpetra::ESweepDirection direction =
2096 DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
2099 TEUCHOS_TEST_FOR_EXCEPTION
2100 (! localSmoothingIndices_.is_null (), std::logic_error,
2101 "Our implementation of Multithreaded Gauss-Seidel does not implement the " 2102 "use case where the user supplies an iteration order. " 2103 "This error used to appear as \"MT GaussSeidel ignores the given " 2105 "I tried to add more explanation, but I didn't implement \"MT " 2106 "GaussSeidel\" [sic]. " 2107 "You'll have to ask the person who did.");
2108 this->MTGaussSeidel (crsMat, X, B, *Diagonal_, DampingFactor_, direction,
2109 NumSweeps_, ZeroStartingSolution_);
2111 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2112 const double numVectors = as<double> (X.getNumVectors ());
2113 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2114 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2115 ApplyFlops_ += NumSweeps_ * numVectors *
2116 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2119 template<
class MatrixType>
2121 Relaxation<MatrixType>::
2122 ApplyInverseSGS (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2123 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 2125 typedef Relaxation<MatrixType> this_type;
2135 const block_crs_matrix_type* blockCrsMat =
dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr());
2136 const crs_matrix_type* crsMat =
dynamic_cast<const crs_matrix_type*
> (&(*A_));
2137 if (blockCrsMat != NULL) {
2138 const_cast<this_type*
> (
this)->ApplyInverseSGS_BlockCrsMatrix(*blockCrsMat, X, Y);
2140 else if (crsMat != NULL) {
2141 ApplyInverseSGS_CrsMatrix (*crsMat, X, Y);
2143 ApplyInverseSGS_RowMatrix (X, Y);
2148 template<
class MatrixType>
2150 Relaxation<MatrixType>::
2151 ApplyInverseSGS_RowMatrix (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2152 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 2154 using Teuchos::Array;
2155 using Teuchos::ArrayRCP;
2156 using Teuchos::ArrayView;
2160 using Teuchos::rcpFromRef;
2161 typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
2167 if (ZeroStartingSolution_) {
2168 Y.putScalar (STS::zero ());
2171 const size_t NumVectors = X.getNumVectors ();
2172 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
2173 Array<local_ordinal_type> Indices (maxLength);
2174 Array<scalar_type> Values (maxLength);
2177 const size_t numMyRows = A_->getNodeNumRows();
2178 const local_ordinal_type * rowInd = 0;
2179 size_t numActive = numMyRows;
2180 bool do_local = !localSmoothingIndices_.is_null();
2182 rowInd = localSmoothingIndices_.getRawPtr();
2183 numActive = localSmoothingIndices_.size();
2189 if (Importer_.is_null ()) {
2190 updateCachedMultiVector(Y.getMap(),NumVectors);
2192 updateCachedMultiVector(Importer_->getTargetMap(),NumVectors);
2197 Y2 = rcpFromRef (Y);
2201 ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView();
2202 ArrayView<const scalar_type> d_ptr = d_rcp();
2205 bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
2207 if(constant_stride) {
2209 size_t x_stride = X.getStride();
2210 size_t y2_stride = Y2->getStride();
2211 ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
2212 ArrayRCP<const scalar_type> x_rcp = X.get1dView();
2213 ArrayView<scalar_type> y2_ptr = y2_rcp();
2214 ArrayView<const scalar_type> x_ptr = x_rcp();
2215 Array<scalar_type> dtemp(NumVectors,STS::zero());
2217 for (
int j = 0; j < NumSweeps_; j++) {
2220 if (Importer_.is_null ()) {
2222 Tpetra::deep_copy (*Y2, Y);
2224 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2227 for (
size_t ii = 0; ii < numActive; ++ii) {
2228 local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2230 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2231 dtemp.assign(NumVectors,STS::zero());
2233 for (
size_t k = 0; k < NumEntries; ++k) {
2234 const local_ordinal_type col = Indices[k];
2235 for (
size_t m = 0; m < NumVectors; ++m) {
2236 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2240 for (
size_t m = 0; m < NumVectors; ++m) {
2241 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2247 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2248 local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2250 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2251 dtemp.assign(NumVectors,STS::zero());
2253 for (
size_t k = 0; k < NumEntries; ++k) {
2254 const local_ordinal_type col = Indices[k];
2255 for (
size_t m = 0; m < NumVectors; ++m) {
2256 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2260 for (
size_t m = 0; m < NumVectors; ++m) {
2261 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2267 Tpetra::deep_copy (Y, *Y2);
2273 ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
2274 ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
2276 for (
int iter = 0; iter < NumSweeps_; ++iter) {
2279 if (Importer_.is_null ()) {
2281 Tpetra::deep_copy (*Y2, Y);
2283 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2287 for (
size_t ii = 0; ii < numActive; ++ii) {
2288 local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2289 const scalar_type diag = d_ptr[i];
2291 A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2293 for (
size_t m = 0; m < NumVectors; ++m) {
2294 scalar_type dtemp = STS::zero ();
2295 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2296 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2298 for (
size_t k = 0; k < NumEntries; ++k) {
2299 const local_ordinal_type col = Indices[k];
2300 dtemp += Values[k] * y2_local[col];
2302 y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2308 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2309 local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2310 const scalar_type diag = d_ptr[i];
2312 A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2314 for (
size_t m = 0; m < NumVectors; ++m) {
2315 scalar_type dtemp = STS::zero ();
2316 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2317 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2319 for (
size_t k = 0; k < NumEntries; ++k) {
2320 const local_ordinal_type col = Indices[k];
2321 dtemp += Values[k] * y2_local[col];
2323 y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2329 Tpetra::deep_copy (Y, *Y2);
2335 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2336 const double numVectors = as<double> (X.getNumVectors ());
2337 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2338 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2339 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2340 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2344 template<
class MatrixType>
2346 Relaxation<MatrixType>::
2347 ApplyInverseSGS_CrsMatrix (
const crs_matrix_type& A,
2348 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2349 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 2352 const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2353 if (localSmoothingIndices_.is_null ()) {
2354 A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
2355 NumSweeps_, ZeroStartingSolution_);
2358 A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
2359 DampingFactor_, direction,
2360 NumSweeps_, ZeroStartingSolution_);
2377 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2378 const double numVectors = as<double> (X.getNumVectors ());
2379 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2380 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2381 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2382 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2385 template<
class MatrixType>
2387 Relaxation<MatrixType>::
2388 ApplyInverseSGS_BlockCrsMatrix (
const block_crs_matrix_type& A,
2389 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2390 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
2394 using Teuchos::rcpFromRef;
2395 typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
2396 local_ordinal_type, global_ordinal_type, node_type> BMV;
2397 typedef Tpetra::MultiVector<scalar_type,
2398 local_ordinal_type, global_ordinal_type, node_type> MV;
2407 BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
2408 const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
2410 bool performImport =
false;
2412 if (Importer_.is_null ()) {
2413 yBlockCol = Teuchos::rcpFromRef (yBlock);
2416 if (yBlockColumnPointMap_.is_null () ||
2417 yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
2418 yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
2419 yBlockColumnPointMap_ =
2420 rcp (
new BMV (* (A.getColMap ()), A.getBlockSize (),
2421 static_cast<local_ordinal_type
> (yBlock.getNumVectors ())));
2423 yBlockCol = yBlockColumnPointMap_;
2424 performImport =
true;
2427 if (ZeroStartingSolution_) {
2428 yBlockCol->putScalar (STS::zero ());
2430 else if (performImport) {
2431 yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
2435 const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2437 for (
int sweep = 0; sweep < NumSweeps_; ++sweep) {
2438 if (performImport && sweep > 0) {
2439 yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
2441 A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_,
2442 DampingFactor_, direction);
2443 if (performImport) {
2444 RCP<const MV> yBlockColPointDomain =
2445 yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
2446 MV yBlockView = yBlock.getMultiVectorView ();
2447 Tpetra::deep_copy (yBlockView, *yBlockColPointDomain);
2453 template<
class MatrixType>
2456 std::ostringstream os;
2461 os <<
"\"Ifpack2::Relaxation\": {";
2463 os <<
"Initialized: " << (isInitialized () ?
"true" :
"false") <<
", " 2464 <<
"Computed: " << (isComputed () ?
"true" :
"false") <<
", ";
2470 if (PrecType_ == Ifpack2::Details::JACOBI) {
2472 }
else if (PrecType_ == Ifpack2::Details::GS) {
2473 os <<
"Gauss-Seidel";
2474 }
else if (PrecType_ == Ifpack2::Details::SGS) {
2475 os <<
"Symmetric Gauss-Seidel";
2476 }
else if (PrecType_ == Ifpack2::Details::MTGS) {
2477 os <<
"MT Gauss-Seidel";
2478 }
else if (PrecType_ == Ifpack2::Details::MTSGS) {
2479 os <<
"MT Symmetric Gauss-Seidel";
2485 os <<
", " <<
"sweeps: " << NumSweeps_ <<
", " 2486 <<
"damping factor: " << DampingFactor_ <<
", ";
2488 os <<
"use l1: " << DoL1Method_ <<
", " 2489 <<
"l1 eta: " << L1Eta_ <<
", ";
2492 if (A_.is_null ()) {
2493 os <<
"Matrix: null";
2496 os <<
"Global matrix dimensions: [" 2497 << A_->getGlobalNumRows () <<
", " << A_->getGlobalNumCols () <<
"]" 2498 <<
", Global nnz: " << A_->getGlobalNumEntries();
2506 template<
class MatrixType>
2510 const Teuchos::EVerbosityLevel verbLevel)
const 2512 using Teuchos::OSTab;
2513 using Teuchos::TypeNameTraits;
2514 using Teuchos::VERB_DEFAULT;
2515 using Teuchos::VERB_NONE;
2516 using Teuchos::VERB_LOW;
2517 using Teuchos::VERB_MEDIUM;
2518 using Teuchos::VERB_HIGH;
2519 using Teuchos::VERB_EXTREME;
2522 const Teuchos::EVerbosityLevel vl =
2523 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2525 const int myRank = this->getComm ()->getRank ();
2533 if (vl != VERB_NONE && myRank == 0) {
2537 out <<
"\"Ifpack2::Relaxation\":" << endl;
2539 out <<
"MatrixType: \"" << TypeNameTraits<MatrixType>::name () <<
"\"" 2541 if (this->getObjectLabel () !=
"") {
2542 out <<
"Label: " << this->getObjectLabel () << endl;
2544 out <<
"Initialized: " << (isInitialized () ?
"true" :
"false") << endl
2545 <<
"Computed: " << (isComputed () ?
"true" :
"false") << endl
2546 <<
"Parameters: " << endl;
2549 out <<
"\"relaxation: type\": ";
2550 if (PrecType_ == Ifpack2::Details::JACOBI) {
2552 }
else if (PrecType_ == Ifpack2::Details::GS) {
2553 out <<
"Gauss-Seidel";
2554 }
else if (PrecType_ == Ifpack2::Details::SGS) {
2555 out <<
"Symmetric Gauss-Seidel";
2556 }
else if (PrecType_ == Ifpack2::Details::MTGS) {
2557 out <<
"MT Gauss-Seidel";
2558 }
else if (PrecType_ == Ifpack2::Details::MTSGS) {
2559 out <<
"MT Symmetric Gauss-Seidel";
2566 <<
"\"relaxation: sweeps\": " << NumSweeps_ << endl
2567 <<
"\"relaxation: damping factor\": " << DampingFactor_ << endl
2568 <<
"\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2569 <<
"\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2570 <<
"\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2571 <<
"\"relaxation: use l1\": " << DoL1Method_ << endl
2572 <<
"\"relaxation: l1 eta\": " << L1Eta_ << endl;
2574 out <<
"Computed quantities:" << endl;
2577 out <<
"Global number of rows: " << A_->getGlobalNumRows () << endl
2578 <<
"Global number of columns: " << A_->getGlobalNumCols () << endl;
2580 if (checkDiagEntries_ && isComputed ()) {
2581 out <<
"Properties of input diagonal entries:" << endl;
2584 out <<
"Magnitude of minimum-magnitude diagonal entry: " 2585 << globalMinMagDiagEntryMag_ << endl
2586 <<
"Magnitude of maximum-magnitude diagonal entry: " 2587 << globalMaxMagDiagEntryMag_ << endl
2588 <<
"Number of diagonal entries with small magnitude: " 2589 << globalNumSmallDiagEntries_ << endl
2590 <<
"Number of zero diagonal entries: " 2591 << globalNumZeroDiagEntries_ << endl
2592 <<
"Number of diagonal entries with negative real part: " 2593 << globalNumNegDiagEntries_ << endl
2594 <<
"Abs 2-norm diff between computed and actual inverse " 2595 <<
"diagonal: " << globalDiagNormDiff_ << endl;
2598 if (isComputed ()) {
2599 out <<
"Saved diagonal offsets: " 2600 << (savedDiagOffsets_ ?
"true" :
"false") << endl;
2602 out <<
"Call counts and total times (in seconds): " << endl;
2605 out <<
"initialize: " << endl;
2608 out <<
"Call count: " << NumInitialize_ << endl;
2609 out <<
"Total time: " << InitializeTime_ << endl;
2611 out <<
"compute: " << endl;
2614 out <<
"Call count: " << NumCompute_ << endl;
2615 out <<
"Total time: " << ComputeTime_ << endl;
2617 out <<
"apply: " << endl;
2620 out <<
"Call count: " << NumApply_ << endl;
2621 out <<
"Total time: " << ApplyTime_ << endl;
2629 #define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N) \ 2630 template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >; 2632 #endif // IFPACK2_RELAXATION_DEF_HPP std::string description() const
A simple one-line description of this object.
Definition: Ifpack2_Relaxation_def.hpp:2454
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_Relaxation_def.hpp:406
int getNumInitialize() const
Total number of calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:422
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:452
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_Relaxation_def.hpp:184
void compute()
Compute the preconditioner.
Definition: Ifpack2_Relaxation_def.hpp:882
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:484
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition: Ifpack2_Relaxation_def.hpp:416
int getNumApply() const
Total number of calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:434
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:440
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:164
Ifpack2 implementation details.
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_Relaxation_def.hpp:393
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:384
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:247
void setParameters(const Teuchos::ParameterList ¶ms)
Set the relaxation / preconditioner parameters.
Definition: Ifpack2_Relaxation_def.hpp:363
File for utility functions.
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_Relaxation_def.hpp:471
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:464
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition: Ifpack2_Relaxation_def.hpp:230
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:244
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:458
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:446
Definition: Ifpack2_Container.hpp:774
int getNumCompute() const
Total number of calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:428
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:241
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition: Ifpack2_Relaxation_def.hpp:373
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:238
virtual ~Relaxation()
Destructor.
Definition: Ifpack2_Relaxation_def.hpp:225
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:586
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition: Ifpack2_Relaxation_decl.hpp:223
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object's attributes to the given output stream.
Definition: Ifpack2_Relaxation_def.hpp:2509
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:72
void getValidParameters(Teuchos::ParameterList ¶ms)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition: Ifpack2_Parameters.cpp:50
void initialize()
Initialize the preconditioner.
Definition: Ifpack2_Relaxation_def.hpp:607
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Relaxation_decl.hpp:250