Ifpack2 Templated Preconditioning Package  Version 1.0
Ifpack2_Container.hpp
Go to the documentation of this file.
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_CONTAINER_HPP
44 #define IFPACK2_CONTAINER_HPP
45 
48 
49 #include "Ifpack2_ConfigDefs.hpp"
50 #include "Tpetra_RowMatrix.hpp"
51 #include "Teuchos_Describable.hpp"
52 #include <Ifpack2_Partitioner.hpp>
53 #include <Tpetra_Map.hpp>
54 #include <Tpetra_Experimental_BlockCrsMatrix_decl.hpp>
56 #include <Teuchos_Time.hpp>
57 #include <iostream>
58 #include <type_traits>
59 
60 #ifndef DOXYGEN_SHOULD_SKIP_THIS
61 namespace Teuchos {
62  // Forward declaration to avoid include.
63  class ParameterList;
64 } // namespace Teuchos
65 #endif // DOXYGEN_SHOULD_SKIP_THIS
66 
67 namespace Ifpack2 {
68 
113 template<class MatrixType>
116 
117 protected:
118  typedef typename MatrixType::scalar_type scalar_type;
119  typedef typename MatrixType::local_ordinal_type local_ordinal_type;
120  typedef typename MatrixType::global_ordinal_type global_ordinal_type;
121  typedef typename MatrixType::node_type node_type;
122  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> mv_type;
123  typedef Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> vector_type;
124  typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
126  typedef Tpetra::Import<local_ordinal_type, global_ordinal_type, node_type> import_type;
128  typedef Tpetra::Experimental::BlockCrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> block_crs_matrix_type;
129  typedef Tpetra::RowMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> row_matrix_type;
130 
131  static_assert(std::is_same<MatrixType, row_matrix_type>::value,
132  "Ifpack2::Container: Please use MatrixType = Tpetra::RowMatrix.");
133 
135  typedef typename Kokkos::Details::ArithTraits<scalar_type>::val_type impl_scalar_type;
137 
138 public:
139  typedef typename mv_type::dual_view_type::t_host HostView;
140 
151  const Teuchos::RCP<const import_type>& importer,
152  int OverlapLevel,
153  scalar_type DampingFactor) :
154  inputMatrix_ (matrix),
155  OverlapLevel_ (OverlapLevel),
156  DampingFactor_ (DampingFactor),
157  Importer_ (importer)
158  {
159  using Teuchos::Ptr;
160  using Teuchos::RCP;
161  using Teuchos::Array;
162  using Teuchos::ArrayView;
163  using Teuchos::Comm;
164  // typedef Tpetra::Import<local_ordinal_type, global_ordinal_type, node_type> import_type; // unused
165  NumLocalRows_ = inputMatrix_->getNodeNumRows();
166  NumGlobalRows_ = inputMatrix_->getGlobalNumRows();
167  NumGlobalNonzeros_ = inputMatrix_->getGlobalNumEntries();
168  IsParallel_ = inputMatrix_->getRowMap()->getComm()->getSize() != 1;
169  auto global_bcrs =
170  Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(inputMatrix_);
171  hasBlockCrs_ = !global_bcrs.is_null();
172  if(hasBlockCrs_)
173  bcrsBlockSize_ = global_bcrs->getBlockSize();
174  else
175  bcrsBlockSize_ = 1;
176  setBlockSizes(partitions);
177  }
178 
191  const Teuchos::Array<local_ordinal_type>& localRows) :
192  inputMatrix_ (matrix),
193  numBlocks_ (1),
194  partitions_ (localRows.size()),
195  blockRows_ (1),
196  partitionIndices_ (1),
197  OverlapLevel_ (0),
198  DampingFactor_ (STS::one()),
199  Importer_ (Teuchos::null)
200  {
201  NumLocalRows_ = inputMatrix_->getNodeNumRows();
202  NumGlobalRows_ = inputMatrix_->getGlobalNumRows();
203  NumGlobalNonzeros_ = inputMatrix_->getGlobalNumEntries();
204  IsParallel_ = inputMatrix_->getRowMap()->getComm()->getSize() > 1;
205  blockRows_[0] = localRows.size();
206  partitionIndices_[0] = 0;
207  const block_crs_matrix_type* global_bcrs =
208  Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(inputMatrix_).get();
209  hasBlockCrs_ = global_bcrs;
210  if(hasBlockCrs_)
211  bcrsBlockSize_ = global_bcrs->getBlockSize();
212  else
213  bcrsBlockSize_ = 1;
214  for(local_ordinal_type i = 0; i < localRows.size(); i++)
215  partitions_[i] = localRows[i];
216  }
217 
219  virtual ~Container() {};
220 
237  {
239  (&partitions_[partitionIndices_[blockIndex]], blockRows_[blockIndex]);
240  }
241 
250  virtual void initialize () = 0;
251 
254  {
255  //First, create a grand total of all the rows in all the blocks
256  //Note: If partitioner allowed overlap, this could be greater than the # of local rows
257  local_ordinal_type totalBlockRows = 0;
258  numBlocks_ = partitions.size();
261  for(int i = 0; i < numBlocks_; i++)
262  {
263  local_ordinal_type rowsInBlock = partitions[i].size();
264  blockRows_[i] = rowsInBlock;
265  partitionIndices_[i] = totalBlockRows;
266  totalBlockRows += rowsInBlock;
267  }
268  partitions_.resize(totalBlockRows);
269  //set partitions_: each entry is the partition/block of the row
270  local_ordinal_type iter = 0;
271  for(int i = 0; i < numBlocks_; i++)
272  {
273  for(int j = 0; j < blockRows_[i]; j++)
274  {
275  partitions_[iter++] = partitions[i][j];
276  }
277  }
278  }
279 
280  void getMatDiag() const
281  {
282  if(Diag_.is_null())
283  {
284  Diag_ = rcp(new vector_type(inputMatrix_->getDomainMap()));
285  inputMatrix_->getLocalDiagCopy(*Diag_);
286  }
287  }
288 
294  virtual void compute () = 0;
295 
296  void DoJacobi(HostView& X, HostView& Y, int stride) const;
297  void DoOverlappingJacobi(HostView& X, HostView& Y, HostView& W, int stride) const;
298  void DoGaussSeidel(HostView& X, HostView& Y, HostView& Y2, int stride) const;
299  void DoSGS(HostView& X, HostView& Y, HostView& Y2, int stride) const;
300 
302  virtual void setParameters (const Teuchos::ParameterList& List) = 0;
303 
305  virtual bool isInitialized () const = 0;
306 
308  virtual bool isComputed () const = 0;
309 
322  virtual void
323  apply (HostView& X,
324  HostView& Y,
325  int blockIndex,
326  int stride,
327  Teuchos::ETransp mode = Teuchos::NO_TRANS,
328  scalar_type alpha = Teuchos::ScalarTraits<scalar_type>::one(),
329  scalar_type beta = Teuchos::ScalarTraits<scalar_type>::zero()) const = 0;
330 
333  // The underlying container implements the splitting <tt>A = D + R</tt>. Only
334  // it can have efficient access to D and R, as these are constructed in the
335  // symbolic and numeric phases.
336  //
337  // This is the first performance-portable implementation of a block
338  // relaxation, and it is supported currently only by BlockTriDiContainer.
339  virtual void applyInverseJacobi (const mv_type& /* X */, mv_type& /* Y */,
340  bool /* zeroStartingSolution = false */,
341  int /* numSweeps = 1 */) const
342  { TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "Not implemented."); }
343 
345  void applyMV (mv_type& X, mv_type& Y) const
346  {
347  HostView XView = X.template getLocalView<Kokkos::HostSpace>();
348  HostView YView = Y.template getLocalView<Kokkos::HostSpace>();
349  this->apply (XView, YView, 0, X.getStride());
350  }
351 
372  virtual void
373  weightedApply (HostView& X,
374  HostView& Y,
375  HostView& W,
376  int blockIndex,
377  int stride,
378  Teuchos::ETransp mode = Teuchos::NO_TRANS,
379  scalar_type alpha = Teuchos::ScalarTraits<scalar_type>::one(),
380  scalar_type beta = Teuchos::ScalarTraits<scalar_type>::zero()) const = 0;
381 
383  void weightedApplyMV (mv_type& X,
384  mv_type& Y,
385  vector_type& W)
386  {
387  HostView XView = X.template getLocalView<Kokkos::HostSpace>();
388  HostView YView = Y.template getLocalView<Kokkos::HostSpace>();
389  HostView WView = W.template getLocalView<Kokkos::HostSpace>();
390  weightedApply (XView, YView, WView, 0, X.getStride());
391  }
392 
393  virtual void clearBlocks();
394 
396  virtual std::ostream& print (std::ostream& os) const = 0;
397 
400  static std::string getName()
401  {
402  return "Generic";
403  }
404 
405 protected:
408 
412  Teuchos::Array<local_ordinal_type> partitions_; //size: total # of local rows (in all local blocks)
424  scalar_type DampingFactor_;
428  local_ordinal_type NumLocalRows_;
430  global_ordinal_type NumGlobalRows_;
432  global_ordinal_type NumGlobalNonzeros_;
437 };
438 
440 template <class MatrixType>
441 inline std::ostream&
442 operator<< (std::ostream& os, const Ifpack2::Container<MatrixType>& obj)
443 {
444  return obj.print (os);
445 }
446 
447 template <class MatrixType>
448 void Container<MatrixType>::DoJacobi(HostView& X, HostView& Y, int stride) const
449 {
450  const scalar_type one = STS::one();
451  // Note: Flop counts copied naively from Ifpack.
452  // use partitions_ and blockRows_
453  size_t numVecs = X.extent(1);
454  // Non-overlapping Jacobi
455  for (local_ordinal_type i = 0; i < numBlocks_; i++)
456  {
457  // may happen that a partition is empty
458  if(blockRows_[i] != 1 || hasBlockCrs_)
459  {
460  if(blockRows_[i] == 0 )
461  continue;
462  apply(X, Y, i, stride, Teuchos::NO_TRANS, DampingFactor_, one);
463  }
464  else // singleton, can't access Containers_[i] as it was never filled and may be null.
465  {
466  local_ordinal_type LRID = partitions_[partitionIndices_[i]];
467  getMatDiag();
468  HostView diagView = Diag_->template getLocalView<Kokkos::HostSpace>();
469  impl_scalar_type d = (impl_scalar_type) one / diagView(LRID, 0);
470  for(size_t nv = 0; nv < numVecs; nv++)
471  {
472  impl_scalar_type x = X(LRID, nv);
473  Y(LRID, nv) = x * d;
474  }
475  }
476  }
477 }
478 
479 template <class MatrixType>
480 void Container<MatrixType>::DoOverlappingJacobi(HostView& X, HostView& Y, HostView& W, int stride) const
481 {
482  // Overlapping Jacobi
483  for(local_ordinal_type i = 0; i < numBlocks_; i++)
484  {
485  // may happen that a partition is empty
486  if(blockRows_[i] == 0)
487  continue;
488  if(blockRows_[i] != 1)
489  weightedApply(X, Y, W, i, stride, Teuchos::NO_TRANS, DampingFactor_, STS::one());
490  }
491 }
492 
493 template<class MatrixType>
494 void Container<MatrixType>::DoGaussSeidel(HostView& X, HostView& Y, HostView& Y2, int stride) const
495 {
496  using Teuchos::Array;
497  using Teuchos::ArrayRCP;
498  using Teuchos::ArrayView;
499  using Teuchos::Ptr;
500  using Teuchos::RCP;
501  using Teuchos::rcp;
502  using Teuchos::rcpFromRef;
503  // Note: Flop counts copied naively from Ifpack.
504  const scalar_type one = STS::one();
505  const size_t Length = inputMatrix_->getNodeMaxNumRowEntries();
506  auto numVecs = X.extent(1);
507  Array<scalar_type> Values;
508  Array<local_ordinal_type> Indices;
509  Indices.resize(Length);
510 
511  Values.resize(bcrsBlockSize_ * bcrsBlockSize_ * Length); //note: if A was not a BlockCRS, bcrsBlockSize_ is 1
512  // I think I decided I need two extra vectors:
513  // One to store the sum of the corrections (initialized to zero)
514  // One to store the temporary residual (doesn't matter if it is zeroed or not)
515  // My apologies for making the names clear and meaningful. (X=RHS, Y=guess?! Nice.)
516  HostView Resid("", X.extent(0), X.extent(1));
517  for(local_ordinal_type i = 0; i < numBlocks_; i++)
518  {
519  if(blockRows_[i] > 1 || hasBlockCrs_)
520  {
521  if (blockRows_[i] == 0)
522  continue;
523  // update from previous block
524  ArrayView<const local_ordinal_type> localRows = getLocalRows (i);
525  const size_t localNumRows = blockRows_[i];
526  for(size_t j = 0; j < localNumRows; j++)
527  {
528  const local_ordinal_type LID = localRows[j]; // Containers_[i]->ID (j);
529  size_t NumEntries;
530  inputMatrix_->getLocalRowCopy(LID, Indices(), Values(), NumEntries);
531  for(size_t m = 0; m < numVecs; m++)
532  {
533  if(hasBlockCrs_)
534  {
535  for (int localR = 0; localR < bcrsBlockSize_; localR++)
536  Resid(LID * bcrsBlockSize_ + localR, m) = X(LID * bcrsBlockSize_ + localR, m);
537  for (size_t k = 0; k < NumEntries; ++k)
538  {
539  const local_ordinal_type col = Indices[k];
540  for (int localR = 0; localR < bcrsBlockSize_; localR++)
541  {
542  for(int localC = 0; localC < bcrsBlockSize_; localC++)
543  {
544  Resid(LID * bcrsBlockSize_ + localR, m) -=
545  Values[k * bcrsBlockSize_ * bcrsBlockSize_ + localR + localC * bcrsBlockSize_]
546  * Y2(col * bcrsBlockSize_ + localC, m);
547  }
548  }
549  }
550  }
551  else
552  {
553  Resid(LID, m) = X(LID, m);
554  for (size_t k = 0; k < NumEntries; ++k)
555  {
556  const local_ordinal_type col = Indices[k];
557  Resid(LID, m) -= Values[k] * Y2(col, m);
558  }
559  }
560  }
561  }
562  // solve with this block
563  //
564  // Note: I'm abusing the ordering information, knowing that X/Y
565  // and Y2 have the same ordering for on-proc unknowns.
566  //
567  // Note: Add flop counts for inverse apply
568  apply(Resid, Y2, i, stride, Teuchos::NO_TRANS, DampingFactor_, one);
569  }
570  else if(blockRows_[i] == 1)
571  {
572  // singleton, can't access Containers_[i] as it was never filled and may be null.
573  // a singleton calculation is exact, all residuals should be zero.
574  local_ordinal_type LRID = partitionIndices_[i]; // by definition, a singleton 1 row in block.
575  getMatDiag();
576  HostView diagView = Diag_->template getLocalView<Kokkos::HostSpace>();
577  impl_scalar_type d = (impl_scalar_type) one / diagView(LRID, 0);
578  for(size_t m = 0; m < numVecs; m++)
579  {
580  impl_scalar_type x = X(LRID, m);
581  impl_scalar_type newy = x * d;
582  Y2(LRID, m) = newy;
583  }
584  } // end else
585  } // end for numBlocks_
586  if(IsParallel_)
587  {
588  auto numMyRows = inputMatrix_->getNodeNumRows();
589  for (size_t m = 0; m < numVecs; ++m)
590  {
591  for (size_t i = 0; i < numMyRows * bcrsBlockSize_; ++i)
592  {
593  Y(i, m) = Y2(i, m);
594  }
595  }
596  }
597 }
598 
599 template<class MatrixType>
600 void Container<MatrixType>::DoSGS(HostView& X, HostView& Y, HostView& Y2, int stride) const
601 {
602  using Teuchos::Array;
603  using Teuchos::ArrayRCP;
604  using Teuchos::ArrayView;
605  using Teuchos::Ptr;
606  using Teuchos::ptr;
607  using Teuchos::RCP;
608  using Teuchos::rcp;
609  using Teuchos::rcpFromRef;
610  const scalar_type one = STS::one();
611  auto numVecs = X.extent(1);
612  const size_t Length = inputMatrix_->getNodeMaxNumRowEntries();
613  Array<scalar_type> Values;
614  Array<local_ordinal_type> Indices(Length);
615  Values.resize(bcrsBlockSize_ * bcrsBlockSize_ * Length);
616  // I think I decided I need two extra vectors:
617  // One to store the sum of the corrections (initialized to zero)
618  // One to store the temporary residual (doesn't matter if it is zeroed or not)
619  // My apologies for making the names clear and meaningful. (X=RHS, Y=guess?! Nice.)
620  HostView Resid("", X.extent(0), X.extent(1));
621  // Forward Sweep
622  for(local_ordinal_type i = 0; i < numBlocks_; i++)
623  {
624  if(blockRows_[i] != 1 || hasBlockCrs_)
625  {
626  if(blockRows_[i] == 0)
627  continue; // Skip empty partitions
628  // update from previous block
629  ArrayView<const local_ordinal_type> localRows = getLocalRows(i);
630  for(local_ordinal_type j = 0; j < blockRows_[i]; j++)
631  {
632  const local_ordinal_type LID = localRows[j]; // Containers_[i]->ID (j);
633  size_t NumEntries;
634  inputMatrix_->getLocalRowCopy(LID, Indices(), Values(), NumEntries);
635  //set tmpresid = initresid - A*correction
636  for(size_t m = 0; m < numVecs; m++)
637  {
638  if(hasBlockCrs_)
639  {
640  for(int localR = 0; localR < bcrsBlockSize_; localR++)
641  Resid(LID * bcrsBlockSize_ + localR, m) = X(LID * bcrsBlockSize_ + localR, m);
642  for(size_t k = 0; k < NumEntries; ++k)
643  {
644  const local_ordinal_type col = Indices[k];
645  for (int localR = 0; localR < bcrsBlockSize_; localR++)
646  {
647  for(int localC = 0; localC < bcrsBlockSize_; localC++)
648  Resid(LID * bcrsBlockSize_ + localR, m) -=
649  Values[k * (bcrsBlockSize_ * bcrsBlockSize_) + (localR + localC * bcrsBlockSize_)]
650  * Y2(col * bcrsBlockSize_ + localC, m);
651  }
652  }
653  }
654  else
655  {
656  Resid(LID, m) = X(LID, m);
657  for(size_t k = 0; k < NumEntries; k++)
658  {
659  local_ordinal_type col = Indices[k];
660  Resid(LID, m) -= Values[k] * Y2(col, m);
661  }
662  }
663  }
664  }
665  // solve with this block
666  //
667  // Note: I'm abusing the ordering information, knowing that X/Y
668  // and Y2 have the same ordering for on-proc unknowns.
669  //
670  // Note: Add flop counts for inverse apply
671  apply(Resid, Y2, i, stride, Teuchos::NO_TRANS, DampingFactor_, one);
672  // operations for all getrow's
673  }
674  else // singleton, can't access Containers_[i] as it was never filled and may be null.
675  {
676  local_ordinal_type LRID = partitions_[partitionIndices_[i]];
677  getMatDiag();
678  HostView diagView = Diag_->template getLocalView<Kokkos::HostSpace>();
679  impl_scalar_type d = (impl_scalar_type) one / diagView(LRID, 0);
680  for(size_t m = 0; m < numVecs; m++)
681  {
682  impl_scalar_type x = X(LRID, m);
683  impl_scalar_type newy = x * d;
684  Y2(LRID, m) = newy;
685  }
686  } // end else
687  } // end forward sweep over NumLocalBlocks
688  // Reverse Sweep
689  //
690  // mfh 12 July 2013: The unusual iteration bounds, and the use of
691  // i-1 rather than i in the loop body, ensure correctness even if
692  // local_ordinal_type is unsigned. "i = numBlocks_-1; i >= 0;
693  // i--" will loop forever if local_ordinal_type is unsigned, because
694  // unsigned integers are (trivially) always nonnegative.
695  for(local_ordinal_type i = numBlocks_; i > 0; --i)
696  {
697  if(hasBlockCrs_ || blockRows_[i-1] != 1)
698  {
699  if(blockRows_[i - 1] == 0)
700  continue;
701  // update from previous block
702  ArrayView<const local_ordinal_type> localRows = getLocalRows(i-1);
703  for(local_ordinal_type j = 0; j < blockRows_[i-1]; j++)
704  {
705  const local_ordinal_type LID = localRows[j]; // Containers_[i-1]->ID (j);
706  size_t NumEntries;
707  inputMatrix_->getLocalRowCopy(LID, Indices(), Values(), NumEntries);
708  //set tmpresid = initresid - A*correction
709  for (size_t m = 0; m < numVecs; m++)
710  {
711  if(hasBlockCrs_)
712  {
713  for(int localR = 0; localR < bcrsBlockSize_; localR++)
714  Resid(LID * bcrsBlockSize_ + localR, m) = X(LID * bcrsBlockSize_ + localR, m);
715  for(size_t k = 0; k < NumEntries; ++k)
716  {
717  const local_ordinal_type col = Indices[k];
718  for(int localR = 0; localR < bcrsBlockSize_; localR++)
719  {
720  for(int localC = 0; localC < bcrsBlockSize_; localC++)
721  Resid(LID*bcrsBlockSize_+localR, m) -=
722  Values[k * (bcrsBlockSize_ * bcrsBlockSize_) + (localR + localC * bcrsBlockSize_)]
723  * Y2(col * bcrsBlockSize_ + localC, m);
724  }
725  }
726  }
727  else
728  {
729  Resid(LID, m) = X(LID, m);
730  for(size_t k = 0; k < NumEntries; ++k)
731  {
732  local_ordinal_type col = Indices[k];
733  Resid(LID, m) -= Values[k] * Y2(col, m);
734  }
735  }
736  }
737  }
738  // solve with this block
739  //
740  // Note: I'm abusing the ordering information, knowing that X/Y
741  // and Y2 have the same ordering for on-proc unknowns.
742  //
743  // Note: Add flop counts for inverse apply
744  apply(Resid, Y2, i - 1, stride, Teuchos::NO_TRANS, DampingFactor_, one);
745  // operations for all getrow's
746  } // end Partitioner_->numRowsInPart (i) != 1 )
747  // else do nothing, as by definition with a singleton, the residuals are zero.
748  } //end reverse sweep
749  if(IsParallel_)
750  {
751  auto numMyRows = inputMatrix_->getNodeNumRows();
752  for (size_t m = 0; m < numVecs; ++m)
753  {
754  for (size_t i = 0; i < numMyRows * bcrsBlockSize_; ++i)
755  {
756  Y(i, m) = Y2(i, m);
757  }
758  }
759  }
760 }
761 
762 template<class MatrixType>
763 void Container<MatrixType>::clearBlocks()
764 {
765  numBlocks_ = 0;
766  partitions_.clear();
767  blockRows_.clear();
768  partitionIndices_.clear();
769  Diag_ = Teuchos::null; //Diag_ will be recreated if needed
770 }
771 
772 } // namespace Ifpack2
773 
774 namespace Teuchos {
775 
780 template<class MatrixType>
781 class TEUCHOSCORE_LIB_DLL_EXPORT TypeNameTraits< ::Ifpack2::Container<MatrixType> >
782 {
783  public:
784  static std::string name () {
785  return std::string ("Ifpack2::Container<") +
787  }
788 
789  static std::string concreteName (const ::Ifpack2::Container<MatrixType>&) {
790  return name ();
791  }
792 };
793 
794 } // namespace Teuchos
795 
796 #endif // IFPACK2_CONTAINER_HPP
int OverlapLevel_
Number of rows of overlap for adjacent blocks.
Definition: Ifpack2_Container.hpp:422
global_ordinal_type NumGlobalNonzeros_
Number of nonzeros in input matrix.
Definition: Ifpack2_Container.hpp:432
virtual void initialize()=0
Do all set-up operations that only require matrix structure.
int numBlocks_
The number of blocks (partitions) in the container.
Definition: Ifpack2_Container.hpp:410
Teuchos::Array< local_ordinal_type > blockRows_
Number of rows in each block.
Definition: Ifpack2_Container.hpp:414
static std::string getName()
Definition: Ifpack2_Container.hpp:400
global_ordinal_type NumGlobalRows_
Number of global rows in input matrix.
Definition: Ifpack2_Container.hpp:430
bool hasBlockCrs_
Whether the input matrix is a BlockCRS matrix.
Definition: Ifpack2_Container.hpp:434
scalar_type DampingFactor_
Damping factor, passed to apply() as alpha.
Definition: Ifpack2_Container.hpp:424
bool is_null() const
Ifpack2::Partitioner:
Definition: Ifpack2_Partitioner.hpp:179
int bcrsBlockSize_
If hasBlockCrs_, the number of DOFs per vertex. Otherwise 1.
Definition: Ifpack2_Container.hpp:436
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
virtual bool isComputed() const =0
Return true if the container has been successfully computed.
local_ordinal_type NumLocalRows_
Number of local rows in input matrix.
Definition: Ifpack2_Container.hpp:428
virtual void weightedApply(HostView &X, HostView &Y, HostView &W, int blockIndex, int stride, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const =0
Compute Y := alpha * diag(D) * M^{-1} (diag(D) * X) + beta*Y.
Teuchos::RCP< const row_matrix_type > inputMatrix_
The input matrix to the constructor.
Definition: Ifpack2_Container.hpp:407
size_type size() const
Teuchos::RCP< vector_type > Diag_
Diagonal elements.
Definition: Ifpack2_Container.hpp:418
void resize(size_type new_size, const value_type &x=value_type())
Container(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< local_ordinal_type > > &partitions, const Teuchos::RCP< const import_type > &importer, int OverlapLevel, scalar_type DampingFactor)
Constructor.
Definition: Ifpack2_Container.hpp:149
Teuchos::RCP< const Tpetra::Import< local_ordinal_type, global_ordinal_type, node_type > > Importer_
Importer for importing off-process elements of MultiVectors.
Definition: Ifpack2_Container.hpp:426
bool IsParallel_
Whether the problem is distributed across multiple MPI processes.
Definition: Ifpack2_Container.hpp:420
void weightedApplyMV(mv_type &X, mv_type &Y, vector_type &W)
Wrapper for weightedApply with MVs, used in unit tests (never called by BlockRelaxation) ...
Definition: Ifpack2_Container.hpp:383
Teuchos::ArrayView< const local_ordinal_type > getLocalRows(int blockIndex) const
Local indices of the rows of the input matrix that belong to this block.
Definition: Ifpack2_Container.hpp:236
Teuchos::Array< local_ordinal_type > partitionIndices_
Starting index in partitions_ of local row indices for each block.
Definition: Ifpack2_Container.hpp:416
virtual void compute()=0
Extract the local diagonal block and prepare the solver.
Interface for creating and solving a local linear problem.
Definition: Ifpack2_Container.hpp:114
virtual void applyInverseJacobi(const mv_type &, mv_type &, bool, int) const
Compute Y := (1 - a) Y + a D^{-1} (X - R*Y).
Definition: Ifpack2_Container.hpp:339
virtual void apply(HostView &X, HostView &Y, int blockIndex, int stride, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const =0
Compute Y := alpha * M^{-1} X + beta*Y.
virtual void setParameters(const Teuchos::ParameterList &List)=0
Set parameters.
virtual std::ostream & print(std::ostream &os) const =0
Print basic information about the container to os.
virtual ~Container()
Destructor.
Definition: Ifpack2_Container.hpp:219
Teuchos::Array< local_ordinal_type > partitions_
Local indices of the rows of the input matrix that belong to this block.
Definition: Ifpack2_Container.hpp:412
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:72
virtual bool isInitialized() const =0
Return true if the container has been successfully initialized.
void setBlockSizes(const Teuchos::Array< Teuchos::Array< local_ordinal_type > > &partitions)
Initialize arrays with information about block sizes.
Definition: Ifpack2_Container.hpp:253
Kokkos::Details::ArithTraits< scalar_type >::val_type impl_scalar_type
Internal representation of Scalar in Kokkos::View.
Definition: Ifpack2_Container.hpp:132
Container(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< local_ordinal_type > &localRows)
Constructor for single block.
Definition: Ifpack2_Container.hpp:190
void applyMV(mv_type &X, mv_type &Y) const
Wrapper for apply with MVs, used in unit tests (never called by BlockRelaxation)
Definition: Ifpack2_Container.hpp:345
static std::string name()