43 #ifndef IFPACK2_BANDEDCONTAINER_DEF_HPP 44 #define IFPACK2_BANDEDCONTAINER_DEF_HPP 46 #include "Teuchos_LAPACK.hpp" 47 #include "Tpetra_CrsMatrix.hpp" 53 # include "Teuchos_DefaultMpiComm.hpp" 55 # include "Teuchos_DefaultSerialComm.hpp" 60 template<
class MatrixType,
class LocalScalarType>
63 const Teuchos::Array<Teuchos::Array<LO> >& partitions,
64 const Teuchos::RCP<const import_type>&,
66 ContainerImpl<MatrixType, LocalScalarType>(matrix, partitions, pointIndexed),
67 ipiv_(this->blockRows_.size() * this->scalarsPerRow_),
68 kl_(this->numBlocks_, -1),
69 ku_(this->numBlocks_, -1),
70 scalarOffsets_(this->numBlocks_)
72 TEUCHOS_TEST_FOR_EXCEPTION(
73 ! matrix->hasColMap (), std::invalid_argument,
"Ifpack2::BandedContainer: " 74 "The constructor's input matrix must have a column Map.");
77 template<
class MatrixType,
class LocalScalarType>
81 template<
class MatrixType,
class LocalScalarType>
85 if(List.isParameter(
"relaxation: banded container superdiagonals"))
87 int ku = List.get<
int>(
"relaxation: banded container superdiagonals");
88 for(LO b = 0; b < this->numBlocks_; b++)
91 if(List.isParameter(
"relaxation: banded container subdiagonals"))
93 int kl = List.get<
int>(
"relaxation: banded container subdiagonals");
94 for(LO b = 0; b < this->numBlocks_; b++)
99 template<
class MatrixType,
class LocalScalarType>
103 using Teuchos::Array;
104 using Teuchos::ArrayView;
106 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
107 size_t colToOffsetSize = this->inputMatrix_->getNodeNumCols();
108 if(this->pointIndexed_)
109 colToOffsetSize *= this->bcrsBlockSize_;
110 Array<LO> colToBlockOffset(colToOffsetSize, INVALID);
113 for(
int i = 0; i < this->numBlocks_; i++)
118 if(this->scalarsPerRow_ > 1)
121 LO blockStart = this->blockOffsets_[i];
122 LO blockEnd = (i == this->numBlocks_ - 1) ? this->blockRows_.size() : this->blockOffsets_[i + 1];
123 ArrayView<const LO> blockRows = this->getBlockRows(i);
127 for(
size_t j = 0; j < size_t(blockRows.size()); j++)
129 LO localCol = this->translateRowToCol(blockRows[j]);
130 colToBlockOffset[localCol] = blockStart + j;
132 for(LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++)
138 LO inputRow = this->blockRows_[blockStart + blockRow];
139 this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values, numEntries);
140 for(LO k = 0; k < numEntries; k++)
142 LO colOffset = colToBlockOffset[indices[k]];
143 if(blockStart <= colOffset && colOffset < blockEnd)
148 LO blockCol = colOffset - blockStart;
149 for(LO bc = 0; bc < this->bcrsBlockSize_; bc++)
151 for(LO br = 0; br < this->bcrsBlockSize_; br++)
153 LO r = this->bcrsBlockSize_ * blockRow + br;
154 LO c = this->bcrsBlockSize_ * blockCol + bc;
168 LO blockStart = this->blockOffsets_[i];
169 LO blockEnd = (i == this->numBlocks_ - 1) ? this->blockRows_.size() : this->blockOffsets_[i + 1];
170 ArrayView<const LO> blockRows = this->getBlockRows(i);
174 for(
size_t j = 0; j < size_t(blockRows.size()); j++)
177 LO localCol = this->translateRowToCol(blockRows[j]);
178 colToBlockOffset[localCol] = blockStart + j;
180 for(LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++)
183 LO inputSplitRow = this->blockRows_[blockStart + blockRow];
184 auto rowView = this->getInputRowView(inputSplitRow);
185 for(
size_t k = 0; k < rowView.size(); k++)
187 LO colOffset = colToBlockOffset[rowView.ind(k)];
188 if(blockStart <= colOffset && colOffset < blockEnd)
190 LO blockCol = colOffset - blockStart;
191 maxSub = std::max(maxSub, blockRow - blockCol);
192 maxSuper = std::max(maxSuper, blockCol - blockRow);
202 template<
class MatrixType,
class LocalScalarType>
214 for(LO b = 0; b < this->numBlocks_; b++)
216 LO stride = 2 * kl_[b] + ku_[b] + 1;
217 scalarOffsets_[b] = totalScalars;
218 totalScalars += stride * this->blockSizes_[b] * this->scalarsPerRow_;
220 scalars_.resize(totalScalars);
221 for(
int b = 0; b < this->numBlocks_; b++)
225 LO nrows = this->blockSizes_[b] * this->scalarsPerRow_;
226 diagBlocks_.emplace_back(Teuchos::View, scalars_.data() + scalarOffsets_[b], 2 * kl_[b] + ku_[b] + 1, nrows, nrows, kl_[b], kl_[b] + ku_[b]);
227 diagBlocks_[b].putScalar(Teuchos::ScalarTraits<LSC>::zero());
229 std::fill (ipiv_.begin (), ipiv_.end (), 0);
232 this->IsComputed_ =
false;
233 this->IsInitialized_ =
true;
236 template<
class MatrixType,
class LocalScalarType>
241 TEUCHOS_TEST_FOR_EXCEPTION(
242 ipiv_.size () != this->blockRows_.size() * this->scalarsPerRow_, std::logic_error,
243 "Ifpack2::BandedContainer::compute: ipiv_ array has the wrong size. " 244 "Please report this bug to the Ifpack2 developers.");
246 this->IsComputed_ =
false;
247 if (!this->isInitialized ()) {
254 this->IsComputed_ =
true;
257 template<
class MatrixType,
class LocalScalarType>
260 using Teuchos::Array;
261 using Teuchos::ArrayView;
262 using STS = Teuchos::ScalarTraits<SC>;
263 SC zero = STS::zero();
264 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
271 if(this->scalarsPerRow_ > 1)
273 Array<LO> colToBlockOffset(this->inputBlockMatrix_->getNodeNumCols(), INVALID);
274 for(
int i = 0; i < this->numBlocks_; i++)
277 LO blockStart = this->blockOffsets_[i];
278 LO blockEnd = blockStart + this->blockSizes_[i];
279 ArrayView<const LO> blockRows = this->getBlockRows(i);
283 for(
size_t j = 0; j < size_t(blockRows.size()); j++)
285 LO localCol = this->translateRowToCol(blockRows[j]);
286 colToBlockOffset[localCol] = blockStart + j;
288 for(LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++)
294 LO inputRow = this->blockRows_[blockStart + blockRow];
295 this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values, numEntries);
296 for(LO k = 0; k < numEntries; k++)
298 LO colOffset = colToBlockOffset[indices[k]];
299 if(blockStart <= colOffset && colOffset < blockEnd)
304 LO blockCol = colOffset - blockStart;
305 for(LO bc = 0; bc < this->bcrsBlockSize_; bc++)
307 for(LO br = 0; br < this->bcrsBlockSize_; br++)
309 LO r = this->bcrsBlockSize_ * blockRow + br;
310 LO c = this->bcrsBlockSize_ * blockCol + bc;
311 auto val = values[k * (this->bcrsBlockSize_ * this->bcrsBlockSize_) + (br + this->bcrsBlockSize_ * bc)];
313 diagBlocks_[i](r, c) = val;
325 Array<LO> colToBlockOffset(this->inputMatrix_->getNodeNumCols() * this->bcrsBlockSize_, INVALID);
326 for(
int i = 0; i < this->numBlocks_; i++)
329 LO blockStart = this->blockOffsets_[i];
330 LO blockEnd = blockStart + this->blockSizes_[i];
331 ArrayView<const LO> blockRows = this->getBlockRows(i);
335 for(
size_t j = 0; j < size_t(blockRows.size()); j++)
338 LO localCol = this->translateRowToCol(blockRows[j]);
339 colToBlockOffset[localCol] = blockStart + j;
341 for(
size_t blockRow = 0; blockRow < size_t(blockRows.size()); blockRow++)
344 LO inputPointRow = this->blockRows_[blockStart + blockRow];
345 auto rowView = this->getInputRowView(inputPointRow);
346 for(
size_t k = 0; k < rowView.size(); k++)
348 LO colOffset = colToBlockOffset[rowView.ind(k)];
349 if(blockStart <= colOffset && colOffset < blockEnd)
351 LO blockCol = colOffset - blockStart;
352 auto val = rowView.val(k);
354 diagBlocks_[i](blockRow, blockCol) = rowView.val(k);
362 template<
class MatrixType,
class LocalScalarType>
364 BandedContainer<MatrixType, LocalScalarType>::
369 ContainerImpl<MatrixType, LocalScalarType>::clearBlocks();
372 template<
class MatrixType,
class LocalScalarType>
374 BandedContainer<MatrixType, LocalScalarType>::
377 Teuchos::LAPACK<int, LSC> lapack;
380 for(
int i = 0; i < this->numBlocks_; i++)
383 TEUCHOS_TEST_FOR_EXCEPTION(diagBlocks_[i].values()==0, std::invalid_argument,
384 "BandedContainer<T>::factor: Diagonal block is an empty SerialBandDenseMatrix<T>!");
385 TEUCHOS_TEST_FOR_EXCEPTION(diagBlocks_[i].upperBandwidth() < diagBlocks_[i].lowerBandwidth(), std::invalid_argument,
386 "BandedContainer<T>::factor: Diagonal block needs kl additional superdiagonals for factorization! However, the number of superdiagonals is smaller than the number of subdiagonals!");
387 int* blockIpiv = &ipiv_[this->blockOffsets_[i] * this->scalarsPerRow_];
388 lapack.GBTRF (diagBlocks_[i].numRows(),
389 diagBlocks_[i].numCols(),
390 diagBlocks_[i].lowerBandwidth(),
391 diagBlocks_[i].upperBandwidth() - diagBlocks_[i].lowerBandwidth(),
392 diagBlocks_[i].values(),
393 diagBlocks_[i].stride(),
398 TEUCHOS_TEST_FOR_EXCEPTION(
399 INFO < 0, std::logic_error,
"Ifpack2::BandedContainer::factor: " 400 "LAPACK's _GBTRF (LU factorization with partial pivoting) was called " 401 "incorrectly. INFO = " << INFO <<
" < 0. " 402 "Please report this bug to the Ifpack2 developers.");
406 TEUCHOS_TEST_FOR_EXCEPTION(
407 INFO > 0, std::runtime_error,
"Ifpack2::BandedContainer::factor: " 408 "LAPACK's _GBTRF (LU factorization with partial pivoting) reports that the " 409 "computed U factor is exactly singular. U(" << INFO <<
"," << INFO <<
") " 410 "(one-based index i) is exactly zero. This probably means that the input " 411 "matrix has a singular diagonal block.");
415 template<
class MatrixType,
class LocalScalarType>
417 BandedContainer<MatrixType, LocalScalarType>::
418 solveBlock(HostSubviewLocal X,
421 Teuchos::ETransp mode,
423 const LSC beta)
const 425 #ifdef HAVE_IFPACK2_DEBUG 426 TEUCHOS_TEST_FOR_EXCEPTION(
427 X.extent (0) != Y.extent (0),
428 std::logic_error,
"Ifpack2::BandedContainer::solveBlock: X and Y have " 429 "incompatible dimensions (" << X.extent (0) <<
" resp. " 430 << Y.extent (0) <<
"). Please report this bug to " 431 "the Ifpack2 developers.");
432 TEUCHOS_TEST_FOR_EXCEPTION(
433 X.extent (0) !=
static_cast<size_t> (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numCols() : diagBlocks_[blockIndex].numRows()),
434 std::logic_error,
"Ifpack2::BandedContainer::solveBlock: The input " 435 "multivector X has incompatible dimensions from those of the " 436 "inverse operator (" << X.extent (0) <<
" vs. " 437 << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numCols() : diagBlocks_[blockIndex].numRows())
438 <<
"). Please report this bug to the Ifpack2 developers.");
439 TEUCHOS_TEST_FOR_EXCEPTION(
440 Y.extent (0) !=
static_cast<size_t> (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRows() : diagBlocks_[blockIndex].numCols()),
441 std::logic_error,
"Ifpack2::BandedContainer::solveBlock: The output " 442 "multivector Y has incompatible dimensions from those of the " 443 "inverse operator (" << Y.extent (0) <<
" vs. " 444 << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRows() : diagBlocks_[blockIndex].numCols())
445 <<
"). Please report this bug to the Ifpack2 developers.");
448 size_t numRows = X.extent (0);
449 size_t numVecs = X.extent (1);
451 LSC zero = Teuchos::ScalarTraits<LSC>::zero ();
457 for(
size_t j = 0; j < numVecs; j++)
458 for(
size_t i = 0; i < numRows; i++)
462 for(
size_t j = 0; j < numVecs; j++)
463 for(
size_t i = 0; i < numRows; i++)
464 Y(i, j) = beta * Y(i, j);
468 Teuchos::LAPACK<int, LSC> lapack;
472 std::vector<LSC> yTemp(numVecs * numRows);
473 for(
size_t j = 0; j < numVecs; j++)
475 for(
size_t i = 0; i < numRows; i++)
476 yTemp[j * numRows + i] = X(i, j);
480 const char trans = (mode == Teuchos::CONJ_TRANS ?
'C' : (mode == Teuchos::TRANS ?
'T' :
'N'));
482 const int* blockIpiv = &ipiv_[this->blockOffsets_[blockIndex] * this->scalarsPerRow_];
485 diagBlocks_[blockIndex].numCols(),
486 diagBlocks_[blockIndex].lowerBandwidth(),
488 diagBlocks_[blockIndex].upperBandwidth() - diagBlocks_[blockIndex].lowerBandwidth(),
490 diagBlocks_[blockIndex].values(),
491 diagBlocks_[blockIndex].stride(),
498 for(
size_t j = 0; j < numVecs; j++)
500 for(
size_t i = 0; i < numRows; i++)
502 Y(i, j) *= ISC(beta);
503 Y(i, j) += ISC(alpha * yTemp[j * numRows + i]);
508 for(
size_t j = 0; j < numVecs; j++)
510 for(
size_t i = 0; i < numRows; i++)
511 Y(i, j) = ISC(yTemp[j * numRows + i]);
515 TEUCHOS_TEST_FOR_EXCEPTION(
516 INFO != 0, std::runtime_error,
"Ifpack2::BandedContainer::solveBlock: " 517 "LAPACK's _GBTRS (solve using LU factorization with partial pivoting) " 518 "failed with INFO = " << INFO <<
" != 0.");
522 template<
class MatrixType,
class LocalScalarType>
527 Teuchos::FancyOStream fos (Teuchos::rcpFromRef (os));
528 fos.setOutputToRootOnly (0);
533 template<
class MatrixType,
class LocalScalarType>
538 std::ostringstream oss;
539 oss << Teuchos::Describable::description();
540 if (this->isInitialized()) {
541 if (this->isComputed()) {
542 oss <<
"{status = initialized, computed";
545 oss <<
"{status = initialized, not computed";
549 oss <<
"{status = not initialized, not computed";
555 template<
class MatrixType,
class LocalScalarType>
559 const Teuchos::EVerbosityLevel verbLevel)
const 561 if(verbLevel==Teuchos::VERB_NONE)
return;
562 os <<
"================================================================================" << std::endl;
563 os <<
"Ifpack2::BandedContainer" << std::endl;
564 for(
int i = 0; i < this->numBlocks_; i++)
566 os <<
"Block " << i <<
": Number of rows = " << this->blockSizes_[i] << std::endl;
567 os <<
"Block " << i <<
": Number of subdiagonals = " << diagBlocks_[i].lowerBandwidth() << std::endl;
568 os <<
"Block " << i <<
": Number of superdiagonals = " << diagBlocks_[i].upperBandwidth() << std::endl;
570 os <<
"isInitialized() = " << this->IsInitialized_ << std::endl;
571 os <<
"isComputed() = " << this->IsComputed_ << std::endl;
572 os <<
"================================================================================" << std::endl;
576 template<
class MatrixType,
class LocalScalarType>
584 #define IFPACK2_BANDEDCONTAINER_INSTANT(S,LO,GO,N) \ 585 template class Ifpack2::BandedContainer< Tpetra::RowMatrix<S, LO, GO, N>, S >; 587 #endif // IFPACK2_BANDEDCONTAINER_HPP virtual void initialize() override
Do all set-up operations that only require matrix structure.
Definition: Ifpack2_BandedContainer_def.hpp:205
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS). This class allows a custom scalar type (LocalScalarType) to be used for storing blocks and solving the block systems. Hiding this template parameter from the Container interface simplifies the BlockRelaxation and ContainerFactory classes.
Definition: Ifpack2_Container_decl.hpp:342
virtual void compute() override
Initialize and compute each block.
Definition: Ifpack2_BandedContainer_def.hpp:239
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition: Ifpack2_BandedContainer_def.hpp:577
virtual std::ostream & print(std::ostream &os) const override
Print information about this object to the given output stream.
Definition: Ifpack2_BandedContainer_def.hpp:525
virtual ~BandedContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition: Ifpack2_BandedContainer_def.hpp:79
void computeBandwidth()
Definition: Ifpack2_BandedContainer_def.hpp:101
BandedContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, const Teuchos::RCP< const import_type > &, bool pointIndexed)
Constructor.
Definition: Ifpack2_BandedContainer_def.hpp:62
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print the object with some verbosity level to the given FancyOStream.
Definition: Ifpack2_BandedContainer_def.hpp:558
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:73
virtual void setParameters(const Teuchos::ParameterList &List) override
Set all necessary parameters (super- and sub-diagonal bandwidth)
Definition: Ifpack2_BandedContainer_def.hpp:83
Store and solve a local Banded linear problem.
Definition: Ifpack2_BandedContainer_decl.hpp:102
virtual std::string description() const override
A one-line description of this object.
Definition: Ifpack2_BandedContainer_def.hpp:536