Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_Superlu_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // Amesos2: Templated Direct Sparse Solver Package
6 // Copyright 2011 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ***********************************************************************
41 //
42 // @HEADER
43 
53 #ifndef AMESOS2_SUPERLU_DEF_HPP
54 #define AMESOS2_SUPERLU_DEF_HPP
55 
56 #include <Teuchos_Tuple.hpp>
57 #include <Teuchos_ParameterList.hpp>
58 #include <Teuchos_StandardParameterEntryValidators.hpp>
59 
61 #include "Amesos2_Superlu_decl.hpp"
62 
63 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
64 // TODO: This 'using namespace SLU' is not a good thing.
65 // It was added because kernels does not namespace SuperLU but Amesos2 does.
66 // Declaring the namespace SLU allows us to reuse that file without duplication.
67 // We need to determine a uniform system to avoid this this but for now, at least
68 // this issue is only present when TRSV is enabled.
69 using namespace SLU;
70 #include "KokkosSparse_sptrsv_superlu.hpp"
71 #endif
72 
73 namespace Amesos2 {
74 
75 
76 template <class Matrix, class Vector>
78  Teuchos::RCP<const Matrix> A,
79  Teuchos::RCP<Vector> X,
80  Teuchos::RCP<const Vector> B )
81  : SolverCore<Amesos2::Superlu,Matrix,Vector>(A, X, B)
82  , is_contiguous_(true) // default is set by params
83  , use_triangular_solves_(false) // default is set by params
84  , use_metis_(false)
85  , symmetrize_metis_(true)
86 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
87  , sptrsv_invert_diag_(true)
88  , sptrsv_invert_offdiag_ (false)
89  , sptrsv_u_in_csr_ (true)
90  , sptrsv_merge_supernodes_ (false)
91  , sptrsv_use_spmv_ (false)
92 #endif
93 {
94  // ilu_set_default_options is called later in set parameter list if required.
95  // This is not the ideal way, but the other option to always call
96  // ilu_set_default_options here and assuming it won't have any side effect
97  // in the TPL is more dangerous. It is not a good idea to rely on external
98  // libraries' internal "features".
99  SLU::set_default_options(&(data_.options));
100  // Override some default options
101  data_.options.PrintStat = SLU::NO;
102 
103  SLU::StatInit(&(data_.stat));
104 
105  Kokkos::resize(data_.perm_r, this->globalNumRows_);
106  Kokkos::resize(data_.perm_c, this->globalNumCols_);
107  Kokkos::resize(data_.etree, this->globalNumCols_);
108  Kokkos::resize(data_.R, this->globalNumRows_);
109  Kokkos::resize(data_.C, this->globalNumCols_);
110 
111  data_.relax = SLU::sp_ienv(2); // Query optimal relax param from superlu
112  data_.panel_size = SLU::sp_ienv(1); // Query optimal panel size
113 
114  data_.equed = 'N'; // No equilibration
115  data_.rowequ = false;
116  data_.colequ = false;
117  data_.A.Store = NULL;
118  data_.L.Store = NULL;
119  data_.U.Store = NULL;
120  data_.X.Store = NULL;
121  data_.B.Store = NULL;
122 
123  ILU_Flag_=false; // default: turn off ILU
124 }
125 
126 
127 template <class Matrix, class Vector>
129 {
130  /* Free Superlu data_types
131  * - Matrices
132  * - Vectors
133  * - Stat object
134  */
135  SLU::StatFree( &(data_.stat) ) ;
136 
137  // Storage is initialized in numericFactorization_impl()
138  if ( data_.A.Store != NULL ){
139  SLU::Destroy_SuperMatrix_Store( &(data_.A) );
140  }
141 
142  // only root allocated these SuperMatrices.
143  if ( data_.L.Store != NULL ){ // will only be true for this->root_
144  SLU::Destroy_SuperNode_Matrix( &(data_.L) );
145  SLU::Destroy_CompCol_Matrix( &(data_.U) );
146  }
147 }
148 
149 template <class Matrix, class Vector>
150 std::string
152 {
153  std::ostringstream oss;
154  oss << "SuperLU solver interface";
155  if (ILU_Flag_) {
156  oss << ", \"ILUTP\" : {";
157  oss << "drop tol = " << data_.options.ILU_DropTol;
158  oss << ", fill factor = " << data_.options.ILU_FillFactor;
159  oss << ", fill tol = " << data_.options.ILU_FillTol;
160  switch(data_.options.ILU_MILU) {
161  case SLU::SMILU_1 :
162  oss << ", MILU 1";
163  break;
164  case SLU::SMILU_2 :
165  oss << ", MILU 2";
166  break;
167  case SLU::SMILU_3 :
168  oss << ", MILU 3";
169  break;
170  case SLU::SILU :
171  default:
172  oss << ", regular ILU";
173  }
174  switch(data_.options.ILU_Norm) {
175  case SLU::ONE_NORM :
176  oss << ", 1-norm";
177  break;
178  case SLU::TWO_NORM :
179  oss << ", 2-norm";
180  break;
181  case SLU::INF_NORM :
182  default:
183  oss << ", infinity-norm";
184  }
185  oss << "}";
186  } else {
187  oss << ", direct solve";
188  }
189  return oss.str();
190  /*
191 
192  // ILU parameters
193  if( parameterList->isParameter("RowPerm") ){
194  RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
195  parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
196  data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList, "RowPerm");
197  }
198 
199 
200  */
201 }
202 
203 template<class Matrix, class Vector>
204 int
206 {
207  /*
208  * Get column permutation vector perm_c[], according to permc_spec:
209  * permc_spec = NATURAL: natural ordering
210  * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
211  * permc_spec = MMD_ATA: minimum degree on structure of A'*A
212  * permc_spec = COLAMD: approximate minimum degree column ordering
213  * permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
214  */
215  int permc_spec = data_.options.ColPerm;
216  if ( permc_spec != SLU::MY_PERMC && this->root_ ){
217 #ifdef HAVE_AMESOS2_TIMERS
218  Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
219 #endif
220 
221  SLU::get_perm_c(permc_spec, &(data_.A), data_.perm_c.data());
222  }
223 
224  return(0);
225 }
226 
227 
228 template <class Matrix, class Vector>
229 int
231 {
232  /*
233  * SuperLU performs symbolic factorization and numeric factorization
234  * together, but does leave some options for reusing symbolic
235  * structures that have been created on previous factorizations. If
236  * our Amesos2 user calls this function, that is an indication that
237  * the symbolic structure of the matrix is no longer valid, and
238  * SuperLU should do the factorization from scratch.
239  *
240  * This can be accomplished by setting the options.Fact flag to
241  * DOFACT, as well as setting our own internal flag to false.
242  */
243  same_symbolic_ = false;
244  data_.options.Fact = SLU::DOFACT;
245 
246  if (use_metis_) {
247 #ifdef HAVE_AMESOS2_TIMERS
248  Teuchos::RCP< Teuchos::Time > MetisTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for METIS");
249  Teuchos::TimeMonitor numFactTimer(*MetisTimer_);
250 #endif
251  size_t n = this->globalNumRows_;
252  size_t nz = this->globalNumNonZeros_;
253  host_size_type_array host_rowind_view_ = host_rows_view_;
254  host_ordinal_type_array host_colptr_view_ = host_col_ptr_view_;
255 
256  /* symmetrize the matrix (A + A') if needed */
257  if (symmetrize_metis_) {
258  int new_nz = 0;
259  int *new_col_ptr;
260  int *new_row_ind;
261 
262  // NOTE: both size_type and ordinal_type are defined as int
263  // Consider using "symmetrize_graph" in KokkosKernels, if this becomes significant in time.
264  SLU::at_plus_a(n, nz, host_col_ptr_view_.data(), host_rows_view_.data(),
265  &new_nz, &new_col_ptr, &new_row_ind);
266 
267  // reallocate (so that we won't overwrite the original)
268  // and copy for output
269  nz = new_nz;
270  Kokkos::resize (host_rowind_view_, nz);
271  Kokkos::realloc(host_colptr_view_, n+1);
272  for (size_t i = 0; i <= n; i++) {
273  host_colptr_view_(i) = new_col_ptr[i];
274  }
275  for (size_t i = 0; i < nz; i++) {
276  host_rowind_view_(i) = new_row_ind[i];
277  }
278 
279  // free
280  SLU::SUPERLU_FREE(new_col_ptr);
281  SLU::SUPERLU_FREE(new_row_ind);
282  }
283 
284  // reorder will convert both graph and perm/iperm to the internal METIS integer type
285  using metis_int_array_type = host_ordinal_type_array;
286  metis_int_array_type metis_perm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing("metis_perm"), n);
287  metis_int_array_type metis_iperm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing("metis_iperm"), n);
288 
289  // call METIS
290  size_t new_nnz = 0;
291  Amesos2::Util::reorder(
292  host_colptr_view_, host_rowind_view_,
293  metis_perm, metis_iperm, new_nnz, false);
294 
295  for (size_t i = 0; i < n; i++) {
296  data_.perm_r(i) = metis_iperm(i);
297  data_.perm_c(i) = metis_iperm(i);
298  }
299 
300  // SLU will use user-specified METIS ordering
301  data_.options.ColPerm = SLU::MY_PERMC;
302  data_.options.RowPerm = SLU::MY_PERMR;
303  // Turn off Equil not to mess up METIS ordering?
304  //data_.options.Equil = SLU::NO;
305  }
306 
307  return(0);
308 }
309 
310 
311 template <class Matrix, class Vector>
312 int
314 {
315  using Teuchos::as;
316 
317  // Cleanup old L and U matrices if we are not reusing a symbolic
318  // factorization. Stores and other data will be allocated in gstrf.
319  // Only rank 0 has valid pointers
320  if ( !same_symbolic_ && data_.L.Store != NULL ){
321  SLU::Destroy_SuperNode_Matrix( &(data_.L) );
322  SLU::Destroy_CompCol_Matrix( &(data_.U) );
323  data_.L.Store = NULL;
324  data_.U.Store = NULL;
325  }
326 
327  if( same_symbolic_ ) data_.options.Fact = SLU::SamePattern_SameRowPerm;
328 
329  int info = 0;
330  if ( this->root_ ){
331 
332 #ifdef HAVE_AMESOS2_DEBUG
333  TEUCHOS_TEST_FOR_EXCEPTION( data_.A.ncol != as<int>(this->globalNumCols_),
334  std::runtime_error,
335  "Error in converting to SuperLU SuperMatrix: wrong number of global columns." );
336  TEUCHOS_TEST_FOR_EXCEPTION( data_.A.nrow != as<int>(this->globalNumRows_),
337  std::runtime_error,
338  "Error in converting to SuperLU SuperMatrix: wrong number of global rows." );
339 #endif
340 
341  if( data_.options.Equil == SLU::YES ){
342  magnitude_type rowcnd, colcnd, amax;
343  int info2 = 0;
344 
345  // calculate row and column scalings
346  function_map::gsequ(&(data_.A), data_.R.data(),
347  data_.C.data(), &rowcnd, &colcnd,
348  &amax, &info2);
349  TEUCHOS_TEST_FOR_EXCEPTION
350  (info2 < 0, std::runtime_error,
351  "SuperLU's gsequ function returned with status " << info2 << " < 0."
352  " This means that argument " << (-info2) << " given to the function"
353  " had an illegal value.");
354  if (info2 > 0) {
355  if (info2 <= data_.A.nrow) {
356  TEUCHOS_TEST_FOR_EXCEPTION
357  (true, std::runtime_error, "SuperLU's gsequ function returned with "
358  "info = " << info2 << " > 0, and info <= A.nrow = " << data_.A.nrow
359  << ". This means that row " << info2 << " of A is exactly zero.");
360  }
361  else if (info2 > data_.A.ncol) {
362  TEUCHOS_TEST_FOR_EXCEPTION
363  (true, std::runtime_error, "SuperLU's gsequ function returned with "
364  "info = " << info2 << " > 0, and info > A.ncol = " << data_.A.ncol
365  << ". This means that column " << (info2 - data_.A.nrow) << " of "
366  "A is exactly zero.");
367  }
368  else {
369  TEUCHOS_TEST_FOR_EXCEPTION
370  (true, std::runtime_error, "SuperLU's gsequ function returned "
371  "with info = " << info2 << " > 0, but its value is not in the "
372  "range permitted by the documentation. This should never happen "
373  "(it appears to be SuperLU's fault).");
374  }
375  }
376 
377  // apply row and column scalings if necessary
378  function_map::laqgs(&(data_.A), data_.R.data(),
379  data_.C.data(), rowcnd, colcnd,
380  amax, &(data_.equed));
381 
382  // check what types of equilibration was actually done
383  data_.rowequ = (data_.equed == 'R') || (data_.equed == 'B');
384  data_.colequ = (data_.equed == 'C') || (data_.equed == 'B');
385  }
386 
387  // Apply the column permutation computed in preOrdering. Place the
388  // column-permuted matrix in AC
389  SLU::sp_preorder(&(data_.options), &(data_.A), data_.perm_c.data(),
390  data_.etree.data(), &(data_.AC));
391 
392  { // Do factorization
393 #ifdef HAVE_AMESOS2_TIMERS
394  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
395 #endif
396 
397 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
398  std::cout << "Superlu:: Before numeric factorization" << std::endl;
399  std::cout << "nzvals_ : " << nzvals_.toString() << std::endl;
400  std::cout << "rowind_ : " << rowind_.toString() << std::endl;
401  std::cout << "colptr_ : " << colptr_.toString() << std::endl;
402 #endif
403 
404  if(ILU_Flag_==false) {
405  function_map::gstrf(&(data_.options), &(data_.AC),
406  data_.relax, data_.panel_size, data_.etree.data(),
407  NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
408  &(data_.L), &(data_.U),
409 #ifdef HAVE_AMESOS2_SUPERLU5_API
410  &(data_.lu),
411 #endif
412  &(data_.stat), &info);
413  }
414  else {
415  function_map::gsitrf(&(data_.options), &(data_.AC),
416  data_.relax, data_.panel_size, data_.etree.data(),
417  NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
418  &(data_.L), &(data_.U),
419 #ifdef HAVE_AMESOS2_SUPERLU5_API
420  &(data_.lu),
421 #endif
422  &(data_.stat), &info);
423  }
424 
425  }
426  // Cleanup. AC data will be alloc'd again for next factorization (if at all)
427  SLU::Destroy_CompCol_Permuted( &(data_.AC) );
428 
429  // Set the number of non-zero values in the L and U factors
430  this->setNnzLU( as<size_t>(((SLU::SCformat*)data_.L.Store)->nnz +
431  ((SLU::NCformat*)data_.U.Store)->nnz) );
432  }
433 
434  /* All processes should have the same error code */
435  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
436 
437  global_size_type info_st = as<global_size_type>(info);
438  TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st <= this->globalNumCols_),
439  std::runtime_error,
440  "Factorization complete, but matrix is singular. Division by zero eminent");
441  TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st > this->globalNumCols_),
442  std::runtime_error,
443  "Memory allocation failure in Superlu factorization");
444 
445  data_.options.Fact = SLU::FACTORED;
446  same_symbolic_ = true;
447 
448  if(use_triangular_solves_) {
449 #ifdef HAVE_AMESOS2_TIMERS
450  Teuchos::RCP< Teuchos::Time > SpTrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv setup");
451  Teuchos::TimeMonitor numFactTimer(*SpTrsvTimer_);
452 #endif
453  triangular_solve_factor();
454  }
455 
456  return(info);
457 }
458 
459 
460 template <class Matrix, class Vector>
461 int
463  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
464 {
465  using Teuchos::as;
466 #ifdef HAVE_AMESOS2_TIMERS
467  Teuchos::RCP< Teuchos::Time > Amesos2SolveTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for Amesos2");
468  Teuchos::TimeMonitor solveTimer(*Amesos2SolveTimer_);
469 #endif
470 
471  const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
472  const size_t nrhs = X->getGlobalNumVectors();
473 
474  bool bDidAssignX = false; // will be set below
475  bool bDidAssignB = false; // will be set below
476  { // Get values from RHS B
477 #ifdef HAVE_AMESOS2_TIMERS
478  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
479  Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
480 #endif
481 
482  // In general we may want to write directly to the x space without a copy.
483  // So we 'get' x which may be a direct view assignment to the MV.
484  const bool initialize_data = true;
485  const bool do_not_initialize_data = false;
486  if(use_triangular_solves_) { // to device
487 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
488  bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
489  device_solve_array_t>::do_get(do_not_initialize_data, X, device_xValues_,
490  as<size_t>(ld_rhs),
491  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
492  this->rowIndexBase_);
493  bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
494  device_solve_array_t>::do_get(initialize_data, B, device_bValues_,
495  as<size_t>(ld_rhs),
496  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
497  this->rowIndexBase_);
498 #endif
499  }
500  else { // to host
501  bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
502  host_solve_array_t>::do_get(do_not_initialize_data, X, host_xValues_,
503  as<size_t>(ld_rhs),
504  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
505  this->rowIndexBase_);
506  bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
507  host_solve_array_t>::do_get(initialize_data, B, host_bValues_,
508  as<size_t>(ld_rhs),
509  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
510  this->rowIndexBase_);
511  }
512  }
513 
514  // If equilibration was applied at numeric, then gssvx and gsisx are going to
515  // modify B, so we can't use the optimized assignment to B since we'll change
516  // the source test vector and then fail the subsequent cycle. We need a copy.
517  // If bDidAssignB is false, then we skip the copy since it was copied already.
518  if(bDidAssignB && data_.equed != 'N') {
519  if(use_triangular_solves_) { // to device
520 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
521  device_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
522  device_bValues_.extent(0), device_bValues_.extent(1));
523  Kokkos::deep_copy(copyB, device_bValues_);
524  device_bValues_ = copyB;
525 #endif
526  }
527  else {
528  host_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
529  host_bValues_.extent(0), host_bValues_.extent(1));
530  Kokkos::deep_copy(copyB, host_bValues_);
531  host_bValues_ = copyB;
532  }
533  }
534 
535  int ierr = 0; // returned error code
536 
537  magnitude_type rpg, rcond;
538  if ( this->root_ ) {
539  // Note: the values of B and X (after solution) are adjusted
540  // appropriately within gssvx for row and column scalings.
541  { // Do solve!
542 #ifdef HAVE_AMESOS2_TIMERS
543  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
544 #endif
545 
546  if(use_triangular_solves_) {
547  triangular_solve();
548  }
549  else {
550  Kokkos::resize(data_.ferr, nrhs);
551  Kokkos::resize(data_.berr, nrhs);
552 
553  {
554  #ifdef HAVE_AMESOS2_TIMERS
555  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
556  #endif
557  SLU::Dtype_t dtype = type_map::dtype;
558  int i_ld_rhs = as<int>(ld_rhs);
559  function_map::create_Dense_Matrix(&(data_.B), i_ld_rhs, as<int>(nrhs),
560  convert_bValues_, host_bValues_, i_ld_rhs,
561  SLU::SLU_DN, dtype, SLU::SLU_GE);
562  function_map::create_Dense_Matrix(&(data_.X), i_ld_rhs, as<int>(nrhs),
563  convert_xValues_, host_xValues_, i_ld_rhs,
564  SLU::SLU_DN, dtype, SLU::SLU_GE);
565  }
566 
567  if(ILU_Flag_==false) {
568  function_map::gssvx(&(data_.options), &(data_.A),
569  data_.perm_c.data(), data_.perm_r.data(),
570  data_.etree.data(), &(data_.equed), data_.R.data(),
571  data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
572  &(data_.X), &rpg, &rcond, data_.ferr.data(),
573  data_.berr.data(),
574  #ifdef HAVE_AMESOS2_SUPERLU5_API
575  &(data_.lu),
576  #endif
577  &(data_.mem_usage), &(data_.stat), &ierr);
578  }
579  else {
580  function_map::gsisx(&(data_.options), &(data_.A),
581  data_.perm_c.data(), data_.perm_r.data(),
582  data_.etree.data(), &(data_.equed), data_.R.data(),
583  data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
584  &(data_.X), &rpg, &rcond,
585  #ifdef HAVE_AMESOS2_SUPERLU5_API
586  &(data_.lu),
587  #endif
588  &(data_.mem_usage), &(data_.stat), &ierr);
589  }
590 
591  // Cleanup X and B stores
592  SLU::Destroy_SuperMatrix_Store( &(data_.X) );
593  SLU::Destroy_SuperMatrix_Store( &(data_.B) );
594  data_.X.Store = NULL;
595  data_.B.Store = NULL;
596  }
597 
598  } // do solve
599 
600  // convert back to Kokkos views
601  {
602 #ifdef HAVE_AMESOS2_TIMERS
603  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
604 #endif
605  function_map::convert_back_Dense_Matrix(convert_bValues_, host_bValues_);
606  function_map::convert_back_Dense_Matrix(convert_xValues_, host_xValues_);
607  }
608 
609  // Cleanup X and B stores
610  SLU::Destroy_SuperMatrix_Store( &(data_.X) );
611  SLU::Destroy_SuperMatrix_Store( &(data_.B) );
612  data_.X.Store = NULL;
613  data_.B.Store = NULL;
614  }
615 
616  /* All processes should have the same error code */
617  Teuchos::broadcast(*(this->getComm()), 0, &ierr);
618 
619  global_size_type ierr_st = as<global_size_type>(ierr);
620  TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
621  std::invalid_argument,
622  "Argument " << -ierr << " to SuperLU xgssvx had illegal value" );
623  TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st <= this->globalNumCols_,
624  std::runtime_error,
625  "Factorization complete, but U is exactly singular" );
626  TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st > this->globalNumCols_ + 1,
627  std::runtime_error,
628  "SuperLU allocated " << ierr - this->globalNumCols_ << " bytes of "
629  "memory before allocation failure occured." );
630 
631  /* Update X's global values */
632 
633  // if bDidAssignX, then we solved straight to the adapter's X memory space without
634  // requiring additional memory allocation, so the x data is already in place.
635  if(!bDidAssignX) {
636 #ifdef HAVE_AMESOS2_TIMERS
637  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
638 #endif
639 
640  if(use_triangular_solves_) { // to device
641 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
642  Util::put_1d_data_helper_kokkos_view<
643  MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, device_xValues_,
644  as<size_t>(ld_rhs),
645  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
646  this->rowIndexBase_);
647 #endif
648  }
649  else {
650  Util::put_1d_data_helper_kokkos_view<
651  MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, host_xValues_,
652  as<size_t>(ld_rhs),
653  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
654  this->rowIndexBase_);
655  }
656  }
657 
658  return(ierr);
659 }
660 
661 
662 template <class Matrix, class Vector>
663 bool
665 {
666  // The Superlu factorization routines can handle square as well as
667  // rectangular matrices, but Superlu can only apply the solve routines to
668  // square matrices, so we check the matrix for squareness.
669  return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
670 }
671 
672 
673 template <class Matrix, class Vector>
674 void
675 Superlu<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
676 {
677  using Teuchos::RCP;
678  using Teuchos::getIntegralValue;
679  using Teuchos::ParameterEntryValidator;
680 
681  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
682 
683  ILU_Flag_ = parameterList->get<bool>("ILU_Flag",false);
684  if (ILU_Flag_) {
685  SLU::ilu_set_default_options(&(data_.options));
686  // Override some default options
687  data_.options.PrintStat = SLU::NO;
688  }
689 
690  data_.options.Trans = this->control_.useTranspose_ ? SLU::TRANS : SLU::NOTRANS;
691  // The SuperLU transpose option can override the Amesos2 option
692  if( parameterList->isParameter("Trans") ){
693  RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry("Trans").validator();
694  parameterList->getEntry("Trans").setValidator(trans_validator);
695 
696  data_.options.Trans = getIntegralValue<SLU::trans_t>(*parameterList, "Trans");
697  }
698 
699  if( parameterList->isParameter("IterRefine") ){
700  RCP<const ParameterEntryValidator> refine_validator = valid_params->getEntry("IterRefine").validator();
701  parameterList->getEntry("IterRefine").setValidator(refine_validator);
702 
703  data_.options.IterRefine = getIntegralValue<SLU::IterRefine_t>(*parameterList, "IterRefine");
704  }
705 
706  if( parameterList->isParameter("ColPerm") ){
707  RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
708  parameterList->getEntry("ColPerm").setValidator(colperm_validator);
709 
710  data_.options.ColPerm = getIntegralValue<SLU::colperm_t>(*parameterList, "ColPerm");
711  }
712 
713  data_.options.DiagPivotThresh = parameterList->get<double>("DiagPivotThresh", 1.0);
714 
715  bool equil = parameterList->get<bool>("Equil", true);
716  data_.options.Equil = equil ? SLU::YES : SLU::NO;
717 
718  bool symmetric_mode = parameterList->get<bool>("SymmetricMode", false);
719  data_.options.SymmetricMode = symmetric_mode ? SLU::YES : SLU::NO;
720 
721  // ILU parameters
722  if( parameterList->isParameter("RowPerm") ){
723  RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
724  parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
725  data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList, "RowPerm");
726  }
727 
728  /*if( parameterList->isParameter("ILU_DropRule") ) {
729  RCP<const ParameterEntryValidator> droprule_validator = valid_params->getEntry("ILU_DropRule").validator();
730  parameterList->getEntry("ILU_DropRule").setValidator(droprule_validator);
731  data_.options.ILU_DropRule = getIntegralValue<SLU::rule_t>(*parameterList, "ILU_DropRule");
732  }*/
733 
734  data_.options.ILU_DropTol = parameterList->get<double>("ILU_DropTol", 0.0001);
735 
736  data_.options.ILU_FillFactor = parameterList->get<double>("ILU_FillFactor", 10.0);
737 
738  if( parameterList->isParameter("ILU_Norm") ) {
739  RCP<const ParameterEntryValidator> norm_validator = valid_params->getEntry("ILU_Norm").validator();
740  parameterList->getEntry("ILU_Norm").setValidator(norm_validator);
741  data_.options.ILU_Norm = getIntegralValue<SLU::norm_t>(*parameterList, "ILU_Norm");
742  }
743 
744  if( parameterList->isParameter("ILU_MILU") ) {
745  RCP<const ParameterEntryValidator> milu_validator = valid_params->getEntry("ILU_MILU").validator();
746  parameterList->getEntry("ILU_MILU").setValidator(milu_validator);
747  data_.options.ILU_MILU = getIntegralValue<SLU::milu_t>(*parameterList, "ILU_MILU");
748  }
749 
750  data_.options.ILU_FillTol = parameterList->get<double>("ILU_FillTol", 0.01);
751 
752  is_contiguous_ = parameterList->get<bool>("IsContiguous", true);
753 
754  use_triangular_solves_ = parameterList->get<bool>("Enable_KokkosKernels_TriangularSolves", false);
755 
756  use_metis_ = parameterList->get<bool>("UseMetis", false);
757  symmetrize_metis_ = parameterList->get<bool>("SymmetrizeMetis", true);
758  if(use_triangular_solves_) {
759 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
760  // specify whether to invert diagonal blocks
761  sptrsv_invert_diag_ = parameterList->get<bool>("SpTRSV_Invert_Diag", true);
762  // specify whether to apply diagonal-inversion to off-diagonal blocks (optional, default is false)
763  sptrsv_invert_offdiag_ = parameterList->get<bool>("SpTRSV_Invert_OffDiag", false);
764  // specify whether to store U in CSR format
765  sptrsv_u_in_csr_ = parameterList->get<bool>("SpTRSV_U_CSR", true);
766  // specify whether to merge supernodes with similar sparsity structures
767  sptrsv_merge_supernodes_ = parameterList->get<bool>("SpTRSV_Merge_Supernodes", false);
768  // specify whether to use spmv for sptrsv
769  sptrsv_use_spmv_ = parameterList->get<bool>("SpTRSV_Use_SpMV", false);
770 #else
771  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
772  "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_SUPERLU were not configured." );
773 #endif
774  }
775 }
776 
777 
778 template <class Matrix, class Vector>
779 Teuchos::RCP<const Teuchos::ParameterList>
781 {
782  using std::string;
783  using Teuchos::tuple;
784  using Teuchos::ParameterList;
785  using Teuchos::EnhancedNumberValidator;
786  using Teuchos::setStringToIntegralParameter;
787  using Teuchos::stringToIntegralParameterEntryValidator;
788 
789  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
790 
791  if( is_null(valid_params) ){
792  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
793 
794  setStringToIntegralParameter<SLU::trans_t>("Trans", "NOTRANS",
795  "Solve for the transpose system or not",
796  tuple<string>("TRANS","NOTRANS","CONJ"),
797  tuple<string>("Solve with transpose",
798  "Do not solve with transpose",
799  "Solve with the conjugate transpose"),
800  tuple<SLU::trans_t>(SLU::TRANS,
801  SLU::NOTRANS,
802  SLU::CONJ),
803  pl.getRawPtr());
804 
805  setStringToIntegralParameter<SLU::IterRefine_t>("IterRefine", "NOREFINE",
806  "Type of iterative refinement to use",
807  tuple<string>("NOREFINE", "SLU_SINGLE", "SLU_DOUBLE"),
808  tuple<string>("Do not use iterative refinement",
809  "Do single iterative refinement",
810  "Do double iterative refinement"),
811  tuple<SLU::IterRefine_t>(SLU::NOREFINE,
812  SLU::SLU_SINGLE,
813  SLU::SLU_DOUBLE),
814  pl.getRawPtr());
815 
816  // Note: MY_PERMC not yet supported
817  setStringToIntegralParameter<SLU::colperm_t>("ColPerm", "COLAMD",
818  "Specifies how to permute the columns of the "
819  "matrix for sparsity preservation",
820  tuple<string>("NATURAL", "MMD_AT_PLUS_A",
821  "MMD_ATA", "COLAMD"),
822  tuple<string>("Natural ordering",
823  "Minimum degree ordering on A^T + A",
824  "Minimum degree ordering on A^T A",
825  "Approximate minimum degree column ordering"),
826  tuple<SLU::colperm_t>(SLU::NATURAL,
827  SLU::MMD_AT_PLUS_A,
828  SLU::MMD_ATA,
829  SLU::COLAMD),
830  pl.getRawPtr());
831 
832  Teuchos::RCP<EnhancedNumberValidator<double> > diag_pivot_thresh_validator
833  = Teuchos::rcp( new EnhancedNumberValidator<double>(0.0, 1.0) );
834  pl->set("DiagPivotThresh", 1.0,
835  "Specifies the threshold used for a diagonal entry to be an acceptable pivot",
836  diag_pivot_thresh_validator); // partial pivoting
837 
838  pl->set("Equil", true, "Whether to equilibrate the system before solve");
839 
840  pl->set("SymmetricMode", false,
841  "Specifies whether to use the symmetric mode. "
842  "Gives preference to diagonal pivots and uses "
843  "an (A^T + A)-based column permutation.");
844 
845  // ILU parameters
846 
847  setStringToIntegralParameter<SLU::rowperm_t>("RowPerm", "LargeDiag",
848  "Type of row permutation strategy to use",
849  tuple<string>("NOROWPERM","LargeDiag","MY_PERMR"),
850  tuple<string>("Use natural ordering",
851  "Use weighted bipartite matching algorithm",
852  "Use the ordering given in perm_r input"),
853  tuple<SLU::rowperm_t>(SLU::NOROWPERM,
854 #if SUPERLU_MAJOR_VERSION > 5 || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION > 2) || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION == 2 && SUPERLU_PATCH_VERSION >= 2)
855  SLU::LargeDiag_MC64,
856 #else
857  SLU::LargeDiag,
858 #endif
859  SLU::MY_PERMR),
860  pl.getRawPtr());
861 
862  /*setStringToIntegralParameter<SLU::rule_t>("ILU_DropRule", "DROP_BASIC",
863  "Type of dropping strategy to use",
864  tuple<string>("DROP_BASIC","DROP_PROWS",
865  "DROP_COLUMN","DROP_AREA",
866  "DROP_DYNAMIC","DROP_INTERP"),
867  tuple<string>("ILUTP(t)","ILUTP(p,t)",
868  "Variant of ILUTP(p,t) for j-th column",
869  "Variant of ILUTP to control memory",
870  "Dynamically adjust threshold",
871  "Compute second dropping threshold by interpolation"),
872  tuple<SLU::rule_t>(SLU::DROP_BASIC,SLU::DROP_PROWS,SLU::DROP_COLUMN,
873  SLU::DROP_AREA,SLU::DROP_DYNAMIC,SLU::DROP_INTERP),
874  pl.getRawPtr());*/
875 
876  pl->set("ILU_DropTol", 0.0001, "ILUT drop tolerance");
877 
878  pl->set("ILU_FillFactor", 10.0, "ILUT fill factor");
879 
880  setStringToIntegralParameter<SLU::norm_t>("ILU_Norm", "INF_NORM",
881  "Type of norm to use",
882  tuple<string>("ONE_NORM","TWO_NORM","INF_NORM"),
883  tuple<string>("1-norm","2-norm","inf-norm"),
884  tuple<SLU::norm_t>(SLU::ONE_NORM,SLU::TWO_NORM,SLU::INF_NORM),
885  pl.getRawPtr());
886 
887  setStringToIntegralParameter<SLU::milu_t>("ILU_MILU", "SILU",
888  "Type of modified ILU to use",
889  tuple<string>("SILU","SMILU_1","SMILU_2","SMILU_3"),
890  tuple<string>("Regular ILU","MILU 1","MILU 2","MILU 3"),
891  tuple<SLU::milu_t>(SLU::SILU,SLU::SMILU_1,SLU::SMILU_2,
892  SLU::SMILU_3),
893  pl.getRawPtr());
894 
895  pl->set("ILU_FillTol", 0.01, "ILUT fill tolerance");
896 
897  pl->set("ILU_Flag", false, "ILU flag: if true, run ILU routines");
898 
899  pl->set("Enable_KokkosKernels_TriangularSolves", false, "Whether to use triangular solves.");
900 
901  pl->set("IsContiguous", true, "Whether GIDs contiguous");
902 
903  // call METIS before SuperLU
904  pl->set("UseMetis", false, "Whether to call METIS before SuperLU");
905  pl->set("SymmetrizeMetis", true, "Whether to symmetrize matrix before METIS");
906 
907 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
908  pl->set("SpTRSV_Invert_Diag", true, "specify whether to invert diagonal blocks for supernodal sparse-trianguular solve");
909  pl->set("SpTRSV_Invert_OffDiag", false, "specify whether to apply diagonal-inversion to off-diagonal blocks for supernodal sparse-trianguular solve");
910  pl->set("SpTRSV_U_CSR", true, "specify whether to store U in CSR forma for supernodal sparse-trianguular solve");
911  pl->set("SpTRSV_Merge_Supernodes", false, "specify whether to merge supernodes with similar sparsity structures for supernodal sparse-trianguular solve");
912  pl->set("SpTRSV_Use_SpMV", false, "specify whether to use spmv for supernodal sparse-trianguular solve");
913 #endif
914 
915  valid_params = pl;
916  }
917 
918  return valid_params;
919 }
920 
921 
922 template <class Matrix, class Vector>
923 bool
925 {
926  using Teuchos::as;
927 
928 #ifdef HAVE_AMESOS2_TIMERS
929  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
930 #endif
931 
932  // SuperLU does not need the matrix at symbolicFactorization()
933  if( current_phase == SYMBFACT ) return false;
934 
935  // Cleanup old store memory if it's non-NULL (should only ever be non-NULL at root_)
936  if( data_.A.Store != NULL ){
937  SLU::Destroy_SuperMatrix_Store( &(data_.A) );
938  data_.A.Store = NULL;
939  }
940 
941  // Only the root image needs storage allocated
942  if( this->root_ ){
943  Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
944  Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
945  Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
946  }
947 
948  int nnz_ret = 0;
949  {
950 #ifdef HAVE_AMESOS2_TIMERS
951  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
952 #endif
953 
954  TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
955  std::runtime_error,
956  "Row and column maps have different indexbase ");
957 
958  if ( is_contiguous_ == true ) {
960  MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,
961  host_size_type_array>::do_get(this->matrixA_.ptr(),
962  host_nzvals_view_, host_rows_view_,
963  host_col_ptr_view_, nnz_ret, ROOTED,
964  ARBITRARY,
965  this->rowIndexBase_);
966  }
967  else {
969  MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,
970  host_size_type_array>::do_get(this->matrixA_.ptr(),
971  host_nzvals_view_, host_rows_view_,
972  host_col_ptr_view_, nnz_ret, CONTIGUOUS_AND_ROOTED,
973  ARBITRARY,
974  this->rowIndexBase_);
975  }
976  }
977 
978  // Get the SLU data type for this type of matrix
979  SLU::Dtype_t dtype = type_map::dtype;
980 
981  if( this->root_ ){
982  TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<int>(this->globalNumNonZeros_),
983  std::runtime_error,
984  "Did not get the expected number of non-zero vals");
985 
986  function_map::template create_CompCol_Matrix<host_value_type_array>( &(data_.A),
987  this->globalNumRows_, this->globalNumCols_,
988  nnz_ret,
989  convert_nzvals_, host_nzvals_view_,
990  host_rows_view_.data(),
991  host_col_ptr_view_.data(),
992  SLU::SLU_NC, dtype, SLU::SLU_GE);
993  }
994 
995  return true;
996 }
997 
998 template <class Matrix, class Vector>
999 void
1001 {
1002 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1003  size_t ld_rhs = this->matrixA_->getGlobalNumRows();
1004 
1005  // convert etree to parents
1006  SLU::SCformat * Lstore = (SLU::SCformat*)(data_.L.Store);
1007  int nsuper = 1 + Lstore->nsuper; // # of supernodal columns
1008  Kokkos::resize(data_.parents, nsuper);
1009  for (int s = 0; s < nsuper; s++) {
1010  int j = Lstore->sup_to_col[s+1]-1; // the last column index of this supernode
1011  if (data_.etree[j] == static_cast<int>(ld_rhs)) {
1012  data_.parents(s) = -1;
1013  } else {
1014  data_.parents(s) = Lstore->col_to_sup[data_.etree[j]];
1015  }
1016  }
1017 
1018  deep_copy_or_assign_view(device_trsv_perm_r_, data_.perm_r); // will use device to permute
1019  deep_copy_or_assign_view(device_trsv_perm_c_, data_.perm_c); // will use device to permute
1020  if (data_.options.Equil == SLU::YES) {
1021  deep_copy_or_assign_view(device_trsv_R_, data_.R); // will use device to scale
1022  deep_copy_or_assign_view(device_trsv_C_, data_.C); // will use device to scale
1023  }
1024 
1025  // Create handles for U and U^T solves
1026  if (sptrsv_use_spmv_) {
1027  device_khL_.create_sptrsv_handle(
1028  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs, true);
1029  device_khU_.create_sptrsv_handle(
1030  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs, false);
1031  } else {
1032  device_khL_.create_sptrsv_handle(
1033  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs, true);
1034  device_khU_.create_sptrsv_handle(
1035  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs, false);
1036  }
1037 
1038  // specify whether to store U in CSR format
1039  device_khU_.set_sptrsv_column_major (!sptrsv_u_in_csr_);
1040 
1041  // specify whether to merge supernodes with similar sparsity structure
1042  device_khL_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1043  device_khU_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1044 
1045  // specify whether to invert diagonal blocks
1046  device_khL_.set_sptrsv_invert_diagonal (sptrsv_invert_diag_);
1047  device_khU_.set_sptrsv_invert_diagonal (sptrsv_invert_diag_);
1048 
1049  // specify wheather to apply diagonal-inversion to off-diagonal blocks (optional, default is false)
1050  device_khL_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag_);
1051  device_khU_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag_);
1052 
1053  // set etree
1054  device_khL_.set_sptrsv_etree(data_.parents.data());
1055  device_khU_.set_sptrsv_etree(data_.parents.data());
1056 
1057  // set permutation
1058  device_khL_.set_sptrsv_perm(data_.perm_r.data());
1059  device_khU_.set_sptrsv_perm(data_.perm_c.data());
1060 
1061  int block_size = -1; // TODO: add needed params
1062  if (block_size >= 0) {
1063  std::cout << " Block Size : " << block_size << std::endl;
1064  device_khL_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1065  device_khU_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1066  }
1067 
1068  // Do symbolic analysis
1069  {
1070 #ifdef HAVE_AMESOS2_TIMERS
1071  Teuchos::RCP< Teuchos::Time > SymboTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv symbolic");
1072  Teuchos::TimeMonitor numFactTimer(*SymboTimer_);
1073 #endif
1074  KokkosSparse::Experimental::sptrsv_symbolic
1075  (&device_khL_, &device_khU_, data_.L, data_.U);
1076  }
1077 
1078  // Do numerical compute
1079  {
1080 #ifdef HAVE_AMESOS2_TIMERS
1081  Teuchos::RCP< Teuchos::Time > NumerTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv numeric");
1082  Teuchos::TimeMonitor numFactTimer(*NumerTimer_);
1083 #endif
1084  KokkosSparse::Experimental::sptrsv_compute
1085  (&device_khL_, &device_khU_, data_.L, data_.U);
1086  }
1087 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1088 }
1089 
1090 template <class Matrix, class Vector>
1091 void
1092 Superlu<Matrix,Vector>::triangular_solve() const
1093 {
1094 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1095  size_t ld_rhs = device_xValues_.extent(0);
1096  size_t nrhs = device_xValues_.extent(1);
1097 
1098  Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
1099  Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
1100 
1101  // forward pivot
1102  auto local_device_bValues = device_bValues_;
1103  auto local_device_trsv_perm_r = device_trsv_perm_r_;
1104  auto local_device_trsv_rhs = device_trsv_rhs_;
1105 
1106  if (data_.rowequ) {
1107  auto local_device_trsv_R = device_trsv_R_;
1108  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1109  KOKKOS_LAMBDA(size_t j) {
1110  for(size_t k = 0; k < nrhs; ++k) {
1111  local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_trsv_R(j) * local_device_bValues(j,k);
1112  }
1113  });
1114  } else {
1115  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1116  KOKKOS_LAMBDA(size_t j) {
1117  for(size_t k = 0; k < nrhs; ++k) {
1118  local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_bValues(j,k);
1119  }
1120  });
1121  }
1122 
1123  for(size_t k = 0; k < nrhs; ++k) { // sptrsv_solve does not batch
1124  auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
1125  auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
1126 
1127  // do L solve= - numeric (only rhs is modified) on the default device/host space
1128  {
1129 #ifdef HAVE_AMESOS2_TIMERS
1130  Teuchos::RCP< Teuchos::Time > SpLtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for L solve");
1131  Teuchos::TimeMonitor solveTimer(*SpLtrsvTimer_);
1132 #endif
1133  KokkosSparse::Experimental::sptrsv_solve(&device_khL_, sub_sol, sub_rhs);
1134  }
1135  // do L^T solve - numeric (only rhs is modified) on the default device/host space
1136  {
1137 #ifdef HAVE_AMESOS2_TIMERS
1138  Teuchos::RCP< Teuchos::Time > SpUtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for U solve");
1139  Teuchos::TimeMonitor solveTimer(*SpUtrsvTimer_);
1140 #endif
1141  KokkosSparse::Experimental::sptrsv_solve(&device_khU_, sub_rhs, sub_sol);
1142  }
1143  } // end loop over rhs vectors
1144 
1145  // backward pivot
1146  auto local_device_xValues = device_xValues_;
1147  auto local_device_trsv_perm_c = device_trsv_perm_c_;
1148  if (data_.colequ) {
1149  auto local_device_trsv_C = device_trsv_C_;
1150  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1151  KOKKOS_LAMBDA(size_t j) {
1152  for(size_t k = 0; k < nrhs; ++k) {
1153  local_device_xValues(j,k) = local_device_trsv_C(j) * local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1154  }
1155  });
1156  } else {
1157  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1158  KOKKOS_LAMBDA(size_t j) {
1159  for(size_t k = 0; k < nrhs; ++k) {
1160  local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1161  }
1162  });
1163  }
1164 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1165 }
1166 
1167 template<class Matrix, class Vector>
1168 const char* Superlu<Matrix,Vector>::name = "SuperLU";
1169 
1170 
1171 } // end namespace Amesos2
1172 
1173 #endif // AMESOS2_SUPERLU_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:105
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Superlu specific solve.
Definition: Amesos2_Superlu_def.hpp:462
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:953
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Superlu_def.hpp:924
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
int numericFactorization_impl()
Superlu specific numeric factorization.
Definition: Amesos2_Superlu_def.hpp:313
~Superlu()
Destructor.
Definition: Amesos2_Superlu_def.hpp:128
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superlu_def.hpp:780
Amesos2 Superlu declarations.
Definition: Amesos2_TypeDecl.hpp:143
Definition: Amesos2_AbstractConcreteMatrixAdapter.hpp:48
Definition: Amesos2_Superlu_FunctionMap.hpp:74
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
std::string description() const
Returns a short description of this Solver.
Definition: Amesos2_Superlu_def.hpp:151
std::string name() const
Return the name of this solver.
Definition: Amesos2_SolverCore_def.hpp:509
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superlu_def.hpp:664
Definition: Amesos2_TypeDecl.hpp:127
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_Superlu_def.hpp:675
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superlu_def.hpp:205
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176
Definition: Amesos2_TypeDecl.hpp:128
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Superlu.
Definition: Amesos2_Superlu_def.hpp:230
Amesos2 interface to the SuperLU package.
Definition: Amesos2_Superlu_decl.hpp:76