Intrepid2
Intrepid2_Data.hpp
Go to the documentation of this file.
1 //
2 // Intrepid2_Data.hpp
3 // QuadraturePerformance
4 //
5 // Created by Roberts, Nathan V on 8/24/20.
6 //
7 
8 #ifndef Intrepid2_Data_h
9 #define Intrepid2_Data_h
10 
12 #include "Intrepid2_ScalarView.hpp"
13 #include "Intrepid2_Utils.hpp"
14 
20 namespace Intrepid2 {
32  {
37  };
38 
43  {
44  int logicalExtent;
45  DataVariationType variationType;
46  int dataExtent;
47  int variationModulus; // should be equal to dataExtent variationType other than MODULAR and CONSTANT
48  int blockPlusDiagonalLastNonDiagonal = -1; // only relevant for variationType == BLOCK_PLUS_DIAGONAL
49  };
50 
52  KOKKOS_INLINE_FUNCTION
54  {
55  const int myNominalExtent = myData.logicalExtent;
56 #ifdef HAVE_INTREPID2_DEBUG
57  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(myNominalExtent != otherData.logicalExtent, std::invalid_argument, "both Data objects must match in their logical extent in the specified dimension");
58 #endif
59  const DataVariationType & myVariation = myData.variationType;
60  const DataVariationType & otherVariation = otherData.variationType;
61 
62  const int & myVariationModulus = myData.variationModulus;
63  const int & otherVariationModulus = otherData.variationModulus;
64 
65  int myDataExtent = myData.dataExtent;
66  int otherDataExtent = otherData.dataExtent;
67 
69  combinedDimensionInfo.logicalExtent = myNominalExtent;
70 
71  switch (myVariation)
72  {
73  case CONSTANT:
74  switch (otherVariation)
75  {
76  case CONSTANT:
77  case MODULAR:
78  case GENERAL:
80  combinedDimensionInfo = otherData;
81  }
82  break;
83  case MODULAR:
84  switch (otherVariation)
85  {
86  case CONSTANT:
87  combinedDimensionInfo = myData;
88  break;
89  case MODULAR:
90  if (myVariationModulus == otherVariationModulus)
91  {
92  // in this case, expect both to have the same data extent
93  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(myDataExtent != otherDataExtent, std::logic_error, "Unexpected data extent/modulus combination");
94  combinedDimensionInfo.variationType = MODULAR;
95  combinedDimensionInfo.dataExtent = myDataExtent;
96  combinedDimensionInfo.variationModulus = myVariationModulus;
97  }
98  else
99  {
100  // both modular with two different moduli
101  // we could do something clever with e.g. least common multiples, but for now we just use GENERAL
102  // (this is not a use case we anticipate being a common one)
103  combinedDimensionInfo.variationType = GENERAL;
104  combinedDimensionInfo.dataExtent = myNominalExtent;
105  combinedDimensionInfo.variationModulus = myNominalExtent;
106  }
107  break;
108  case BLOCK_PLUS_DIAGONAL:
109  combinedDimensionInfo.variationType = GENERAL;
110  combinedDimensionInfo.dataExtent = myNominalExtent;
111  combinedDimensionInfo.variationModulus = myNominalExtent;
112  break;
113  case GENERAL:
114  // otherData is GENERAL: its info dominates
115  combinedDimensionInfo = otherData;
116  break;
117  }
118  break;
119  case BLOCK_PLUS_DIAGONAL:
120  switch (otherVariation)
121  {
122  case CONSTANT:
123  combinedDimensionInfo = myData;
124  break;
125  case MODULAR:
126  combinedDimensionInfo.variationType = GENERAL;
127  combinedDimensionInfo.dataExtent = myNominalExtent;
128  combinedDimensionInfo.variationModulus = myNominalExtent;
129  break;
130  case GENERAL:
131  // otherData is GENERAL: its info dominates
132  combinedDimensionInfo = otherData;
133  break;
134  case BLOCK_PLUS_DIAGONAL:
135  combinedDimensionInfo.variationType = GENERAL;
136  combinedDimensionInfo.dataExtent = max(myDataExtent,otherDataExtent);
137  combinedDimensionInfo.variationModulus = combinedDimensionInfo.dataExtent;
138  // for this case, we want to take the minimum of the two Data objects' blockPlusDiagonalLastNonDiagonal as the combined object's blockPlusDiagonalLastNonDiagonal
139  combinedDimensionInfo.blockPlusDiagonalLastNonDiagonal = min(myData.blockPlusDiagonalLastNonDiagonal, otherData.blockPlusDiagonalLastNonDiagonal);
140  }
141  break;
142  case GENERAL:
143  switch (otherVariation)
144  {
145  case CONSTANT:
146  case MODULAR:
147  case GENERAL:
148  case BLOCK_PLUS_DIAGONAL:
149  combinedDimensionInfo = myData;
150  }
151  }
152  return combinedDimensionInfo;
153  }
154 
163 template<class DataScalar,typename DeviceType>
164 class ZeroView {
165 public:
166  static ScalarView<DataScalar,DeviceType> zeroView()
167  {
168  static ScalarView<DataScalar,DeviceType> zeroView = ScalarView<DataScalar,DeviceType>("zero",1);
169  static bool havePushedFinalizeHook = false;
170  if (!havePushedFinalizeHook)
171  {
172  Kokkos::push_finalize_hook( [=] {
173  zeroView = ScalarView<DataScalar,DeviceType>();
174  });
175  havePushedFinalizeHook = true;
176  }
177  return zeroView;
178  }
179 };
180 
198  template<class DataScalar,typename DeviceType>
199  class Data {
200  public:
201  using value_type = DataScalar;
202  using execution_space = typename DeviceType::execution_space;
203  private:
204  ordinal_type dataRank_;
205  Kokkos::View<DataScalar*, DeviceType> data1_; // the rank 1 data that is explicitly stored
206  Kokkos::View<DataScalar**, DeviceType> data2_; // the rank 2 data that is explicitly stored
207  Kokkos::View<DataScalar***, DeviceType> data3_; // the rank 3 data that is explicitly stored
208  Kokkos::View<DataScalar****, DeviceType> data4_; // the rank 4 data that is explicitly stored
209  Kokkos::View<DataScalar*****, DeviceType> data5_; // the rank 5 data that is explicitly stored
210  Kokkos::View<DataScalar******, DeviceType> data6_; // the rank 6 data that is explicitly stored
211  Kokkos::View<DataScalar*******, DeviceType> data7_; // the rank 7 data that is explicitly stored
212  Kokkos::Array<int,7> extents_; // logical extents in each dimension
213  Kokkos::Array<DataVariationType,7> variationType_; // for each dimension, whether the data varies in that dimension
214  Kokkos::Array<int,7> variationModulus_; // for each dimension, a value by which indices should be modulused (only used when variationType_ is MODULAR)
215  int blockPlusDiagonalLastNonDiagonal_ = -1; // last row/column that is part of the non-diagonal part of the matrix indicated by BLOCK_PLUS_DIAGONAL (if any dimensions are thus marked)
216 
217  bool hasNontrivialModulusUNUSED_; // this is a little nutty, but having this UNUSED member variable improves performance, probably by shifting the alignment of underlyingMatchesLogical_. This is true with nvcc; it may also be true with Apple clang
218  bool underlyingMatchesLogical_; // if true, this Data object has the same rank and extent as the underlying view
219  Kokkos::Array<ordinal_type,7> activeDims_;
220  int numActiveDims_; // how many of the 7 entries are actually filled in
221 
222  ordinal_type rank_;
223 
224  using reference_type = typename ScalarView<DataScalar,DeviceType>::reference_type;
225  using const_reference_type = typename ScalarView<const DataScalar,DeviceType>::reference_type;
226  // we use reference_type as the return for operator() for performance reasons, especially significant when using Sacado types
227  using return_type = const_reference_type;
228 
229  ScalarView<DataScalar,DeviceType> zeroView_; // one-entry (zero); used to allow getEntry() to return 0 for off-diagonal entries in BLOCK_PLUS_DIAGONAL
230 
232  KOKKOS_INLINE_FUNCTION
233  static int blockPlusDiagonalNumNondiagonalEntries(const int &lastNondiagonal)
234  {
235  return (lastNondiagonal + 1) * (lastNondiagonal + 1);
236  }
237 
239  KOKKOS_INLINE_FUNCTION
240  static int blockPlusDiagonalBlockEntryIndex(const int &lastNondiagonal, const int &numNondiagonalEntries, const int &i, const int &j)
241  {
242  return i * (lastNondiagonal + 1) + j;
243  }
244 
246  KOKKOS_INLINE_FUNCTION
247  static int blockPlusDiagonalDiagonalEntryIndex(const int &lastNondiagonal, const int &numNondiagonalEntries, const int &i)
248  {
249  return i - (lastNondiagonal + 1) + numNondiagonalEntries;
250  }
251 
253  KOKKOS_INLINE_FUNCTION
254  int getUnderlyingViewExtent(const int &dim) const
255  {
256  switch (dataRank_)
257  {
258  case 1: return data1_.extent_int(dim);
259  case 2: return data2_.extent_int(dim);
260  case 3: return data3_.extent_int(dim);
261  case 4: return data4_.extent_int(dim);
262  case 5: return data5_.extent_int(dim);
263  case 6: return data6_.extent_int(dim);
264  case 7: return data7_.extent_int(dim);
265  default: return -1;
266  }
267  }
268 
271  {
272  zeroView_ = ZeroView<DataScalar,DeviceType>::zeroView(); // one-entry (zero); used to allow getEntry() to return 0 for off-diagonal entries in BLOCK_PLUS_DIAGONAL
273  // check that rank is compatible with the claimed extents:
274  for (int d=rank_; d<7; d++)
275  {
276  INTREPID2_TEST_FOR_EXCEPTION(extents_[d] > 1, std::invalid_argument, "Nominal extents may not be > 1 in dimensions beyond the rank of the container");
277  }
278 
279  numActiveDims_ = 0;
280  int blockPlusDiagonalCount = 0;
281  underlyingMatchesLogical_ = true;
282  for (ordinal_type i=0; i<7; i++)
283  {
284  if (variationType_[i] == GENERAL)
285  {
286  if (extents_[i] != 0)
287  {
288  variationModulus_[i] = extents_[i];
289  }
290  else
291  {
292  variationModulus_[i] = 1;
293  }
294  activeDims_[numActiveDims_] = i;
295  numActiveDims_++;
296  }
297  else if (variationType_[i] == MODULAR)
298  {
299  underlyingMatchesLogical_ = false;
300  if (extents_[i] != getUnderlyingViewExtent(numActiveDims_))
301  {
302  const int dataExtent = getUnderlyingViewExtent(numActiveDims_);
303  const int logicalExtent = extents_[i];
304  const int modulus = dataExtent;
305 
306  INTREPID2_TEST_FOR_EXCEPTION( dataExtent * (logicalExtent / dataExtent) != logicalExtent, std::invalid_argument, "data extent must evenly divide logical extent");
307 
308  variationModulus_[i] = modulus;
309  }
310  else
311  {
312  variationModulus_[i] = extents_[i];
313  }
314  activeDims_[numActiveDims_] = i;
315  numActiveDims_++;
316  }
317  else if (variationType_[i] == BLOCK_PLUS_DIAGONAL)
318  {
319  underlyingMatchesLogical_ = false;
320  blockPlusDiagonalCount++;
321  if (blockPlusDiagonalCount == 1) // first dimension thus marked --> active
322  {
323 
324 #ifdef HAVE_INTREPID2_DEBUG
325  const int numNondiagonalEntries = blockPlusDiagonalNumNondiagonalEntries(blockPlusDiagonalLastNonDiagonal_);
326  const int dataExtent = getUnderlyingViewExtent(numActiveDims_); // flat storage of all matrix entries
327  const int logicalExtent = extents_[i];
328  const int numDiagonalEntries = logicalExtent - (blockPlusDiagonalLastNonDiagonal_ + 1);
329  const int expectedDataExtent = numNondiagonalEntries + numDiagonalEntries;
330  INTREPID2_TEST_FOR_EXCEPTION(dataExtent != expectedDataExtent, std::invalid_argument, ("BLOCK_PLUS_DIAGONAL data extent of " + std::to_string(dataExtent) + " does not match expected based on blockPlusDiagonalLastNonDiagonal setting of " + std::to_string(blockPlusDiagonalLastNonDiagonal_)).c_str());
331 #endif
332 
333  activeDims_[numActiveDims_] = i;
334  numActiveDims_++;
335  }
336  variationModulus_[i] = getUnderlyingViewExtent(numActiveDims_);
337  INTREPID2_TEST_FOR_EXCEPTION(variationType_[i+1] != BLOCK_PLUS_DIAGONAL, std::invalid_argument, "BLOCK_PLUS_DIAGONAL ranks must be contiguous");
338  i++; // skip over the next BLOCK_PLUS_DIAGONAL
339  variationModulus_[i] = 1; // trivial modulus (should not ever be used)
340  INTREPID2_TEST_FOR_EXCEPTION(blockPlusDiagonalCount > 1, std::invalid_argument, "BLOCK_PLUS_DIAGONAL can only apply to two ranks");
341  }
342  else // CONSTANT
343  {
344  if (i < rank_)
345  {
346  underlyingMatchesLogical_ = false;
347  }
348  variationModulus_[i] = 1; // trivial modulus
349  }
350  }
351 
352  if (rank_ != dataRank_)
353  {
354  underlyingMatchesLogical_ = false;
355  }
356 
357  for (int d=numActiveDims_; d<7; d++)
358  {
359  // for *inactive* dims, the activeDims_ map just is the identity
360  // (this allows getEntry() to work even when the logical rank of the Data object is lower than that of the underlying View. This can happen for gradients in 1D.)
361  activeDims_[d] = d;
362  }
363  for (int d=0; d<7; d++)
364  {
365  INTREPID2_TEST_FOR_EXCEPTION(variationModulus_[d] == 0, std::logic_error, "variationModulus should not ever be 0");
366  }
367  }
368 
369  public:
372  {
373  template<class ViewType, class ...IntArgs>
374  static KOKKOS_INLINE_FUNCTION reference_type get(const ViewType &view, const IntArgs&... intArgs)
375  {
376  return view.getWritableEntry(intArgs...);
377  }
378  };
379 
380  template<class BinaryOperator, class ThisUnderlyingViewType, class AUnderlyingViewType, class BUnderlyingViewType,
381  class ArgExtractorThis, class ArgExtractorA, class ArgExtractorB, bool includeInnerLoop=false>
383  {
384  private:
385  ThisUnderlyingViewType this_underlying_;
386  AUnderlyingViewType A_underlying_;
387  BUnderlyingViewType B_underlying_;
388  BinaryOperator binaryOperator_;
389  int innerLoopSize_;
390  public:
391  InPlaceCombinationFunctor(ThisUnderlyingViewType this_underlying, AUnderlyingViewType A_underlying, BUnderlyingViewType B_underlying,
392  BinaryOperator binaryOperator)
393  :
394  this_underlying_(this_underlying),
395  A_underlying_(A_underlying),
396  B_underlying_(B_underlying),
397  binaryOperator_(binaryOperator)
398  {
399  INTREPID2_TEST_FOR_EXCEPTION(includeInnerLoop,std::invalid_argument,"If includeInnerLoop is true, must specify the size of the inner loop");
400  }
401 
402  InPlaceCombinationFunctor(ThisUnderlyingViewType this_underlying, AUnderlyingViewType A_underlying, BUnderlyingViewType B_underlying,
403  BinaryOperator binaryOperator, int innerLoopSize)
404  :
405  this_underlying_(this_underlying),
406  A_underlying_(A_underlying),
407  B_underlying_(B_underlying),
408  binaryOperator_(binaryOperator),
409  innerLoopSize_(innerLoopSize)
410  {
411  INTREPID2_TEST_FOR_EXCEPTION(includeInnerLoop,std::invalid_argument,"If includeInnerLoop is true, must specify the size of the inner loop");
412  }
413 
414  template<class ...IntArgs, bool M=includeInnerLoop>
415  KOKKOS_INLINE_FUNCTION
416  enable_if_t<!M, void>
417  operator()(const IntArgs&... args) const
418  {
419  auto & result = ArgExtractorThis::get( this_underlying_, args... );
420  const auto & A_val = ArgExtractorA::get( A_underlying_, args... );
421  const auto & B_val = ArgExtractorB::get( B_underlying_, args... );
422 
423  result = binaryOperator_(A_val,B_val);
424  }
425 
426  template<class ...IntArgs, bool M=includeInnerLoop>
427  KOKKOS_INLINE_FUNCTION
428  enable_if_t<M, void>
429  operator()(const IntArgs&... args) const
430  {
431  using int_type = std::tuple_element_t<0, std::tuple<IntArgs...>>;
432  for (int_type iFinal=0; iFinal<static_cast<int_type>(innerLoopSize_); iFinal++)
433  {
434  auto & result = ArgExtractorThis::get( this_underlying_, args..., iFinal );
435  const auto & A_val = ArgExtractorA::get( A_underlying_, args..., iFinal );
436  const auto & B_val = ArgExtractorB::get( B_underlying_, args..., iFinal );
437 
438  result = binaryOperator_(A_val,B_val);
439  }
440  }
441  };
442 
444  template<class BinaryOperator, class PolicyType, class ThisUnderlyingViewType, class AUnderlyingViewType, class BUnderlyingViewType,
445  class ArgExtractorThis, class ArgExtractorA, class ArgExtractorB>
446  void storeInPlaceCombination(PolicyType &policy, ThisUnderlyingViewType &this_underlying,
447  AUnderlyingViewType &A_underlying, BUnderlyingViewType &B_underlying,
448  BinaryOperator &binaryOperator, ArgExtractorThis argThis, ArgExtractorA argA, ArgExtractorB argB)
449  {
450  using Functor = InPlaceCombinationFunctor<BinaryOperator, ThisUnderlyingViewType, AUnderlyingViewType, BUnderlyingViewType, ArgExtractorThis, ArgExtractorA, ArgExtractorB>;
451  Functor functor(this_underlying, A_underlying, B_underlying, binaryOperator);
452  Kokkos::parallel_for("compute in-place", policy, functor);
453  }
454 
456  template<class BinaryOperator, int rank>
457  enable_if_t<rank != 7, void>
458  storeInPlaceCombination(const Data<DataScalar,DeviceType> &A, const Data<DataScalar,DeviceType> &B, BinaryOperator binaryOperator)
459  {
460  auto policy = dataExtentRangePolicy<rank>();
461 
462  // shallow copy of this to avoid implicit references to this in calls to getWritableEntry() below
463  Data<DataScalar,DeviceType> thisData = *this;
464 
465  const bool A_1D = A.getUnderlyingViewRank() == 1;
466  const bool B_1D = B.getUnderlyingViewRank() == 1;
467  const bool this_1D = this->getUnderlyingViewRank() == 1;
468  const bool A_constant = A_1D && (A.getUnderlyingViewSize() == 1);
469  const bool B_constant = B_1D && (B.getUnderlyingViewSize() == 1);
470  const bool this_constant = this_1D && (this->getUnderlyingViewSize() == 1);
471  const bool A_full = A.underlyingMatchesLogical();
472  const bool B_full = B.underlyingMatchesLogical();
473  const bool this_full = this->underlyingMatchesLogical();
474 
476 
477  const FullArgExtractor<reference_type> fullArgs;
478  const FullArgExtractor<const_reference_type> fullArgsConst;
479  const FullArgExtractorWritableData fullArgsWritable;
480 
487 
488  // this lambda returns -1 if there is not a rank-1 underlying view whose data extent matches the logical extent in the corresponding dimension;
489  // otherwise, it returns the logical index of the corresponding dimension.
490  auto get1DArgIndex = [](const Data<DataScalar,DeviceType> &data) -> int
491  {
492  const auto & variationTypes = data.getVariationTypes();
493  for (int d=0; d<rank; d++)
494  {
495  if (variationTypes[d] == GENERAL)
496  {
497  return d;
498  }
499  }
500  return -1;
501  };
502  if (this_constant)
503  {
504  // then A, B are constant, too
505  auto thisAE = constArg;
506  auto AAE = constArg;
507  auto BAE = constArg;
508  auto & this_underlying = this->getUnderlyingView<1>();
509  auto & A_underlying = A.getUnderlyingView<1>();
510  auto & B_underlying = B.getUnderlyingView<1>();
511  storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
512  }
513  else if (this_full && A_full && B_full)
514  {
515  auto thisAE = fullArgs;
516  auto AAE = fullArgs;
517  auto BAE = fullArgs;
518 
519  auto & this_underlying = this->getUnderlyingView<rank>();
520  auto & A_underlying = A.getUnderlyingView<rank>();
521  auto & B_underlying = B.getUnderlyingView<rank>();
522 
523  storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
524  }
525  else if (A_constant)
526  {
527  auto AAE = constArg;
528  auto & A_underlying = A.getUnderlyingView<1>();
529  if (this_full)
530  {
531  auto thisAE = fullArgs;
532  auto & this_underlying = this->getUnderlyingView<rank>();
533 
534  if (B_full)
535  {
536  auto BAE = fullArgs;
537  auto & B_underlying = B.getUnderlyingView<rank>();
538  storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
539  }
540  else // this_full, not B_full: B may have modular data, etc.
541  {
542  auto BAE = fullArgsConst;
543  storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, AAE, BAE);
544  }
545  }
546  else // this is not full
547  {
548  // below, we optimize for the case of 1D data in B, when A is constant. Still need to handle other cases…
549  if (B_1D && (get1DArgIndex(B) != -1) )
550  {
551  // since A is constant, that implies that this_1D is true, and has the same 1DArgIndex
552  const int argIndex = get1DArgIndex(B);
553  auto & B_underlying = B.getUnderlyingView<1>();
554  auto & this_underlying = this->getUnderlyingView<1>();
555  switch (argIndex)
556  {
557  case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, AAE, arg0); break;
558  case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, AAE, arg1); break;
559  case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, AAE, arg2); break;
560  case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, AAE, arg3); break;
561  case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, AAE, arg4); break;
562  case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, AAE, arg5); break;
563  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
564  }
565  }
566  else
567  {
568  // since storing to Data object requires a call to getWritableEntry(), we use FullArgExtractorWritableData
569  auto thisAE = fullArgsWritable;
570  auto BAE = fullArgsConst;
571  storeInPlaceCombination(policy, thisData, A_underlying, B, binaryOperator, thisAE, AAE, BAE);
572  }
573  }
574  }
575  else if (B_constant)
576  {
577  auto BAE = constArg;
578  auto & B_underlying = B.getUnderlyingView<1>();
579  if (this_full)
580  {
581  auto thisAE = fullArgs;
582  auto & this_underlying = this->getUnderlyingView<rank>();
583  if (A_full)
584  {
585  auto AAE = fullArgs;
586  auto & A_underlying = A.getUnderlyingView<rank>();
587 
588  storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
589  }
590  else // this_full, not A_full: A may have modular data, etc.
591  {
592  // use A (the Data object). This could be further optimized by using A's underlying View and an appropriately-defined ArgExtractor.
593  auto AAE = fullArgsConst;
594  storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, thisAE, AAE, BAE);
595  }
596  }
597  else // this is not full
598  {
599  // below, we optimize for the case of 1D data in A, when B is constant. Still need to handle other cases…
600  if (A_1D && (get1DArgIndex(A) != -1) )
601  {
602  // since B is constant, that implies that this_1D is true, and has the same 1DArgIndex as A
603  const int argIndex = get1DArgIndex(A);
604  auto & A_underlying = A.getUnderlyingView<1>();
605  auto & this_underlying = this->getUnderlyingView<1>();
606  switch (argIndex)
607  {
608  case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, arg0, BAE); break;
609  case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, arg1, BAE); break;
610  case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, arg2, BAE); break;
611  case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, arg3, BAE); break;
612  case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, arg4, BAE); break;
613  case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, arg5, BAE); break;
614  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
615  }
616  }
617  else
618  {
619  // since storing to Data object requires a call to getWritableEntry(), we use FullArgExtractorWritableData
620  auto thisAE = fullArgsWritable;
621  auto AAE = fullArgsConst;
622  storeInPlaceCombination(policy, thisData, A, B_underlying, binaryOperator, thisAE, AAE, BAE);
623  }
624  }
625  }
626  else // neither A nor B constant
627  {
628  if (this_1D && (get1DArgIndex(thisData) != -1))
629  {
630  // possible ways that "this" could have full-extent, 1D data
631  // 1. A constant, B 1D
632  // 2. A 1D, B constant
633  // 3. A 1D, B 1D
634  // The constant possibilities are already addressed above, leaving us with (3). Note that A and B don't have to be full-extent, however
635  const int argThis = get1DArgIndex(thisData);
636  const int argA = get1DArgIndex(A); // if not full-extent, will be -1
637  const int argB = get1DArgIndex(B); // ditto
638 
639  auto & A_underlying = A.getUnderlyingView<1>();
640  auto & B_underlying = B.getUnderlyingView<1>();
641  auto & this_underlying = this->getUnderlyingView<1>();
642  if ((argA != -1) && (argB != -1))
643  {
644 #ifdef INTREPID2_HAVE_DEBUG
645  INTREPID2_TEST_FOR_EXCEPTION(argA != argThis, std::logic_error, "Unexpected 1D arg combination.");
646  INTREPID2_TEST_FOR_EXCEPTION(argB != argThis, std::logic_error, "Unexpected 1D arg combination.");
647 #endif
648  switch (argThis)
649  {
650  case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, arg0, arg0); break;
651  case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, arg1, arg1); break;
652  case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, arg2, arg2); break;
653  case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, arg3, arg3); break;
654  case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, arg4, arg4); break;
655  case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, arg5, arg5); break;
656  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
657  }
658  }
659  else if (argA != -1)
660  {
661  // B is not full-extent in dimension argThis; use the Data object
662  switch (argThis)
663  {
664  case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg0, arg0, fullArgsConst); break;
665  case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg1, arg1, fullArgsConst); break;
666  case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg2, arg2, fullArgsConst); break;
667  case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg3, arg3, fullArgsConst); break;
668  case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg4, arg4, fullArgsConst); break;
669  case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg5, arg5, fullArgsConst); break;
670  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
671  }
672  }
673  else
674  {
675  // A is not full-extent in dimension argThis; use the Data object
676  switch (argThis)
677  {
678  case 0: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg0, fullArgsConst, arg0); break;
679  case 1: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg1, fullArgsConst, arg1); break;
680  case 2: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg2, fullArgsConst, arg2); break;
681  case 3: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg3, fullArgsConst, arg3); break;
682  case 4: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg4, fullArgsConst, arg4); break;
683  case 5: storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg5, fullArgsConst, arg5); break;
684  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
685  }
686  }
687  }
688  else if (this_full)
689  {
690  // This case uses A,B Data objects; could be optimized by dividing into subcases and using underlying Views with appropriate ArgExtractors.
691  auto & this_underlying = this->getUnderlyingView<rank>();
692  auto thisAE = fullArgs;
693 
694  if (A_full)
695  {
696  auto & A_underlying = A.getUnderlyingView<rank>();
697  auto AAE = fullArgs;
698 
699  if (B_1D && (get1DArgIndex(B) != -1))
700  {
701  const int argIndex = get1DArgIndex(B);
702  auto & B_underlying = B.getUnderlyingView<1>();
703  switch (argIndex)
704  {
705  case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg0); break;
706  case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg1); break;
707  case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg2); break;
708  case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg3); break;
709  case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg4); break;
710  case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg5); break;
711  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
712  }
713  }
714  else
715  {
716  // A is full; B is not full, but not constant or full-extent 1D
717  // unoptimized in B access:
718  auto BAE = fullArgsConst;
719  storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, AAE, BAE);
720  }
721  }
722  else // A is not full
723  {
724  if (A_1D && (get1DArgIndex(A) != -1))
725  {
726  const int argIndex = get1DArgIndex(A);
727  auto & A_underlying = A.getUnderlyingView<1>();
728  if (B_full)
729  {
730  auto & B_underlying = B.getUnderlyingView<rank>();
731  auto BAE = fullArgs;
732  switch (argIndex)
733  {
734  case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg0, BAE); break;
735  case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg1, BAE); break;
736  case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg2, BAE); break;
737  case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg3, BAE); break;
738  case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg4, BAE); break;
739  case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg5, BAE); break;
740  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
741  }
742  }
743  else
744  {
745  auto BAE = fullArgsConst;
746  switch (argIndex)
747  {
748  case 0: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg0, BAE); break;
749  case 1: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg1, BAE); break;
750  case 2: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg2, BAE); break;
751  case 3: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg3, BAE); break;
752  case 4: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg4, BAE); break;
753  case 5: storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg5, BAE); break;
754  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid/unexpected arg index");
755  }
756  }
757  }
758  else // A not full, and not full-extent 1D
759  {
760  // unoptimized in A, B accesses.
761  auto AAE = fullArgsConst;
762  auto BAE = fullArgsConst;
763  storeInPlaceCombination(policy, this_underlying, A, B, binaryOperator, thisAE, AAE, BAE);
764  }
765  }
766  }
767  else
768  {
769  // completely un-optimized case: we use Data objects for this, A, B.
770  auto thisAE = fullArgsWritable;
771  auto AAE = fullArgsConst;
772  auto BAE = fullArgsConst;
773  storeInPlaceCombination(policy, thisData, A, B, binaryOperator, thisAE, AAE, BAE);
774  }
775  }
776  }
777 
779  template<class BinaryOperator, int rank>
780  enable_if_t<rank == 7, void>
781  storeInPlaceCombination(const Data<DataScalar,DeviceType> &A, const Data<DataScalar,DeviceType> &B, BinaryOperator binaryOperator)
782  {
783  auto policy = dataExtentRangePolicy<rank>();
784 
785  using DataType = Data<DataScalar,DeviceType>;
786  using ThisAE = FullArgExtractorWritableData;
789 
790  const ordinal_type dim6 = getDataExtent(6);
791  const bool includeInnerLoop = true;
792  using Functor = InPlaceCombinationFunctor<BinaryOperator, DataType, DataType, DataType, ThisAE, AAE, BAE, includeInnerLoop>;
793  Functor functor(*this, A, B, binaryOperator, dim6);
794  Kokkos::parallel_for("compute in-place", policy, functor);
795  }
796  public:
798  template<class UnaryOperator>
799  void applyOperator(UnaryOperator unaryOperator)
800  {
801  using ExecutionSpace = typename DeviceType::execution_space;
802 
803  switch (dataRank_)
804  {
805  case 1:
806  {
807  const int dataRank = 1;
808  auto view = getUnderlyingView<dataRank>();
809 
810  const int dataExtent = this->getDataExtent(0);
811  Kokkos::RangePolicy<ExecutionSpace> policy(ExecutionSpace(),0,dataExtent);
812  Kokkos::parallel_for("apply operator in-place", policy,
813  KOKKOS_LAMBDA (const int &i0) {
814  view(i0) = unaryOperator(view(i0));
815  });
816 
817  }
818  break;
819  case 2:
820  {
821  const int dataRank = 2;
822  auto policy = dataExtentRangePolicy<dataRank>();
823  auto view = getUnderlyingView<dataRank>();
824 
825  Kokkos::parallel_for("apply operator in-place", policy,
826  KOKKOS_LAMBDA (const int &i0, const int &i1) {
827  view(i0,i1) = unaryOperator(view(i0,i1));
828  });
829  }
830  break;
831  case 3:
832  {
833  const int dataRank = 3;
834  auto policy = dataExtentRangePolicy<dataRank>();
835  auto view = getUnderlyingView<dataRank>();
836 
837  Kokkos::parallel_for("apply operator in-place", policy,
838  KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2) {
839  view(i0,i1,i2) = unaryOperator(view(i0,i1,i2));
840  });
841  }
842  break;
843  case 4:
844  {
845  const int dataRank = 4;
846  auto policy = dataExtentRangePolicy<dataRank>();
847  auto view = getUnderlyingView<dataRank>();
848 
849  Kokkos::parallel_for("apply operator in-place", policy,
850  KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3) {
851  view(i0,i1,i2,i3) = unaryOperator(view(i0,i1,i2,i3));
852  });
853  }
854  break;
855  case 5:
856  {
857  const int dataRank = 5;
858  auto policy = dataExtentRangePolicy<dataRank>();
859  auto view = getUnderlyingView<dataRank>();
860 
861  Kokkos::parallel_for("apply operator in-place", policy,
862  KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3, const int &i4) {
863  view(i0,i1,i2,i3,i4) = unaryOperator(view(i0,i1,i2,i3,i4));
864  });
865  }
866  break;
867  case 6:
868  {
869  const int dataRank = 6;
870  auto policy = dataExtentRangePolicy<dataRank>();
871  auto view = getUnderlyingView<dataRank>();
872 
873  Kokkos::parallel_for("apply operator in-place", policy,
874  KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3, const int &i4, const int &i5) {
875  view(i0,i1,i2,i3,i4,i5) = unaryOperator(view(i0,i1,i2,i3,i4,i5));
876  });
877  }
878  break;
879  case 7:
880  {
881  const int dataRank = 7;
882  auto policy6 = dataExtentRangePolicy<6>();
883  auto view = getUnderlyingView<dataRank>();
884 
885  const int dim_i6 = view.extent_int(6);
886 
887  Kokkos::parallel_for("apply operator in-place", policy6,
888  KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3, const int &i4, const int &i5) {
889  for (int i6=0; i6<dim_i6; i6++)
890  {
891  view(i0,i1,i2,i3,i4,i5,i6) = unaryOperator(view(i0,i1,i2,i3,i4,i5,i6));
892  }
893  });
894  }
895  break;
896  default:
897  INTREPID2_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Unsupported data rank");
898  }
899  }
900 
902  template<class ...IntArgs>
903  KOKKOS_INLINE_FUNCTION
904  reference_type getWritableEntry(const IntArgs... intArgs) const
905  {
906 #ifdef INTREPID2_HAVE_DEBUG
907  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(numArgs != rank_, std::invalid_argument, "getWritableEntry() should have the same number of arguments as the logical rank.");
908 #endif
909  constexpr int numArgs = sizeof...(intArgs);
910  if (underlyingMatchesLogical_)
911  {
912  // in this case, we require that numArgs == dataRank_
913  return getUnderlyingView<numArgs>()(intArgs...);
914  }
915 
916  // extract the type of the first argument; use that for the arrays below
917  using int_type = std::tuple_element_t<0, std::tuple<IntArgs...>>;
918 
919  const Kokkos::Array<int_type, numArgs> args {intArgs...};
920  Kokkos::Array<int_type, 7> refEntry;
921  for (int d=0; d<numArgs; d++)
922  {
923  switch (variationType_[d])
924  {
925  case CONSTANT: refEntry[d] = 0; break;
926  case GENERAL: refEntry[d] = args[d]; break;
927  case MODULAR: refEntry[d] = args[d] % variationModulus_[d]; break;
928  case BLOCK_PLUS_DIAGONAL:
929  {
930  const int numNondiagonalEntries = blockPlusDiagonalNumNondiagonalEntries(blockPlusDiagonalLastNonDiagonal_);
931 
932  const int_type &i = args[d];
933  if (d+1 >= numArgs)
934  {
935  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::invalid_argument, "BLOCK_PLUS_DIAGONAL must be present for two dimensions; here, encountered only one");
936  }
937  else
938  {
939  const int_type &j = args[d+1];
940 
941  if ((i > static_cast<int_type>(blockPlusDiagonalLastNonDiagonal_)) || (j > static_cast<int_type>(blockPlusDiagonalLastNonDiagonal_)))
942  {
943  if (i != j)
944  {
945  // off diagonal: zero
946  return zeroView_(0); // NOTE: this branches in an argument-dependent way; this is not great for CUDA performance. When using BLOCK_PLUS_DIAGONAL, should generally avoid calls to this getEntry() method. (Use methods that directly take advantage of the data packing instead.)
947  }
948  else
949  {
950  refEntry[d] = blockPlusDiagonalDiagonalEntryIndex(blockPlusDiagonalLastNonDiagonal_, numNondiagonalEntries, i);
951  }
952  }
953  else
954  {
955  refEntry[d] = blockPlusDiagonalBlockEntryIndex(blockPlusDiagonalLastNonDiagonal_, numNondiagonalEntries, i, j);
956  }
957 
958  // skip next d (this is required also to be BLOCK_PLUS_DIAGONAL, and we've consumed its arg as j above)
959  refEntry[d+1] = 0;
960  }
961  d++;
962  }
963  }
964  }
965  // refEntry should be zero-filled beyond numArgs, for cases when rank_ < dataRank_ (this only is allowed if the extra dimensions each has extent 1).
966  for (int d=numArgs; d<7; d++)
967  {
968  refEntry[d] = 0;
969  }
970 
971  if (dataRank_ == 1)
972  {
973  return data1_(refEntry[activeDims_[0]]);
974  }
975  else if (dataRank_ == 2)
976  {
977  return data2_(refEntry[activeDims_[0]],refEntry[activeDims_[1]]);
978  }
979  else if (dataRank_ == 3)
980  {
981  return data3_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]]);
982  }
983  else if (dataRank_ == 4)
984  {
985  return data4_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]],refEntry[activeDims_[3]]);
986  }
987  else if (dataRank_ == 5)
988  {
989  return data5_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]],refEntry[activeDims_[3]],
990  refEntry[activeDims_[4]]);
991  }
992  else if (dataRank_ == 6)
993  {
994  return data6_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]],refEntry[activeDims_[3]],
995  refEntry[activeDims_[4]],refEntry[activeDims_[5]]);
996  }
997  else // dataRank_ == 7
998  {
999  return data7_(refEntry[activeDims_[0]],refEntry[activeDims_[1]],refEntry[activeDims_[2]],refEntry[activeDims_[3]],
1000  refEntry[activeDims_[4]],refEntry[activeDims_[5]],refEntry[activeDims_[6]]);
1001  }
1002  }
1003  public:
1005  template<class ToContainer, class FromContainer>
1006  static void copyContainer(ToContainer to, FromContainer from)
1007  {
1008 // std::cout << "Entered copyContainer().\n";
1009  auto policy = Kokkos::MDRangePolicy<execution_space,Kokkos::Rank<6>>({0,0,0,0,0,0},{from.extent_int(0),from.extent_int(1),from.extent_int(2), from.extent_int(3), from.extent_int(4), from.extent_int(5)});
1010 
1011  Kokkos::parallel_for("copyContainer", policy,
1012  KOKKOS_LAMBDA (const int &i0, const int &i1, const int &i2, const int &i3, const int &i4, const int &i5) {
1013  for (int i6=0; i6<from.extent_int(6); i6++)
1014  {
1015  to.access(i0,i1,i2,i3,i4,i5,i6) = from.access(i0,i1,i2,i3,i4,i5,i6);
1016  }
1017  });
1018  }
1019 
1021  void allocateAndCopyFromDynRankView(ScalarView<DataScalar,DeviceType> data)
1022  {
1023 // std::cout << "Entered allocateAndCopyFromDynRankView().\n";
1024  switch (dataRank_)
1025  {
1026  case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>("Intrepid2 Data", data.extent_int(0)); break;
1027  case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1)); break;
1028  case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2)); break;
1029  case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2), data.extent_int(3)); break;
1030  case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2), data.extent_int(3), data.extent_int(4)); break;
1031  case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2), data.extent_int(3), data.extent_int(4), data.extent_int(5)); break;
1032  case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>("Intrepid2 Data", data.extent_int(0), data.extent_int(1), data.extent_int(2), data.extent_int(3), data.extent_int(4), data.extent_int(5), data.extent_int(6)); break;
1033  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1034  }
1035 
1036  switch (dataRank_)
1037  {
1038  case 1: copyContainer(data1_,data); break;
1039  case 2: copyContainer(data2_,data); break;
1040  case 3: copyContainer(data3_,data); break;
1041  case 4: copyContainer(data4_,data); break;
1042  case 5: copyContainer(data5_,data); break;
1043  case 6: copyContainer(data6_,data); break;
1044  case 7: copyContainer(data7_,data); break;
1045  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1046  }
1047  }
1048 
1050  Data(std::vector<DimensionInfo> dimInfoVector)
1051  :
1052  // initialize member variables as if default constructor; if dimInfoVector is empty, we want default constructor behavior.
1053  dataRank_(0), extents_({0,0,0,0,0,0,0}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(-1), rank_(dimInfoVector.size())
1054  {
1055  // If dimInfoVector is empty, the member initialization above is correct; otherwise, we set as below.
1056  // Either way, once members are initialized, we must call setActiveDims().
1057  if (dimInfoVector.size() != 0)
1058  {
1059  std::vector<int> dataExtents;
1060 
1061  bool blockPlusDiagonalEncountered = true;
1062  for (int d=0; d<rank_; d++)
1063  {
1064  const DimensionInfo & dimInfo = dimInfoVector[d];
1065  extents_[d] = dimInfo.logicalExtent;
1066  variationType_[d] = dimInfo.variationType;
1067  const bool isBlockPlusDiagonal = (variationType_[d] == BLOCK_PLUS_DIAGONAL);
1068  const bool isSecondBlockPlusDiagonal = isBlockPlusDiagonal && blockPlusDiagonalEncountered;
1069  if (isBlockPlusDiagonal)
1070  {
1071  blockPlusDiagonalEncountered = true;
1072  blockPlusDiagonalLastNonDiagonal_ = dimInfo.blockPlusDiagonalLastNonDiagonal;
1073  }
1074  if ((variationType_[d] != CONSTANT) && (!isSecondBlockPlusDiagonal))
1075  {
1076  dataExtents.push_back(dimInfo.dataExtent);
1077  }
1078  }
1079  if (dataExtents.size() == 0)
1080  {
1081  // constant data
1082  dataExtents.push_back(1);
1083  }
1084  dataRank_ = dataExtents.size();
1085  switch (dataRank_)
1086  {
1087  case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>("Intrepid2 Data", dataExtents[0]); break;
1088  case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1]); break;
1089  case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2]); break;
1090  case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3]); break;
1091  case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3], dataExtents[4]); break;
1092  case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3], dataExtents[4], dataExtents[5]); break;
1093  case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>("Intrepid2 Data", dataExtents[0], dataExtents[1], dataExtents[2], dataExtents[3], dataExtents[4], dataExtents[5], dataExtents[6]); break;
1094  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1095  }
1096  }
1097  setActiveDims();
1098  }
1099 
1101  Data(const ScalarView<DataScalar,DeviceType> &data, int rank, Kokkos::Array<int,7> extents, Kokkos::Array<DataVariationType,7> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1102  :
1103  dataRank_(data.rank()), extents_(extents), variationType_(variationType), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1104  {
1106  setActiveDims();
1107  }
1108 
1110  template<typename OtherDeviceType, class = typename std::enable_if< std::is_same<typename DeviceType::memory_space, typename OtherDeviceType::memory_space>::value>::type,
1111  class = typename std::enable_if<!std::is_same<DeviceType,OtherDeviceType>::value>::type>
1112  Data(const Data<DataScalar,OtherDeviceType> &data)
1113  :
1114  dataRank_(data.getUnderlyingViewRank()), extents_(data.getExtents()), variationType_(data.getVariationTypes()), blockPlusDiagonalLastNonDiagonal_(data.blockPlusDiagonalLastNonDiagonal()), rank_(data.rank())
1115  {
1116 // std::cout << "Entered copy-like Data constructor.\n";
1117  if (dataRank_ != 0) // dataRank_ == 0 indicates an invalid Data object (a placeholder, can indicate zero value)
1118  {
1119  const auto view = data.getUnderlyingView();
1120  switch (dataRank_)
1121  {
1122  case 1: data1_ = data.getUnderlyingView1(); break;
1123  case 2: data2_ = data.getUnderlyingView2(); break;
1124  case 3: data3_ = data.getUnderlyingView3(); break;
1125  case 4: data4_ = data.getUnderlyingView4(); break;
1126  case 5: data5_ = data.getUnderlyingView5(); break;
1127  case 6: data6_ = data.getUnderlyingView6(); break;
1128  case 7: data7_ = data.getUnderlyingView7(); break;
1129  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1130  }
1131  }
1132  setActiveDims();
1133  }
1134 
1136  template<typename OtherDeviceType, class = typename std::enable_if<!std::is_same<typename DeviceType::memory_space, typename OtherDeviceType::memory_space>::value>::type>
1138  :
1139  dataRank_(data.getUnderlyingViewRank()), extents_(data.getExtents()), variationType_(data.getVariationTypes()), blockPlusDiagonalLastNonDiagonal_(data.blockPlusDiagonalLastNonDiagonal()), rank_(data.rank())
1140  {
1141 // std::cout << "Entered copy-like Data constructor.\n";
1142  if (dataRank_ != 0) // dataRank_ == 0 indicates an invalid Data object (a placeholder, can indicate zero value)
1143  {
1144  const auto view = data.getUnderlyingView();
1145  switch (dataRank_)
1146  {
1147  case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>("Intrepid2 Data", view.extent_int(0)); break;
1148  case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1)); break;
1149  case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2)); break;
1150  case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3)); break;
1151  case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4)); break;
1152  case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5)); break;
1153  case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>("Intrepid2 Data", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5), view.extent_int(6)); break;
1154  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1155  }
1156 
1157  // copy
1158  // (Note: Kokkos::deep_copy() will not generally work if the layouts are different; that's why we do a manual copy here once we have the data on the host):
1159  // first, mirror and copy dataView; then copy to the appropriate data_ member
1160  using MemorySpace = typename DeviceType::memory_space;
1161  switch (dataRank_)
1162  {
1163  case 1: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView1()); copyContainer(data1_, dataViewMirror);} break;
1164  case 2: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView2()); copyContainer(data2_, dataViewMirror);} break;
1165  case 3: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView3()); copyContainer(data3_, dataViewMirror);} break;
1166  case 4: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView4()); copyContainer(data4_, dataViewMirror);} break;
1167  case 5: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView5()); copyContainer(data5_, dataViewMirror);} break;
1168  case 6: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView6()); copyContainer(data6_, dataViewMirror);} break;
1169  case 7: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView7()); copyContainer(data7_, dataViewMirror);} break;
1170  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1171  }
1172  }
1173  setActiveDims();
1174  }
1175 
1177 // Data(const Data<DataScalar,ExecSpaceType> &data)
1178 // :
1179 // dataRank_(data.getUnderlyingViewRank()), extents_(data.getExtents()), variationType_(data.getVariationTypes()), blockPlusDiagonalLastNonDiagonal_(data.blockPlusDiagonalLastNonDiagonal()), rank_(data.rank())
1180 // {
1181 // std::cout << "Entered Data copy constructor.\n";
1182 // if (dataRank_ != 0) // dataRank_ == 0 indicates an invalid Data object (a placeholder, can indicate zero value)
1183 // {
1184 // const auto view = data.getUnderlyingView();
1185 // switch (dataRank_)
1186 // {
1187 // case 1: data1_ = Kokkos::View<DataScalar*, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0)); break;
1188 // case 2: data2_ = Kokkos::View<DataScalar**, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1)); break;
1189 // case 3: data3_ = Kokkos::View<DataScalar***, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2)); break;
1190 // case 4: data4_ = Kokkos::View<DataScalar****, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3)); break;
1191 // case 5: data5_ = Kokkos::View<DataScalar*****, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4)); break;
1192 // case 6: data6_ = Kokkos::View<DataScalar******, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5)); break;
1193 // case 7: data7_ = Kokkos::View<DataScalar*******, DeviceType>("Intrepid2 Data - explicit copy constructor(for debugging)", view.extent_int(0), view.extent_int(1), view.extent_int(2), view.extent_int(3), view.extent_int(4), view.extent_int(5), view.extent_int(6)); break;
1194 // default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1195 // }
1196 //
1197 // // copy
1198 // // (Note: Kokkos::deep_copy() will not generally work if the layouts are different; that's why we do a manual copy here once we have the data on the host):
1199 // // first, mirror and copy dataView; then copy to the appropriate data_ member
1200 // using MemorySpace = typename DeviceType::memory_space;
1201 // switch (dataRank_)
1202 // {
1203 // case 1: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView1()); copyContainer(data1_, dataViewMirror);} break;
1204 // case 2: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView2()); copyContainer(data2_, dataViewMirror);} break;
1205 // case 3: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView3()); copyContainer(data3_, dataViewMirror);} break;
1206 // case 4: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView4()); copyContainer(data4_, dataViewMirror);} break;
1207 // case 5: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView5()); copyContainer(data5_, dataViewMirror);} break;
1208 // case 6: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView6()); copyContainer(data6_, dataViewMirror);} break;
1209 // case 7: {auto dataViewMirror = Kokkos::create_mirror_view_and_copy(MemorySpace(), data.getUnderlyingView7()); copyContainer(data7_, dataViewMirror);} break;
1210 // default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1211 // }
1212 // }
1213 //
1214 // setActiveDims();
1215 // }
1216 
1218  Data(ScalarView<DataScalar,DeviceType> data)
1219  :
1220  Data(data,
1221  data.rank(),
1222  Kokkos::Array<int,7> {data.extent_int(0),data.extent_int(1),data.extent_int(2),data.extent_int(3),data.extent_int(4),data.extent_int(5),data.extent_int(6)},
1223  Kokkos::Array<DataVariationType,7> {GENERAL,GENERAL,GENERAL,GENERAL,GENERAL,GENERAL,GENERAL}, -1)
1224  {}
1225 
1227  template<size_t rank, class ...DynRankViewProperties>
1228  Data(const Kokkos::DynRankView<DataScalar,DeviceType, DynRankViewProperties...> &data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1229  :
1230  dataRank_(data.rank()), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1231  {
1232 // std::cout << "Entered a DynRankView Data() constructor.\n";
1234  for (unsigned d=0; d<rank; d++)
1235  {
1236  extents_[d] = extents[d];
1237  variationType_[d] = variationType[d];
1238  }
1239  setActiveDims();
1240  }
1241 
1242  template<size_t rank, class ...ViewProperties>
1243  Data(Kokkos::View<DataScalar*,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1244  :
1245  dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1246  {
1247  data1_ = data;
1248  for (unsigned d=0; d<rank; d++)
1249  {
1250  extents_[d] = extents[d];
1251  variationType_[d] = variationType[d];
1252  }
1253  setActiveDims();
1254  }
1255 
1256  template<size_t rank, class ...ViewProperties>
1257  Data(Kokkos::View<DataScalar**,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1258  :
1259  dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1260  {
1261  data2_ = data;
1262  for (unsigned d=0; d<rank; d++)
1263  {
1264  extents_[d] = extents[d];
1265  variationType_[d] = variationType[d];
1266  }
1267  setActiveDims();
1268  }
1269 
1270  template<size_t rank, class ...ViewProperties>
1271  Data(Kokkos::View<DataScalar***,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1272  :
1273  dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1274  {
1275  data3_ = data;
1276  for (unsigned d=0; d<rank; d++)
1277  {
1278  extents_[d] = extents[d];
1279  variationType_[d] = variationType[d];
1280  }
1281  setActiveDims();
1282  }
1283 
1284  template<size_t rank, class ...ViewProperties>
1285  Data(Kokkos::View<DataScalar****,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1286  :
1287  dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1288  {
1289  data4_ = data;
1290  for (unsigned d=0; d<rank; d++)
1291  {
1292  extents_[d] = extents[d];
1293  variationType_[d] = variationType[d];
1294  }
1295  setActiveDims();
1296  }
1297 
1298  template<size_t rank, class ...ViewProperties>
1299  Data(Kokkos::View<DataScalar*****,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1300  :
1301  dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1302  {
1303  data5_ = data;
1304  for (unsigned d=0; d<rank; d++)
1305  {
1306  extents_[d] = extents[d];
1307  variationType_[d] = variationType[d];
1308  }
1309  setActiveDims();
1310  }
1311 
1312  template<size_t rank, class ...ViewProperties>
1313  Data(Kokkos::View<DataScalar******,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1314  :
1315  dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1316  {
1317  data6_ = data;
1318  for (unsigned d=0; d<rank; d++)
1319  {
1320  extents_[d] = extents[d];
1321  variationType_[d] = variationType[d];
1322  }
1323  setActiveDims();
1324  }
1325 
1326  template<size_t rank, class ...ViewProperties>
1327  Data(Kokkos::View<DataScalar*******,DeviceType, ViewProperties...> data, Kokkos::Array<int,rank> extents, Kokkos::Array<DataVariationType,rank> variationType, const int blockPlusDiagonalLastNonDiagonal = -1)
1328  :
1329  dataRank_(data.rank), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(blockPlusDiagonalLastNonDiagonal), rank_(rank)
1330  {
1331  data7_ = data;
1332  for (unsigned d=0; d<rank; d++)
1333  {
1334  extents_[d] = extents[d];
1335  variationType_[d] = variationType[d];
1336  }
1337  setActiveDims();
1338  }
1339 
1341  template<size_t rank>
1342  Data(DataScalar constantValue, Kokkos::Array<int,rank> extents)
1343  :
1344  dataRank_(1), extents_({1,1,1,1,1,1,1}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(-1), rank_(rank)
1345  {
1346  data1_ = Kokkos::View<DataScalar*,DeviceType>("Constant Data",1);
1347  Kokkos::deep_copy(data1_, constantValue);
1348  for (unsigned d=0; d<rank; d++)
1349  {
1350  extents_[d] = extents[d];
1351  }
1352  setActiveDims();
1353  }
1354 
1357  :
1358  dataRank_(0), extents_({0,0,0,0,0,0,0}), variationType_({CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT,CONSTANT}), blockPlusDiagonalLastNonDiagonal_(-1), rank_(0)
1359  {
1360  setActiveDims();
1361  }
1362 
1364  KOKKOS_INLINE_FUNCTION
1366  {
1367  return blockPlusDiagonalLastNonDiagonal_;
1368  }
1369 
1371  KOKKOS_INLINE_FUNCTION
1372  Kokkos::Array<int,7> getExtents() const
1373  {
1374  return extents_;
1375  }
1376 
1378  KOKKOS_INLINE_FUNCTION
1379  DimensionInfo getDimensionInfo(const int &dim) const
1380  {
1381  DimensionInfo dimInfo;
1382 
1383  dimInfo.logicalExtent = extent_int(dim);
1384  dimInfo.variationType = variationType_[dim];
1385  dimInfo.dataExtent = getDataExtent(dim);
1386  dimInfo.variationModulus = variationModulus_[dim];
1387 
1388  if (dimInfo.variationType == BLOCK_PLUS_DIAGONAL)
1389  {
1390  dimInfo.blockPlusDiagonalLastNonDiagonal = blockPlusDiagonalLastNonDiagonal_;
1391  }
1392  return dimInfo;
1393  }
1394 
1396  KOKKOS_INLINE_FUNCTION
1397  DimensionInfo combinedDataDimensionInfo(const Data &otherData, const int &dim) const
1398  {
1399  const DimensionInfo myDimInfo = getDimensionInfo(dim);
1400  const DimensionInfo otherDimInfo = otherData.getDimensionInfo(dim);
1401 
1402  return combinedDimensionInfo(myDimInfo, otherDimInfo);
1403  }
1404 
1406  template<int rank>
1407  KOKKOS_INLINE_FUNCTION
1408  enable_if_t<rank==1, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1410  {
1411  #ifdef HAVE_INTREPID2_DEBUG
1412  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1413  #endif
1414  return data1_;
1415  }
1416 
1418  template<int rank>
1419  KOKKOS_INLINE_FUNCTION
1420  enable_if_t<rank==2, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1422  {
1423  #ifdef HAVE_INTREPID2_DEBUG
1424  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1425  #endif
1426  return data2_;
1427  }
1428 
1430  template<int rank>
1431  KOKKOS_INLINE_FUNCTION
1432  enable_if_t<rank==3, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1434  {
1435  #ifdef HAVE_INTREPID2_DEBUG
1436  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1437  #endif
1438  return data3_;
1439  }
1440 
1442  template<int rank>
1443  KOKKOS_INLINE_FUNCTION
1444  enable_if_t<rank==4, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1446  {
1447  #ifdef HAVE_INTREPID2_DEBUG
1448  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1449  #endif
1450  return data4_;
1451  }
1452 
1454  template<int rank>
1455  KOKKOS_INLINE_FUNCTION
1456  enable_if_t<rank==5, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1458  {
1459  #ifdef HAVE_INTREPID2_DEBUG
1460  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1461  #endif
1462  return data5_;
1463  }
1464 
1466  template<int rank>
1467  KOKKOS_INLINE_FUNCTION
1468  enable_if_t<rank==6, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1470  {
1471  #ifdef HAVE_INTREPID2_DEBUG
1472  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1473  #endif
1474  return data6_;
1475  }
1476 
1478  template<int rank>
1479  KOKKOS_INLINE_FUNCTION
1480  enable_if_t<rank==7, const Kokkos::View<typename RankExpander<DataScalar, rank>::value_type, DeviceType> &>
1482  {
1483  #ifdef HAVE_INTREPID2_DEBUG
1484  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(dataRank_ != rank, std::invalid_argument, "getUnderlyingView() called for rank that does not match dataRank_");
1485  #endif
1486  return data7_;
1487  }
1488 
1490  KOKKOS_INLINE_FUNCTION
1491  const Kokkos::View<DataScalar*, DeviceType> & getUnderlyingView1() const
1492  {
1493  return getUnderlyingView<1>();
1494  }
1495 
1497  KOKKOS_INLINE_FUNCTION
1498  const Kokkos::View<DataScalar**, DeviceType> & getUnderlyingView2() const
1499  {
1500  return getUnderlyingView<2>();
1501  }
1502 
1504  KOKKOS_INLINE_FUNCTION
1505  const Kokkos::View<DataScalar***, DeviceType> & getUnderlyingView3() const
1506  {
1507  return getUnderlyingView<3>();
1508  }
1509 
1511  KOKKOS_INLINE_FUNCTION
1512  const Kokkos::View<DataScalar****, DeviceType> & getUnderlyingView4() const
1513  {
1514  return getUnderlyingView<4>();
1515  }
1516 
1518  KOKKOS_INLINE_FUNCTION
1519  const Kokkos::View<DataScalar*****, DeviceType> & getUnderlyingView5() const
1520  {
1521  return getUnderlyingView<5>();
1522  }
1523 
1525  KOKKOS_INLINE_FUNCTION
1526  const Kokkos::View<DataScalar******, DeviceType> & getUnderlyingView6() const
1527  {
1528  return getUnderlyingView<6>();
1529  }
1530 
1532  KOKKOS_INLINE_FUNCTION
1533  const Kokkos::View<DataScalar*******, DeviceType> & getUnderlyingView7() const
1534  {
1535  return getUnderlyingView<7>();
1536  }
1537 
1539  KOKKOS_INLINE_FUNCTION
1540  void setUnderlyingView1(Kokkos::View<DataScalar*, DeviceType> & view) const
1541  {
1542  data1_ = view;
1543  }
1544 
1546  KOKKOS_INLINE_FUNCTION
1547  void setUnderlyingView2(Kokkos::View<DataScalar**, DeviceType> & view) const
1548  {
1549  data2_ = view;
1550  }
1551 
1553  KOKKOS_INLINE_FUNCTION
1554  void setUnderlyingView3(Kokkos::View<DataScalar***, DeviceType> & view) const
1555  {
1556  data3_ = view;
1557  }
1558 
1560  KOKKOS_INLINE_FUNCTION
1561  void setUnderlyingView4(Kokkos::View<DataScalar****, DeviceType> & view) const
1562  {
1563  data4_ = view;
1564  }
1565 
1567  KOKKOS_INLINE_FUNCTION
1568  void setUnderlyingView5(Kokkos::View<DataScalar*****, DeviceType> & view) const
1569  {
1570  data5_ = view;
1571  }
1572 
1574  KOKKOS_INLINE_FUNCTION
1575  void setUnderlyingView6(Kokkos::View<DataScalar******, DeviceType> & view) const
1576  {
1577  data6_ = view;
1578  }
1579 
1581  KOKKOS_INLINE_FUNCTION
1582  void setUnderlyingView7(Kokkos::View<DataScalar*******, DeviceType> & view) const
1583  {
1584  data7_ = view;
1585  }
1586 
1588  ScalarView<DataScalar,DeviceType> getUnderlyingView() const
1589  {
1590  switch (dataRank_)
1591  {
1592  case 1: return data1_;
1593  case 2: return data2_;
1594  case 3: return data3_;
1595  case 4: return data4_;
1596  case 5: return data5_;
1597  case 6: return data6_;
1598  case 7: return data7_;
1599  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1600  }
1601  }
1602 
1604  KOKKOS_INLINE_FUNCTION
1605  ordinal_type getUnderlyingViewRank() const
1606  {
1607  return dataRank_;
1608  }
1609 
1611  KOKKOS_INLINE_FUNCTION
1612  ordinal_type getUnderlyingViewSize() const
1613  {
1614  ordinal_type size = 1;
1615  for (ordinal_type r=0; r<dataRank_; r++)
1616  {
1617  size *= getUnderlyingViewExtent(r);
1618  }
1619  return size;
1620  }
1621 
1623  ScalarView<DataScalar,DeviceType> allocateDynRankViewMatchingUnderlying() const
1624  {
1625  switch (dataRank_)
1626  {
1627  case 1: return getMatchingViewWithLabel(data1_, "Intrepid2 Data", data1_.extent_int(0));
1628  case 2: return getMatchingViewWithLabel(data2_, "Intrepid2 Data", data2_.extent_int(0), data2_.extent_int(1));
1629  case 3: return getMatchingViewWithLabel(data3_, "Intrepid2 Data", data3_.extent_int(0), data3_.extent_int(1), data3_.extent_int(2));
1630  case 4: return getMatchingViewWithLabel(data4_, "Intrepid2 Data", data4_.extent_int(0), data4_.extent_int(1), data4_.extent_int(2), data4_.extent_int(3));
1631  case 5: return getMatchingViewWithLabel(data5_, "Intrepid2 Data", data5_.extent_int(0), data5_.extent_int(1), data5_.extent_int(2), data5_.extent_int(3), data5_.extent_int(4));
1632  case 6: return getMatchingViewWithLabel(data6_, "Intrepid2 Data", data6_.extent_int(0), data6_.extent_int(1), data6_.extent_int(2), data6_.extent_int(3), data6_.extent_int(4), data6_.extent_int(5));
1633  case 7: return getMatchingViewWithLabel(data7_, "Intrepid2 Data", data7_.extent_int(0), data7_.extent_int(1), data7_.extent_int(2), data7_.extent_int(3), data7_.extent_int(4), data7_.extent_int(5), data7_.extent_int(6));
1634  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1635  }
1636  }
1637 
1639  template<class ... DimArgs>
1640  ScalarView<DataScalar,DeviceType> allocateDynRankViewMatchingUnderlying(DimArgs... dims) const
1641  {
1642  switch (dataRank_)
1643  {
1644  case 1: return getMatchingViewWithLabel(data1_, "Intrepid2 Data", dims...);
1645  case 2: return getMatchingViewWithLabel(data2_, "Intrepid2 Data", dims...);
1646  case 3: return getMatchingViewWithLabel(data3_, "Intrepid2 Data", dims...);
1647  case 4: return getMatchingViewWithLabel(data4_, "Intrepid2 Data", dims...);
1648  case 5: return getMatchingViewWithLabel(data5_, "Intrepid2 Data", dims...);
1649  case 6: return getMatchingViewWithLabel(data6_, "Intrepid2 Data", dims...);
1650  case 7: return getMatchingViewWithLabel(data7_, "Intrepid2 Data", dims...);
1651  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1652  }
1653  }
1654 
1656  void clear() const
1657  {
1658  switch (dataRank_)
1659  {
1660  case 1: Kokkos::deep_copy(data1_, 0.0); break;
1661  case 2: Kokkos::deep_copy(data2_, 0.0); break;
1662  case 3: Kokkos::deep_copy(data3_, 0.0); break;
1663  case 4: Kokkos::deep_copy(data4_, 0.0); break;
1664  case 5: Kokkos::deep_copy(data5_, 0.0); break;
1665  case 6: Kokkos::deep_copy(data6_, 0.0); break;
1666  case 7: Kokkos::deep_copy(data7_, 0.0); break;
1667  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1668  }
1669  }
1670 
1672  void copyDataFromDynRankViewMatchingUnderlying(const ScalarView<DataScalar,DeviceType> &dynRankView) const
1673  {
1674 // std::cout << "Entered copyDataFromDynRankViewMatchingUnderlying().\n";
1675  switch (dataRank_)
1676  {
1677  case 1: copyContainer(data1_,dynRankView); break;
1678  case 2: copyContainer(data2_,dynRankView); break;
1679  case 3: copyContainer(data3_,dynRankView); break;
1680  case 4: copyContainer(data4_,dynRankView); break;
1681  case 5: copyContainer(data5_,dynRankView); break;
1682  case 6: copyContainer(data6_,dynRankView); break;
1683  case 7: copyContainer(data7_,dynRankView); break;
1684  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Invalid data rank");
1685  }
1686  }
1687 
1689  KOKKOS_INLINE_FUNCTION int getDataExtent(const ordinal_type &d) const
1690  {
1691  for (unsigned i=0; i<activeDims_.size(); i++)
1692  {
1693  if (activeDims_[i] == d)
1694  {
1695  return getUnderlyingViewExtent(i);
1696  }
1697  else if (activeDims_[i] > d)
1698  {
1699  return 1; // data does not vary in the specified dimension
1700  }
1701  }
1702  return 1; // data does not vary in the specified dimension
1703  }
1704 
1716  KOKKOS_INLINE_FUNCTION
1717  int getVariationModulus(const int &d) const
1718  {
1719  return variationModulus_[d];
1720  }
1721 
1723  KOKKOS_INLINE_FUNCTION
1724  const Kokkos::Array<DataVariationType,7> & getVariationTypes() const
1725  {
1726  return variationType_;
1727  }
1728 
1730  template<class ...IntArgs>
1731  KOKKOS_INLINE_FUNCTION
1732  return_type getEntry(const IntArgs&... intArgs) const
1733  {
1734  return getWritableEntry(intArgs...);
1735  }
1736 
1737  template <bool...> struct bool_pack;
1738 
1739  template <bool... v>
1740  using all_true = std::is_same<bool_pack<true, v...>, bool_pack<v..., true>>;
1741 
1742  template <class ...IntArgs>
1743  using valid_args = all_true<std::is_integral<IntArgs>{}...>;
1744 
1745  static_assert(valid_args<int,long,unsigned>::value, "valid args works");
1746 
1748  template <class ...IntArgs>
1749  KOKKOS_INLINE_FUNCTION
1750 #ifndef __INTEL_COMPILER
1751  // icc has a bug that prevents compilation with this enable_if_t
1752  // (possibly the same as https://community.intel.com/t5/Intel-C-Compiler/Intel-Compiler-bug-while-deducing-template-arguments-inside/m-p/1164358)
1753  // so with icc we'll just skip the argument type/count check
1754  enable_if_t<valid_args<IntArgs...>::value && (sizeof...(IntArgs) <= 7),return_type>
1755 #else
1756  return_type
1757 #endif
1758  operator()(const IntArgs&... intArgs) const {
1759  return getEntry(intArgs...);
1760  }
1761 
1763  KOKKOS_INLINE_FUNCTION
1764  int extent_int(const int& r) const
1765  {
1766  return extents_[r];
1767  }
1768 
1769  template <typename iType>
1770  KOKKOS_INLINE_FUNCTION constexpr
1771  typename std::enable_if<std::is_integral<iType>::value, size_t>::type
1772  extent(const iType& r) const {
1773  return extents_[r];
1774  }
1775 
1777  KOKKOS_INLINE_FUNCTION bool isDiagonal() const
1778  {
1779  if (blockPlusDiagonalLastNonDiagonal_ >= 1) return false;
1780  int numBlockPlusDiagonalTypes = 0;
1781  for (unsigned r = 0; r<variationType_.size(); r++)
1782  {
1783  const auto &entryType = variationType_[r];
1784  if (entryType == BLOCK_PLUS_DIAGONAL) numBlockPlusDiagonalTypes++;
1785  }
1786  // 2 BLOCK_PLUS_DIAGONAL entries, combined with blockPlusDiagonalLastNonDiagonal being -1 or 0 indicates diagonal
1787  if (numBlockPlusDiagonalTypes == 2) return true;
1788  else if (numBlockPlusDiagonalTypes == 0) return false; // no BLOCK_PLUS_DIAGONAL --> not a diagonal matrix
1789  else INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::invalid_argument, "Unexpected number of ranks marked as BLOCK_PLUS_DIAGONAL (should be 0 or 2)");
1790  return false; // statement should be unreachable; included because compilers don't necessarily recognize that fact...
1791  }
1792 
1799  {
1800  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A.rank() != B.rank(), std::invalid_argument, "A and B must have the same logical shape");
1801  const int rank = A.rank();
1802  std::vector<DimensionInfo> dimInfo(rank);
1803  for (int d=0; d<rank; d++)
1804  {
1805  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A.extent_int(d) != B.extent_int(d), std::invalid_argument, "A and B must have the same logical shape");
1806  dimInfo[d] = A.combinedDataDimensionInfo(B, d);
1807  }
1808  Data<DataScalar,DeviceType> result(dimInfo);
1809  return result;
1810  }
1811 
1818  static Data<DataScalar,DeviceType> allocateMatMatResult( const bool transposeA, const Data<DataScalar,DeviceType> &A_MatData, const bool transposeB, const Data<DataScalar,DeviceType> &B_MatData )
1819  {
1820  // we treat last two logical dimensions of matData as the matrix; last dimension of vecData as the vector
1821  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_MatData.rank() != B_MatData.rank(), std::invalid_argument, "AmatData and BmatData have incompatible ranks");
1822 
1823  const int D1_DIM = A_MatData.rank() - 2;
1824  const int D2_DIM = A_MatData.rank() - 1;
1825 
1826  const int A_rows = A_MatData.extent_int(D1_DIM);
1827  const int A_cols = A_MatData.extent_int(D2_DIM);
1828  const int B_rows = B_MatData.extent_int(D1_DIM);
1829  const int B_cols = B_MatData.extent_int(D2_DIM);
1830 
1831  const int leftRows = transposeA ? A_cols : A_rows;
1832  const int leftCols = transposeA ? A_rows : A_cols;
1833  const int rightRows = transposeB ? B_cols : B_rows;
1834  const int rightCols = transposeB ? B_rows : B_cols;
1835 
1836  INTREPID2_TEST_FOR_EXCEPTION(leftCols != rightRows, std::invalid_argument, "incompatible matrix dimensions");
1837 
1838  Kokkos::Array<int,7> resultExtents; // logical extents
1839  Kokkos::Array<DataVariationType,7> resultVariationTypes; // for each dimension, whether the data varies in that dimension
1840 
1841  resultExtents[D1_DIM] = leftRows;
1842  resultExtents[D2_DIM] = rightCols;
1843  int resultBlockPlusDiagonalLastNonDiagonal = -1;
1844  if ( (A_MatData.getVariationTypes()[D1_DIM] == BLOCK_PLUS_DIAGONAL) && (B_MatData.getVariationTypes()[D1_DIM] == BLOCK_PLUS_DIAGONAL) )
1845  {
1846  // diagonal times diagonal is diagonal; the result will have the maximum of A and B's non-diagonal block size
1847  resultVariationTypes[D1_DIM] = BLOCK_PLUS_DIAGONAL;
1848  resultVariationTypes[D2_DIM] = BLOCK_PLUS_DIAGONAL;
1849  resultBlockPlusDiagonalLastNonDiagonal = std::max(A_MatData.blockPlusDiagonalLastNonDiagonal(), B_MatData.blockPlusDiagonalLastNonDiagonal());
1850  }
1851 
1852  const int resultRank = A_MatData.rank();
1853 
1854  auto A_VariationTypes = A_MatData.getVariationTypes();
1855  auto B_VariationTypes = B_MatData.getVariationTypes();
1856 
1857  Kokkos::Array<int,7> resultActiveDims;
1858  Kokkos::Array<int,7> resultDataDims;
1859  int resultNumActiveDims = 0; // how many of the 7 entries are actually filled in
1860  // the following loop is over the dimensions *prior* to matrix dimensions
1861  for (int i=0; i<resultRank-2; i++)
1862  {
1863  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_MatData.extent_int(i) != B_MatData.extent_int(i), std::invalid_argument, "A and B extents must match in each non-matrix dimension");
1864 
1865  resultExtents[i] = A_MatData.extent_int(i);
1866 
1867  const DataVariationType &A_VariationType = A_VariationTypes[i];
1868  const DataVariationType &B_VariationType = B_VariationTypes[i];
1869 
1870  // BLOCK_PLUS_DIAGONAL should only occur in matData, and only in the matrix (final) dimensions
1871  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_VariationType == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "unsupported variationType");
1872  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(B_VariationType == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "unsupported variationType");
1873 
1874  int dataSize = 0;
1875  DataVariationType resultVariationType;
1876  if ((A_VariationType == GENERAL) || (B_VariationType == GENERAL))
1877  {
1878  resultVariationType = GENERAL;
1879  dataSize = resultExtents[i];
1880  }
1881  else if ((B_VariationType == CONSTANT) && (A_VariationType == CONSTANT))
1882  {
1883  resultVariationType = CONSTANT;
1884  dataSize = 1;
1885  }
1886  else if ((B_VariationType == MODULAR) && (A_VariationType == CONSTANT))
1887  {
1888  resultVariationType = MODULAR;
1889  dataSize = B_MatData.getVariationModulus(i);
1890  }
1891  else if ((B_VariationType == CONSTANT) && (A_VariationType == MODULAR))
1892  {
1893  resultVariationType = MODULAR;
1894  dataSize = A_MatData.getVariationModulus(i);
1895  }
1896  else
1897  {
1898  // both are modular. We allow this if they agree on the modulus
1899  auto A_Modulus = A_MatData.getVariationModulus(i);
1900  auto B_Modulus = B_MatData.getVariationModulus(i);
1901  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_Modulus != B_Modulus, std::invalid_argument, "If both matrices have variation type MODULAR, they must agree on the modulus");
1902  resultVariationType = MODULAR;
1903  dataSize = A_Modulus;
1904  }
1905  resultVariationTypes[i] = resultVariationType;
1906 
1907  if (resultVariationType != CONSTANT)
1908  {
1909  resultActiveDims[resultNumActiveDims] = i;
1910  resultDataDims[resultNumActiveDims] = dataSize;
1911  resultNumActiveDims++;
1912  }
1913  }
1914 
1915  // set things for final dimensions:
1916  resultExtents[D1_DIM] = leftRows;
1917  resultExtents[D2_DIM] = rightCols;
1918 
1919  if ( (A_MatData.getVariationTypes()[D1_DIM] == BLOCK_PLUS_DIAGONAL) && (B_MatData.getVariationTypes()[D1_DIM] == BLOCK_PLUS_DIAGONAL) )
1920  {
1921  // diagonal times diagonal is diagonal; the result will have the maximum of A and B's non-diagonal block size
1922  resultVariationTypes[D1_DIM] = BLOCK_PLUS_DIAGONAL;
1923  resultVariationTypes[D2_DIM] = BLOCK_PLUS_DIAGONAL;
1924  resultBlockPlusDiagonalLastNonDiagonal = std::max(A_MatData.blockPlusDiagonalLastNonDiagonal(), B_MatData.blockPlusDiagonalLastNonDiagonal());
1925 
1926  resultActiveDims[resultNumActiveDims] = resultRank - 2;
1927 
1928  const int numDiagonalEntries = leftRows - (resultBlockPlusDiagonalLastNonDiagonal + 1);
1929  const int numNondiagonalEntries = (resultBlockPlusDiagonalLastNonDiagonal + 1) * (resultBlockPlusDiagonalLastNonDiagonal + 1);
1930 
1931  resultDataDims[resultNumActiveDims] = numDiagonalEntries + numNondiagonalEntries;
1932  resultNumActiveDims++;
1933  }
1934  else
1935  {
1936  // pretty much the only variation types that make sense for matrix dims are GENERAL and BLOCK_PLUS_DIAGONAL
1937  resultVariationTypes[D1_DIM] = GENERAL;
1938  resultVariationTypes[D2_DIM] = GENERAL;
1939 
1940  resultActiveDims[resultNumActiveDims] = resultRank - 2;
1941  resultActiveDims[resultNumActiveDims+1] = resultRank - 1;
1942 
1943  resultDataDims[resultNumActiveDims] = leftRows;
1944  resultDataDims[resultNumActiveDims+1] = rightCols;
1945  resultNumActiveDims += 2;
1946  }
1947 
1948  for (int i=resultRank; i<7; i++)
1949  {
1950  resultVariationTypes[i] = CONSTANT;
1951  resultExtents[i] = 1;
1952  }
1953 
1954  ScalarView<DataScalar,DeviceType> data;
1955  if (resultNumActiveDims == 1)
1956  {
1957  auto viewToMatch = A_MatData.getUnderlyingView1(); // new view will match this one in layout and fad dimension, if any
1958  data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0]);
1959  }
1960  else if (resultNumActiveDims == 2)
1961  {
1962  auto viewToMatch = A_MatData.getUnderlyingView2(); // new view will match this one in layout and fad dimension, if any
1963  data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1]);
1964  }
1965  else if (resultNumActiveDims == 3)
1966  {
1967  auto viewToMatch = A_MatData.getUnderlyingView3(); // new view will match this one in layout and fad dimension, if any
1968  data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2]);
1969  }
1970  else if (resultNumActiveDims == 4)
1971  {
1972  auto viewToMatch = A_MatData.getUnderlyingView4(); // new view will match this one in layout and fad dimension, if any
1973  data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
1974  resultDataDims[3]);
1975  }
1976  else if (resultNumActiveDims == 5)
1977  {
1978  auto viewToMatch = A_MatData.getUnderlyingView5(); // new view will match this one in layout and fad dimension, if any
1979  data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
1980  resultDataDims[3], resultDataDims[4]);
1981  }
1982  else if (resultNumActiveDims == 6)
1983  {
1984  auto viewToMatch = A_MatData.getUnderlyingView6(); // new view will match this one in layout and fad dimension, if any
1985  data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
1986  resultDataDims[3], resultDataDims[4], resultDataDims[5]);
1987  }
1988  else // resultNumActiveDims == 7
1989  {
1990  auto viewToMatch = A_MatData.getUnderlyingView7(); // new view will match this one in layout and fad dimension, if any
1991  data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2],
1992  resultDataDims[3], resultDataDims[4], resultDataDims[5], resultDataDims[6]);
1993  }
1994 
1995  return Data<DataScalar,DeviceType>(data,resultRank,resultExtents,resultVariationTypes,resultBlockPlusDiagonalLastNonDiagonal);
1996  }
1997 
2001  {
2002  // we treat last two logical dimensions of matData as the matrix; last dimension of vecData as the vector
2003  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matData.rank() != vecData.rank() + 1, std::invalid_argument, "matData and vecData have incompatible ranks");
2004  const int vecDim = vecData.extent_int(vecData.rank() - 1);
2005  const int matRows = matData.extent_int(matData.rank() - 2);
2006  const int matCols = matData.extent_int(matData.rank() - 1);
2007 
2008  const int resultRank = vecData.rank();
2009 
2010  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matCols != vecDim, std::invalid_argument, "matData column count != vecData dimension");
2011 
2012  Kokkos::Array<int,7> resultExtents; // logical extents
2013  Kokkos::Array<DataVariationType,7> resultVariationTypes; // for each dimension, whether the data varies in that dimension
2014  auto vecVariationTypes = vecData.getVariationTypes();
2015  auto matVariationTypes = matData.getVariationTypes();
2016 
2017  Kokkos::Array<int,7> resultActiveDims;
2018  Kokkos::Array<int,7> resultDataDims;
2019  int resultNumActiveDims = 0; // how many of the 7 entries are actually filled in
2020  // the following loop is over the dimensions *prior* to matrix/vector dimensions
2021  for (int i=0; i<resultRank-1; i++)
2022  {
2023  resultExtents[i] = vecData.extent_int(i);
2024  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vecData.extent_int(i) != matData.extent_int(i), std::invalid_argument, "matData and vecData extents must match in each non-matrix/vector dimension");
2025 
2026  const DataVariationType &vecVariationType = vecVariationTypes[i];
2027  const DataVariationType &matVariationType = matVariationTypes[i];
2028 
2029  // BLOCK_PLUS_DIAGONAL should only occur in matData, and only in the matrix (final) dimensions
2030  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vecVariationType == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "unsupported variationType");
2031  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matVariationType == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "unsupported variationType");
2032 
2033  int dataSize = 0;
2034  DataVariationType resultVariationType;
2035  if ((vecVariationType == GENERAL) || (matVariationType == GENERAL))
2036  {
2037  resultVariationType = GENERAL;
2038  dataSize = resultExtents[i];
2039  }
2040  else if ((matVariationType == CONSTANT) && (vecVariationType == CONSTANT))
2041  {
2042  resultVariationType = CONSTANT;
2043  dataSize = 1;
2044  }
2045  else if ((matVariationType == MODULAR) && (vecVariationType == CONSTANT))
2046  {
2047  resultVariationType = MODULAR;
2048  dataSize = matData.getVariationModulus(i);
2049  }
2050  else if ((matVariationType == CONSTANT) && (vecVariationType == MODULAR))
2051  {
2052  resultVariationType = MODULAR;
2053  dataSize = matData.getVariationModulus(i);
2054  }
2055  else
2056  {
2057  // both are modular. We allow this if they agree on the modulus
2058  auto matModulus = matData.getVariationModulus(i);
2059  auto vecModulus = vecData.getVariationModulus(i);
2060  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(matModulus != vecModulus, std::invalid_argument, "If matrix and vector both have variation type MODULAR, they must agree on the modulus");
2061  resultVariationType = MODULAR;
2062  dataSize = matModulus;
2063  }
2064  resultVariationTypes[i] = resultVariationType;
2065 
2066  if (resultVariationType != CONSTANT)
2067  {
2068  resultActiveDims[resultNumActiveDims] = i;
2069  resultDataDims[resultNumActiveDims] = dataSize;
2070  resultNumActiveDims++;
2071  }
2072  }
2073  // for the final dimension, the variation type is always GENERAL
2074  // (Some combinations, e.g. CONSTANT/CONSTANT *would* generate a CONSTANT result, but constant matrices don't make a lot of sense beyond 1x1 matrices…)
2075  resultVariationTypes[resultNumActiveDims] = GENERAL;
2076  resultActiveDims[resultNumActiveDims] = resultRank - 1;
2077  resultDataDims[resultNumActiveDims] = matRows;
2078  resultExtents[resultRank-1] = matRows;
2079  resultNumActiveDims++;
2080 
2081  for (int i=resultRank; i<7; i++)
2082  {
2083  resultVariationTypes[i] = CONSTANT;
2084  resultExtents[i] = 1;
2085  }
2086 
2087  ScalarView<DataScalar,DeviceType> data;
2088  if (resultNumActiveDims == 1)
2089  {
2090  data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0]);
2091  }
2092  else if (resultNumActiveDims == 2)
2093  {
2094  data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1]);
2095  }
2096  else if (resultNumActiveDims == 3)
2097  {
2098  data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2]);
2099  }
2100  else if (resultNumActiveDims == 4)
2101  {
2102  data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2],
2103  resultDataDims[3]);
2104  }
2105  else if (resultNumActiveDims == 5)
2106  {
2107  data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2],
2108  resultDataDims[3], resultDataDims[4]);
2109  }
2110  else if (resultNumActiveDims == 6)
2111  {
2112  data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2],
2113  resultDataDims[3], resultDataDims[4], resultDataDims[5]);
2114  }
2115  else // resultNumActiveDims == 7
2116  {
2117  data = matData.allocateDynRankViewMatchingUnderlying(resultDataDims[0], resultDataDims[1], resultDataDims[2],
2118  resultDataDims[3], resultDataDims[4], resultDataDims[5], resultDataDims[6]);
2119  }
2120 
2121  return Data<DataScalar,DeviceType>(data,resultRank,resultExtents,resultVariationTypes);
2122  }
2123 
2125  template<int rank>
2126  enable_if_t<(rank!=1) && (rank!=7), Kokkos::MDRangePolicy<typename DeviceType::execution_space,Kokkos::Rank<rank>> >
2128  {
2129  using ExecutionSpace = typename DeviceType::execution_space;
2130  Kokkos::Array<int,rank> startingOrdinals;
2131  Kokkos::Array<int,rank> extents;
2132 
2133  for (int d=0; d<rank; d++)
2134  {
2135  startingOrdinals[d] = 0;
2136  extents[d] = getDataExtent(d);
2137  }
2138  auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<rank>>(startingOrdinals,extents);
2139  return policy;
2140  }
2141 
2143  template<int rank>
2144  enable_if_t<rank==7, Kokkos::MDRangePolicy<typename DeviceType::execution_space,Kokkos::Rank<6>> >
2146  {
2147  using ExecutionSpace = typename DeviceType::execution_space;
2148  Kokkos::Array<int,6> startingOrdinals;
2149  Kokkos::Array<int,6> extents;
2150 
2151  for (int d=0; d<6; d++)
2152  {
2153  startingOrdinals[d] = 0;
2154  extents[d] = getDataExtent(d);
2155  }
2156  auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<6>>(startingOrdinals,extents);
2157  return policy;
2158  }
2159 
2160  template<int rank>
2161  inline
2162  enable_if_t<rank==1, Kokkos::RangePolicy<typename DeviceType::execution_space> >
2164  {
2165  using ExecutionSpace = typename DeviceType::execution_space;
2166  Kokkos::RangePolicy<ExecutionSpace> policy(ExecutionSpace(),0,getDataExtent(0));
2167  return policy;
2168  }
2169 
2171  template<class BinaryOperator>
2172  void storeInPlaceCombination(const Data<DataScalar,DeviceType> &A, const Data<DataScalar,DeviceType> &B, BinaryOperator binaryOperator)
2173  {
2174  using ExecutionSpace = typename DeviceType::execution_space;
2175 
2176 #ifdef INTREPID2_HAVE_DEBUG
2177  // check logical extents
2178  for (int d=0; d<rank_; d++)
2179  {
2180  INTREPID2_TEST_FOR_EXCEPTION(A.extent_int(d) != this->extent_int(d), std::invalid_argument, "A, B, and this must agree on all logical extents");
2181  INTREPID2_TEST_FOR_EXCEPTION(B.extent_int(d) != this->extent_int(d), std::invalid_argument, "A, B, and this must agree on all logical extents");
2182  }
2183  // TODO: add some checks that data extent of this suffices to accept combined A + B data.
2184 #endif
2185 
2186  const bool this_constant = (this->getUnderlyingViewRank() == 1) && (this->getUnderlyingViewSize() == 1);
2187 
2188  // we special-case for constant output here; since the constant case is essentially all overhead, we want to avoid as much of the overhead of storeInPlaceCombination() as possible…
2189  if (this_constant)
2190  {
2191  // constant data
2192  Kokkos::RangePolicy<ExecutionSpace> policy(ExecutionSpace(),0,1); // just 1 entry
2193  auto this_underlying = this->getUnderlyingView<1>();
2194  auto A_underlying = A.getUnderlyingView<1>();
2195  auto B_underlying = B.getUnderlyingView<1>();
2196  Kokkos::parallel_for("compute in-place", policy,
2197  KOKKOS_LAMBDA (const int &i0) {
2198  auto & result = this_underlying(0);
2199  const auto & A_val = A_underlying(0);
2200  const auto & B_val = B_underlying(0);
2201 
2202  result = binaryOperator(A_val,B_val);
2203  });
2204  }
2205  else
2206  {
2207  switch (rank_)
2208  {
2209  case 1: storeInPlaceCombination<BinaryOperator, 1>(A, B, binaryOperator); break;
2210  case 2: storeInPlaceCombination<BinaryOperator, 2>(A, B, binaryOperator); break;
2211  case 3: storeInPlaceCombination<BinaryOperator, 3>(A, B, binaryOperator); break;
2212  case 4: storeInPlaceCombination<BinaryOperator, 4>(A, B, binaryOperator); break;
2213  case 5: storeInPlaceCombination<BinaryOperator, 5>(A, B, binaryOperator); break;
2214  case 6: storeInPlaceCombination<BinaryOperator, 6>(A, B, binaryOperator); break;
2215  case 7: storeInPlaceCombination<BinaryOperator, 7>(A, B, binaryOperator); break;
2216  default:
2217  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::logic_error, "unhandled rank in switch");
2218  }
2219  }
2220  }
2221 
2224  {
2225  auto sum = KOKKOS_LAMBDA(const DataScalar &a, const DataScalar &b) -> DataScalar
2226  {
2227  return a + b;
2228  };
2229  storeInPlaceCombination(A, B, sum);
2230  }
2231 
2234  {
2235  auto product = KOKKOS_LAMBDA(const DataScalar &a, const DataScalar &b) -> DataScalar
2236  {
2237  return a * b;
2238  };
2239  storeInPlaceCombination(A, B, product);
2240  }
2241 
2244  {
2245  auto difference = KOKKOS_LAMBDA(const DataScalar &a, const DataScalar &b) -> DataScalar
2246  {
2247  return a - b;
2248  };
2249  storeInPlaceCombination(A, B, difference);
2250  }
2251 
2254  {
2255  auto quotient = KOKKOS_LAMBDA(const DataScalar &a, const DataScalar &b) -> DataScalar
2256  {
2257  return a / b;
2258  };
2259  storeInPlaceCombination(A, B, quotient);
2260  }
2261 
2264  {
2265  // TODO: add a compile-time (SFINAE-type) guard against DataScalar types that do not support arithmetic operations. (We support Orientation as a DataScalar type; it might suffice just to compare DataScalar to Orientation, and eliminate this method for that case.)
2266  // TODO: check for invalidly shaped containers.
2267 
2268  const int matRows = matData.extent_int(matData.rank() - 2);
2269  const int matCols = matData.extent_int(matData.rank() - 1);
2270 
2271  // shallow copy of this to avoid implicit references to this in call to getWritableEntry() below
2272  Data<DataScalar,DeviceType> thisData = *this;
2273 
2274  using ExecutionSpace = typename DeviceType::execution_space;
2275  // note the use of getDataExtent() below: we only range over the possibly-distinct entries
2276  if (rank_ == 3)
2277  {
2278  // typical case for e.g. gradient data: (C,P,D)
2279  auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<3>>({0,0,0},{getDataExtent(0),getDataExtent(1),matRows});
2280  Kokkos::parallel_for("compute mat-vec", policy,
2281  KOKKOS_LAMBDA (const int &cellOrdinal, const int &pointOrdinal, const int &i) {
2282  auto & val_i = thisData.getWritableEntry(cellOrdinal, pointOrdinal, i);
2283  val_i = 0;
2284  for (int j=0; j<matCols; j++)
2285  {
2286  val_i += matData(cellOrdinal,pointOrdinal,i,j) * vecData(cellOrdinal,pointOrdinal,j);
2287  }
2288  });
2289  }
2290  else if (rank_ == 2)
2291  {
2292  //
2293  auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<2>>({0,0},{getDataExtent(0),matRows});
2294  Kokkos::parallel_for("compute mat-vec", policy,
2295  KOKKOS_LAMBDA (const int &vectorOrdinal, const int &i) {
2296  auto & val_i = thisData.getWritableEntry(vectorOrdinal, i);
2297  val_i = 0;
2298  for (int j=0; j<matCols; j++)
2299  {
2300  val_i += matData(vectorOrdinal,i,j) * vecData(vectorOrdinal,j);
2301  }
2302  });
2303  }
2304  else if (rank_ == 1)
2305  {
2306  // single-vector case
2307  Kokkos::RangePolicy<ExecutionSpace> policy(0,matRows);
2308  Kokkos::parallel_for("compute mat-vec", policy,
2309  KOKKOS_LAMBDA (const int &i) {
2310  auto & val_i = thisData.getWritableEntry(i);
2311  val_i = 0;
2312  for (int j=0; j<matCols; j++)
2313  {
2314  val_i += matData(i,j) * vecData(j);
2315  }
2316  });
2317  }
2318  else
2319  {
2320  // TODO: handle other cases
2321  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::logic_error, "rank not yet supported");
2322  }
2323  }
2324 
2331  void storeMatMat( const bool transposeA, const Data<DataScalar,DeviceType> &A_MatData, const bool transposeB, const Data<DataScalar,DeviceType> &B_MatData )
2332  {
2333  // TODO: add a compile-time (SFINAE-type) guard against DataScalar types that do not support arithmetic operations. (We support Orientation as a DataScalar type; it might suffice just to compare DataScalar to Orientation, and eliminate this method for that case.)
2334  // TODO: check for invalidly shaped containers.
2335 
2336  // we treat last two logical dimensions of matData as the matrix; last dimension of vecData as the vector
2337  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A_MatData.rank() != B_MatData.rank(), std::invalid_argument, "AmatData and BmatData have incompatible ranks");
2338 
2339  const int D1_DIM = A_MatData.rank() - 2;
2340  const int D2_DIM = A_MatData.rank() - 1;
2341 
2342  const int A_rows = A_MatData.extent_int(D1_DIM);
2343  const int A_cols = A_MatData.extent_int(D2_DIM);
2344  const int B_rows = B_MatData.extent_int(D1_DIM);
2345  const int B_cols = B_MatData.extent_int(D2_DIM);
2346 
2347  const int leftRows = transposeA ? A_cols : A_rows;
2348  const int leftCols = transposeA ? A_rows : A_cols;
2349  const int rightCols = transposeB ? B_rows : B_cols;
2350 
2351 #ifdef INTREPID2_HAVE_DEBUG
2352  const int rightRows = transposeB ? B_cols : B_rows;
2353  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(leftCols != rightRows, std::invalid_argument, "inner dimensions do not match");
2354 #endif
2355 
2356  // shallow copy of this to avoid implicit references to this in call to getWritableEntry() below
2357  Data<DataScalar,DeviceType> thisData = *this;
2358 
2359  using ExecutionSpace = typename DeviceType::execution_space;
2360 
2361  const int diagonalStart = (variationType_[D1_DIM] == BLOCK_PLUS_DIAGONAL) ? blockPlusDiagonalLastNonDiagonal_ + 1 : leftRows;
2362  // note the use of getDataExtent() below: we only range over the possibly-distinct entries
2363  if (rank_ == 3)
2364  {
2365  // (C,D,D), say
2366  auto policy = Kokkos::RangePolicy<ExecutionSpace>(0,getDataExtent(0));
2367  Kokkos::parallel_for("compute mat-mat", policy,
2368  KOKKOS_LAMBDA (const int &matrixOrdinal) {
2369  for (int i=0; i<diagonalStart; i++)
2370  {
2371  for (int j=0; j<rightCols; j++)
2372  {
2373  auto & val_ij = thisData.getWritableEntry(matrixOrdinal, i, j);
2374  val_ij = 0;
2375  for (int k=0; k<leftCols; k++)
2376  {
2377  const auto & left = transposeA ? A_MatData(matrixOrdinal,k,i) : A_MatData(matrixOrdinal,i,k);
2378  const auto & right = transposeB ? B_MatData(matrixOrdinal,j,k) : B_MatData(matrixOrdinal,k,j);
2379  val_ij += left * right;
2380  }
2381  }
2382  }
2383  for (int i=diagonalStart; i<leftRows; i++)
2384  {
2385  auto & val_ii = thisData.getWritableEntry(matrixOrdinal, i, i);
2386  const auto & left = A_MatData(matrixOrdinal,i,i);
2387  const auto & right = B_MatData(matrixOrdinal,i,i);
2388  val_ii = left * right;
2389  }
2390  });
2391  }
2392  else if (rank_ == 4)
2393  {
2394  // (C,P,D,D), perhaps
2395  auto policy = Kokkos::MDRangePolicy<ExecutionSpace, Kokkos::Rank<2> >({0,0},{getDataExtent(0),getDataExtent(1)});
2396  if (underlyingMatchesLogical_) // receiving data object is completely expanded
2397  {
2398  Kokkos::parallel_for("compute mat-mat", policy,
2399  KOKKOS_LAMBDA (const int &cellOrdinal, const int &pointOrdinal) {
2400  for (int i=0; i<leftCols; i++)
2401  {
2402  for (int j=0; j<rightCols; j++)
2403  {
2404  auto & val_ij = thisData.getUnderlyingView4()(cellOrdinal,pointOrdinal, i, j);
2405  val_ij = 0;
2406  for (int k=0; k<leftCols; k++)
2407  {
2408  const auto & left = transposeA ? A_MatData(cellOrdinal,pointOrdinal,k,i) : A_MatData(cellOrdinal,pointOrdinal,i,k);
2409  const auto & right = transposeB ? B_MatData(cellOrdinal,pointOrdinal,j,k) : B_MatData(cellOrdinal,pointOrdinal,k,j);
2410  val_ij += left * right;
2411  }
2412  }
2413  }
2414  });
2415  }
2416  else
2417  {
2418  Kokkos::parallel_for("compute mat-mat", policy,
2419  KOKKOS_LAMBDA (const int &cellOrdinal, const int &pointOrdinal) {
2420  for (int i=0; i<diagonalStart; i++)
2421  {
2422  for (int j=0; j<rightCols; j++)
2423  {
2424  auto & val_ij = thisData.getWritableEntry(cellOrdinal,pointOrdinal, i, j);
2425  val_ij = 0;
2426  for (int k=0; k<leftCols; k++)
2427  {
2428  const auto & left = transposeA ? A_MatData(cellOrdinal,pointOrdinal,k,i) : A_MatData(cellOrdinal,pointOrdinal,i,k);
2429  const auto & right = transposeB ? B_MatData(cellOrdinal,pointOrdinal,j,k) : B_MatData(cellOrdinal,pointOrdinal,k,j);
2430  val_ij += left * right;
2431  }
2432  }
2433  }
2434  for (int i=diagonalStart; i<leftRows; i++)
2435  {
2436  auto & val_ii = thisData.getWritableEntry(cellOrdinal,pointOrdinal, i, i);
2437  const auto & left = A_MatData(cellOrdinal,pointOrdinal,i,i);
2438  const auto & right = B_MatData(cellOrdinal,pointOrdinal,i,i);
2439  val_ii = left * right;
2440  }
2441  });
2442  }
2443  }
2444  else
2445  {
2446  // TODO: handle other cases
2447  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::logic_error, "rank not yet supported");
2448  }
2449  }
2450 
2452  KOKKOS_INLINE_FUNCTION constexpr bool isValid() const
2453  {
2454  return extents_[0] > 0;
2455  }
2456 
2458  KOKKOS_INLINE_FUNCTION
2459  unsigned rank() const
2460  {
2461  return rank_;
2462  }
2463 
2470  void setExtent(const ordinal_type &d, const ordinal_type &newExtent)
2471  {
2472  INTREPID2_TEST_FOR_EXCEPTION(variationType_[d] == BLOCK_PLUS_DIAGONAL, std::invalid_argument, "setExtent is not supported for BLOCK_PLUS_DIAGONAL dimensions");
2473 
2474  if (variationType_[d] == MODULAR)
2475  {
2476  bool dividesEvenly = ((newExtent / variationModulus_[d]) * variationModulus_[d] == newExtent);
2477  INTREPID2_TEST_FOR_EXCEPTION(!dividesEvenly, std::invalid_argument, "when setExtent is called on dimenisions with MODULAR variation, the modulus must divide the new extent evenly");
2478  }
2479 
2480  if ((newExtent != extents_[d]) && (variationType_[d] == GENERAL))
2481  {
2482  // then we need to resize; let's determine the full set of new extents
2483  std::vector<ordinal_type> newExtents(dataRank_,-1);
2484  for (int r=0; r<dataRank_; r++)
2485  {
2486  if (activeDims_[r] == d)
2487  {
2488  // this is the changed dimension
2489  newExtents[r] = newExtent;
2490  }
2491  else
2492  {
2493  // unchanged; copy from existing
2494  newExtents[r] = getUnderlyingViewExtent(r);
2495  }
2496  }
2497 
2498  switch (dataRank_)
2499  {
2500  case 1: Kokkos::resize(data1_,newExtents[0]);
2501  break;
2502  case 2: Kokkos::resize(data2_,newExtents[0],newExtents[1]);
2503  break;
2504  case 3: Kokkos::resize(data3_,newExtents[0],newExtents[1],newExtents[2]);
2505  break;
2506  case 4: Kokkos::resize(data4_,newExtents[0],newExtents[1],newExtents[2],newExtents[3]);
2507  break;
2508  case 5: Kokkos::resize(data5_,newExtents[0],newExtents[1],newExtents[2],newExtents[3],newExtents[4]);
2509  break;
2510  case 6: Kokkos::resize(data6_,newExtents[0],newExtents[1],newExtents[2],newExtents[3],newExtents[4],newExtents[5]);
2511  break;
2512  case 7: Kokkos::resize(data7_,newExtents[0],newExtents[1],newExtents[2],newExtents[3],newExtents[4],newExtents[5],newExtents[6]);
2513  break;
2514  default: INTREPID2_TEST_FOR_EXCEPTION(true, std::logic_error, "Unexpected dataRank_ value");
2515  }
2516  }
2517 
2518  extents_[d] = newExtent;
2519  }
2520 
2522  KOKKOS_INLINE_FUNCTION
2524  {
2525  return underlyingMatchesLogical_;
2526  }
2527  };
2528 }
2529 
2530 #endif /* Intrepid2_Data_h */
enable_if_t< rank==7, Kokkos::MDRangePolicy< typename DeviceType::execution_space, Kokkos::Rank< 6 > > > dataExtentRangePolicy()
returns an MDRangePolicy over the first six underlying data extents (but with the logical shape)...
KOKKOS_INLINE_FUNCTION void setUnderlyingView4(Kokkos::View< DataScalar ****, DeviceType > &view) const
sets the View that stores the unique data. For rank-4 underlying containers.
Header file with various static argument-extractor classes. These are useful for writing efficient...
arbitrary variation
KOKKOS_INLINE_FUNCTION bool isDiagonal() const
returns true for containers that have two dimensions marked as BLOCK_PLUS_DIAGONAL for which the non-...
void applyOperator(UnaryOperator unaryOperator)
applies the specified unary operator to each entry
KOKKOS_INLINE_FUNCTION DimensionInfo getDimensionInfo(const int &dim) const
Returns an object fully specifying the indicated dimension. This is used in determining appropriate s...
static KOKKOS_INLINE_FUNCTION int blockPlusDiagonalDiagonalEntryIndex(const int &lastNondiagonal, const int &numNondiagonalEntries, const int &i)
Returns flattened index of the specified (i,i) matrix entry, assuming that i > lastNondiagonal. Only applicable for BLOCK_PLUS_DIAGONAL DataVariationType.
ScalarView< DataScalar, DeviceType > getUnderlyingView() const
Returns a DynRankView constructed atop the same underlying data as the fixed-rank Kokkos::View used i...
KOKKOS_INLINE_FUNCTION enable_if_t< rank==4, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 4...
KOKKOS_INLINE_FUNCTION void setUnderlyingView6(Kokkos::View< DataScalar ******, DeviceType > &view) const
sets the View that stores the unique data. For rank-6 underlying containers.
static KOKKOS_INLINE_FUNCTION int blockPlusDiagonalBlockEntryIndex(const int &lastNondiagonal, const int &numNondiagonalEntries, const int &i, const int &j)
//! Returns flattened index of the specified (i,j) matrix entry, assuming that i,j ≤ lastNondiagona...
static Data< DataScalar, DeviceType > allocateInPlaceCombinationResult(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
Argument extractor class which passes all arguments to the provided container.
KOKKOS_INLINE_FUNCTION Kokkos::Array< int, 7 > getExtents() const
Returns an array containing the logical extents in each dimension.
static void copyContainer(ToContainer to, FromContainer from)
Generic data copying method to allow construction of Data object from DynRankViews for which deep_cop...
KOKKOS_INLINE_FUNCTION enable_if_t< rank==3, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 3...
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar ******, DeviceType > & getUnderlyingView6() const
returns the View that stores the unique data. For rank-6 underlying containers.
KOKKOS_INLINE_FUNCTION void setUnderlyingView7(Kokkos::View< DataScalar *******, DeviceType > &view) const
sets the View that stores the unique data. For rank-7 underlying containers.
KOKKOS_INLINE_FUNCTION ordinal_type getUnderlyingViewRank() const
returns the rank of the View that stores the unique data
enable_if_t< rank !=7, void > storeInPlaceCombination(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B, BinaryOperator binaryOperator)
storeInPlaceCombination with compile-time rank – implementation for rank < 7.
KOKKOS_INLINE_FUNCTION void setUnderlyingView2(Kokkos::View< DataScalar **, DeviceType > &view) const
sets the View that stores the unique data. For rank-2 underlying containers.
KOKKOS_INLINE_FUNCTION void setUnderlyingView3(Kokkos::View< DataScalar ***, DeviceType > &view) const
sets the View that stores the unique data. For rank-3 underlying containers.
KOKKOS_INLINE_FUNCTION DimensionInfo combinedDataDimensionInfo(const Data &otherData, const int &dim) const
Returns (DataVariationType, data extent) in the specified dimension for a Data container that combine...
void storeMatVec(const Data< DataScalar, DeviceType > &matData, const Data< DataScalar, DeviceType > &vecData)
Places the result of a matrix-vector multiply corresponding to the two provided containers into this ...
Argument extractor class which ignores the input arguments in favor of passing a single 0 argument to...
KOKKOS_INLINE_FUNCTION enable_if_t< rank==1, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 1...
KOKKOS_INLINE_FUNCTION enable_if_t< rank==7, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 7...
KOKKOS_INLINE_FUNCTION const int & blockPlusDiagonalLastNonDiagonal() const
For a Data object containing data with variation type BLOCK_PLUS_DIAGONAL, returns the row and column...
void clear() const
Copies 0.0 to the underlying View.
KOKKOS_INLINE_FUNCTION constexpr bool isValid() const
returns true for containers that have data; false for those that don&#39;t (namely, those that have been ...
one of two dimensions in a matrix; bottom-right part of matrix is diagonal
KOKKOS_INLINE_FUNCTION enable_if_t< rank==5, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 5...
Wrapper around a Kokkos::View that allows data that is constant or repeating in various logical dimen...
varies according to modulus of the index
void storeInPlaceCombination(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B, BinaryOperator binaryOperator)
Places the result of an in-place combination (e.g., entrywise sum) into this data container...
Header function for Intrepid2::Util class and other utility functions.
KOKKOS_INLINE_FUNCTION bool underlyingMatchesLogical() const
Returns true if the underlying container has exactly the same rank and extents as the logical contain...
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar *, DeviceType > & getUnderlyingView1() const
returns the View that stores the unique data. For rank-1 underlying containers.
enable_if_t< rank==7, void > storeInPlaceCombination(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B, BinaryOperator binaryOperator)
storeInPlaceCombination with compile-time rank – implementation for rank of 7. (Not optimized; expe...
Data(std::vector< DimensionInfo > dimInfoVector)
Constructor in terms of DimensionInfo for each logical dimension; does not require a View to be speci...
Struct expressing all variation information about a Data object in a single dimension, including its logical extent and storage extent.
KOKKOS_INLINE_FUNCTION unsigned rank() const
Returns the logical rank of the Data container.
void storeInPlaceDifference(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
stores the in-place (entrywise) difference, A .- B, into this container.
KOKKOS_INLINE_FUNCTION enable_if_t< valid_args< IntArgs... >::value &&(sizeof...(IntArgs)<=7), return_type > operator()(const IntArgs &... intArgs) const
Returns a value corresponding to the specified logical data location.
Data(DataScalar constantValue, Kokkos::Array< int, rank > extents)
constructor for everywhere-constant data
KOKKOS_INLINE_FUNCTION enable_if_t< rank==2, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 2...
enable_if_t<(rank!=1) &&(rank!=7), Kokkos::MDRangePolicy< typename DeviceType::execution_space, Kokkos::Rank< rank > > > dataExtentRangePolicy()
returns an MDRangePolicy over the underlying data extents (but with the logical shape).
KOKKOS_INLINE_FUNCTION int extent_int(const int &r) const
Returns the logical extent in the specified dimension.
void copyDataFromDynRankViewMatchingUnderlying(const ScalarView< DataScalar, DeviceType > &dynRankView) const
Copies from the provided DynRankView into the underlying Kokkos::View container storing the unique da...
For use with Data object into which a value will be stored.
KOKKOS_INLINE_FUNCTION return_type getEntry(const IntArgs &... intArgs) const
Returns a (read-only) value corresponding to the specified logical data location. ...
static Data< DataScalar, DeviceType > allocateMatVecResult(const Data< DataScalar, DeviceType > &matData, const Data< DataScalar, DeviceType > &vecData)
void storeInPlaceQuotient(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
stores the in-place (entrywise) quotient, A ./ B, into this container.
void storeInPlaceSum(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
stores the in-place (entrywise) sum, A .+ B, into this container.
Data(const ScalarView< DataScalar, DeviceType > &data, int rank, Kokkos::Array< int, 7 > extents, Kokkos::Array< DataVariationType, 7 > variationType, const int blockPlusDiagonalLastNonDiagonal=-1)
DynRankView constructor. Will copy to a View of appropriate rank.
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar ****, DeviceType > & getUnderlyingView4() const
returns the View that stores the unique data. For rank-4 underlying containers.
void storeMatMat(const bool transposeA, const Data< DataScalar, DeviceType > &A_MatData, const bool transposeB, const Data< DataScalar, DeviceType > &B_MatData)
void allocateAndCopyFromDynRankView(ScalarView< DataScalar, DeviceType > data)
allocate an underlying View that matches the provided DynRankView in dimensions, and copy...
KOKKOS_INLINE_FUNCTION DimensionInfo combinedDimensionInfo(const DimensionInfo &myData, const DimensionInfo &otherData)
Returns DimensionInfo for a Data container that combines (through multiplication, say...
static KOKKOS_INLINE_FUNCTION int blockPlusDiagonalNumNondiagonalEntries(const int &lastNondiagonal)
Returns the number of non-diagonal entries based on the last non-diagonal. Only applicable for BLOCK_...
KOKKOS_INLINE_FUNCTION enable_if_t< rank==6, const Kokkos::View< typename RankExpander< DataScalar, rank >::value_type, DeviceType > & > getUnderlyingView() const
Returns the underlying view. Throws an exception if the underlying view is not rank 6...
void setActiveDims()
class initialization method. Called by constructors.
Kokkos::DynRankView< typename ViewType::value_type, typename DeduceLayout< ViewType >::result_layout, typename ViewType::device_type > getMatchingViewWithLabel(const ViewType &view, const std::string &label, DimArgs... dims)
Creates and returns a view that matches the provided view in Kokkos Layout.
Data()
default constructor (empty data)
ScalarView< DataScalar, DeviceType > allocateDynRankViewMatchingUnderlying() const
Returns a DynRankView that matches the underlying Kokkos::View object in value_type, layout, and dimension.
void setExtent(const ordinal_type &d, const ordinal_type &newExtent)
sets the logical extent in the specified dimension. If needed, the underlying data container is resiz...
A singleton class for a DynRankView containing exactly one zero entry. (Technically, the entry is DataScalar(), the default value for the scalar type.) This allows View-wrapping classes to return a reference to zero, even when that zero is not explicitly stored in the wrapped views.
void storeInPlaceProduct(const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B)
stores the in-place (entrywise) product, A .* B, into this container.
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar *******, DeviceType > & getUnderlyingView7() const
returns the View that stores the unique data. For rank-7 underlying containers.
KOKKOS_INLINE_FUNCTION reference_type getWritableEntry(const IntArgs... intArgs) const
Returns an l-value reference to the specified logical entry in the underlying view. Note that for variation types other than GENERAL, multiple valid argument sets will refer to the same memory location. Intended for Intrepid2 developers and expert users only.
KOKKOS_INLINE_FUNCTION void setUnderlyingView1(Kokkos::View< DataScalar *, DeviceType > &view) const
sets the View that stores the unique data. For rank-1 underlying containers.
KOKKOS_INLINE_FUNCTION void setUnderlyingView5(Kokkos::View< DataScalar *****, DeviceType > &view) const
sets the View that stores the unique data. For rank-5 underlying containers.
void storeInPlaceCombination(PolicyType &policy, ThisUnderlyingViewType &this_underlying, AUnderlyingViewType &A_underlying, BUnderlyingViewType &B_underlying, BinaryOperator &binaryOperator, ArgExtractorThis argThis, ArgExtractorA argA, ArgExtractorB argB)
storeInPlaceCombination implementation for rank < 7, with compile-time underlying views and argument ...
KOKKOS_INLINE_FUNCTION int getVariationModulus(const int &d) const
Variation modulus accessor.
Data(ScalarView< DataScalar, DeviceType > data)
copy constructor modeled after the copy-like constructor above. Not as efficient as the implicit copy...
Data(const Kokkos::DynRankView< DataScalar, DeviceType, DynRankViewProperties... > &data, Kokkos::Array< int, rank > extents, Kokkos::Array< DataVariationType, rank > variationType, const int blockPlusDiagonalLastNonDiagonal=-1)
Constructor that accepts a DynRankView as an argument. The data belonging to the DynRankView will be ...
KOKKOS_INLINE_FUNCTION int getDataExtent(const ordinal_type &d) const
returns the true extent of the data corresponding to the logical dimension provided; if the data does...
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar **, DeviceType > & getUnderlyingView2() const
returns the View that stores the unique data. For rank-2 underlying containers.
Argument extractor class which passes a single argument, indicated by the template parameter whichArg...
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar *****, DeviceType > & getUnderlyingView5() const
returns the View that stores the unique data. For rank-5 underlying containers.
DataVariationType
Enumeration to indicate how data varies in a particular dimension of an Intrepid2::Data object...
ScalarView< DataScalar, DeviceType > allocateDynRankViewMatchingUnderlying(DimArgs... dims) const
Returns a DynRankView that matches the underlying Kokkos::View object value_type and layout...
KOKKOS_INLINE_FUNCTION const Kokkos::View< DataScalar ***, DeviceType > & getUnderlyingView3() const
returns the View that stores the unique data. For rank-3 underlying containers.
static Data< DataScalar, DeviceType > allocateMatMatResult(const bool transposeA, const Data< DataScalar, DeviceType > &A_MatData, const bool transposeB, const Data< DataScalar, DeviceType > &B_MatData)
KOKKOS_INLINE_FUNCTION ordinal_type getUnderlyingViewSize() const
returns the number of entries in the View that stores the unique data
Data(const Data< DataScalar, OtherDeviceType > &data)
copy-like constructor for differing execution spaces. This does a deep_copy of the underlying view...
KOKKOS_INLINE_FUNCTION int getUnderlyingViewExtent(const int &dim) const
Returns the extent of the underlying view in the specified dimension.
KOKKOS_INLINE_FUNCTION const Kokkos::Array< DataVariationType, 7 > & getVariationTypes() const
Returns an array with the variation types in each logical dimension.