Compadre  1.2.0
Compadre_GMLS_ApplyTargetEvaluations.hpp
Go to the documentation of this file.
1 #ifndef _COMPADRE_GMLS_APPLY_TARGET_EVALUATIONS_HPP_
2 #define _COMPADRE_GMLS_APPLY_TARGET_EVALUATIONS_HPP_
3 
4 #include "Compadre_GMLS.hpp"
5 namespace Compadre {
6 
7 KOKKOS_INLINE_FUNCTION
9 
10  const int target_index = _initial_index_for_batch + teamMember.league_rank();
11 
12 #ifdef COMPADRE_USE_LAPACK
13 
14  // CPU
15  const int alphas_per_tile_per_target = _neighbor_lists.getNumberOfNeighborsDevice(target_index) + _added_alpha_size;
16  const int base_offset_index_jmke = getTargetOffsetIndexDevice(0,0,0,0);
17  const int base_alphas_index = getAlphaIndexDevice(target_index, base_offset_index_jmke);
18 
19  scratch_matrix_right_type this_alphas(_alphas.data() + TO_GLOBAL(base_alphas_index), _total_alpha_values*_max_evaluation_sites_per_target, alphas_per_tile_per_target);
20 
21  for (int e=0; e<this->getNEvaluationSitesPerTarget(target_index); ++e) {
22  // evaluating alpha_ij
23  for (size_t j=0; j<_operations.size(); ++j) {
24  for (int k=0; k<_lro_output_tile_size[j]; ++k) {
25  for (int m=0; m<_lro_input_tile_size[j]; ++m) {
26  double alpha_ij = 0;
27  int offset_index_jmke = getTargetOffsetIndexDevice(j,m,k,e);
28  for (int i=0; i<this->getNNeighbors(target_index) + _added_alpha_size; ++i) {
29  Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember,
30  _basis_multiplier*target_NP), [&] (const int l, double &talpha_ij) {
32 
33  talpha_ij += P_target_row(offset_index_jmke, l)*Q(l, i+m*this->getNNeighbors(target_index));
34 
35  compadre_kernel_assert_extreme_debug(P_target_row(offset_index_jmke, l)==P_target_row(offset_index_jmke, l)
36  && "NaN in P_target_row matrix.");
37  compadre_kernel_assert_extreme_debug(Q(l, i+m*this->getNNeighbors(target_index))==Q(l, i+m*this->getNNeighbors(target_index))
38  && "NaN in Q coefficient matrix.");
39 
40  } else if (_sampling_multiplier == 1) {
41 
42  talpha_ij += P_target_row(offset_index_jmke, l)*Q(l, i);
43 
44  compadre_kernel_assert_extreme_debug(P_target_row(offset_index_jmke, l)==P_target_row(offset_index_jmke, l)
45  && "NaN in P_target_row matrix.");
47  && "NaN in Q coefficient matrix.");
48 
49  } else {
50  talpha_ij += 0;
51  }
52  }, alpha_ij);
53  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
54  this_alphas(offset_index_jmke,i) = alpha_ij;
55  compadre_kernel_assert_extreme_debug(alpha_ij==alpha_ij && "NaN in alphas.");
56  });
57  }
58  }
59  }
60  }
61  }
62 #elif defined(COMPADRE_USE_CUDA)
63 // // GPU
64 // for (int j=0; j<_operations.size(); ++j) {
65 // for (int k=0; k<_lro_output_tile_size[j]; ++k) {
66 // for (int m=0; m<_lro_input_tile_size[j]; ++m) {
67 // const int alpha_offset = (_lro_total_offsets[j] + m*_lro_output_tile_size[j] + k)*_neighbor_lists(target_index,0);
68 // const int P_offset =_basis_multiplier*target_NP*(_lro_total_offsets[j] + m*_lro_output_tile_size[j] + k);
69 // Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,
70 // this->getNNeighbors(target_index)), [=] (const int i) {
71 //
72 // double alpha_ij = 0;
73 // if (_sampling_multiplier>1 && m<_sampling_multiplier) {
74 // const int m_neighbor_offset = i+m*this->getNNeighbors(target_index);
75 // Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember,
76 // _basis_multiplier*target_NP), [=] (const int l, double &talpha_ij) {
77 // //for (int l=0; l<_basis_multiplier*target_NP; ++l) {
78 // talpha_ij += P_target_row(P_offset + l)*Q(ORDER_INDICES(m_neighbor_offset,l));
79 // }, alpha_ij);
80 // //}
81 // } else if (_sampling_multiplier == 1) {
82 // Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember,
83 // _basis_multiplier*target_NP), [=] (const int l, double &talpha_ij) {
84 // //for (int l=0; l<_basis_multiplier*target_NP; ++l) {
85 // talpha_ij += P_target_row(P_offset + l)*Q(ORDER_INDICES(i,l));
86 // }, alpha_ij);
87 // //}
88 // }
89 // Kokkos::single(Kokkos::PerThread(teamMember), [&] () {
90 // t1(i) = alpha_ij;
91 // });
92 // });
93 // Kokkos::parallel_for(Kokkos::ThreadVectorRange(teamMember,
94 // this->getNNeighbors(target_index)), [=] (const int i) {
95 // _alphas(ORDER_INDICES(target_index, alpha_offset + i)) = t1(i);
96 // });
97 // teamMember.team_barrier();
98 // }
99 // }
100 // }
101 
102  // GPU
103  for (int e=0; e<getNEvaluationSitesPerTarget(target_index); ++e) {
104  for (int j=0; j<(int)_operations.size(); ++j) {
105  for (int k=0; k<_lro_output_tile_size[j]; ++k) {
106  for (int m=0; m<_lro_input_tile_size[j]; ++m) {
107  int offset_index_jmke = getTargetOffsetIndexDevice(j,m,k,e);
108  int alphas_index = getAlphaIndexDevice(target_index, offset_index_jmke);
109  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,
110  this->getNNeighbors(target_index) + _added_alpha_size), [&] (const int i) {
111  double alpha_ij = 0;
113  const int m_neighbor_offset = i+m*this->getNNeighbors(target_index);
114  Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember, _basis_multiplier*target_NP),
115  [=] (int& l, double& t_alpha_ij) {
116  t_alpha_ij += P_target_row(offset_index_jmke, l)*Q(l, m_neighbor_offset);
117 
118  compadre_kernel_assert_extreme_debug(P_target_row(offset_index_jmke, l)==P_target_row(offset_index_jmke, l)
119  && "NaN in P_target_row matrix.");
120  compadre_kernel_assert_extreme_debug(Q(l, m_neighbor_offset)==Q(l, m_neighbor_offset)
121  && "NaN in Q coefficient matrix.");
122 
123  }, alpha_ij);
124  } else if (_sampling_multiplier == 1) {
125  Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember, _basis_multiplier*target_NP),
126  [=] (int& l, double& t_alpha_ij) {
127  t_alpha_ij += P_target_row(offset_index_jmke, l)*Q(l,i);
128 
129  compadre_kernel_assert_extreme_debug(P_target_row(offset_index_jmke, l)==P_target_row(offset_index_jmke, l)
130  && "NaN in P_target_row matrix.");
132  && "NaN in Q coefficient matrix.");
133 
134  }, alpha_ij);
135  }
136  Kokkos::single(Kokkos::PerThread(teamMember), [=] () {
137  //_alphas(target_index, offset_index_jmke, i) = alpha_ij;
138  _alphas(alphas_index+i) = alpha_ij;
139  compadre_kernel_assert_extreme_debug(alpha_ij==alpha_ij && "NaN in alphas.");
140  });
141  });
142 
143  }
144  }
145  }
146  }
147 #endif
148 
149  teamMember.team_barrier();
150 }
151 
152 } // Compadre
153 #endif
Kokkos::View< int * > _lro_input_tile_size
dimensions ^ rank of tensor of output for each sampling functional (device)
Kokkos::View< double *, layout_right > _alphas
generated alpha coefficients (device)
KOKKOS_INLINE_FUNCTION int getNumberOfNeighborsDevice(int target_index) const
Get number of neighbors for a given target (device)
Kokkos::View< TargetOperation * > _operations
vector containing target functionals to be applied for reconstruction problem (device) ...
int _max_evaluation_sites_per_target
maximum number of evaluation sites for each target (includes target site)
team_policy::member_type member_type
#define compadre_kernel_assert_extreme_debug(condition)
int _total_alpha_values
used for sizing P_target_row and the _alphas view
int _sampling_multiplier
actual dimension of the sampling functional e.g.
NeighborLists< Kokkos::View< int * > > _neighbor_lists
Accessor to get neighbor list data, offset data, and number of neighbors per target.
Kokkos::View< int * > _lro_output_tile_size
dimensions ^ rank of tensor of output for each target functional (device)
KOKKOS_INLINE_FUNCTION int getNNeighbors(const int target_index) const
Returns number of neighbors for a particular target.
KOKKOS_INLINE_FUNCTION int getNEvaluationSitesPerTarget(const int target_index) const
(OPTIONAL) Returns number of additional evaluation sites for a particular target
#define TO_GLOBAL(variable)
int _initial_index_for_batch
initial index for current batch
KOKKOS_INLINE_FUNCTION int getTargetOffsetIndexDevice(const int lro_num, const int input_component, const int output_component, const int additional_evaluation_local_index=0) const
Handles offset from operation input/output + extra evaluation sites.
Kokkos::View< double **, layout_right, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_matrix_right_type
Kokkos::View< double *, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_vector_type
int _added_alpha_size
additional alpha coefficients due to constraints
int _basis_multiplier
dimension of the reconstructed function e.g.
KOKKOS_INLINE_FUNCTION global_index_type getAlphaIndexDevice(const int target_index, const int alpha_column_offset) const
Gives index into alphas given two axes, which when incremented by the neighbor number transforms acce...
KOKKOS_INLINE_FUNCTION void applyTargetsToCoefficients(const member_type &teamMember, scratch_vector_type t1, scratch_vector_type t2, scratch_matrix_right_type Q, scratch_vector_type w, scratch_matrix_right_type P_target_row, const int target_NP) const
Helper function for applying the evaluations from a target functional to the polynomial coefficients...