Sacado Package Browser (Single Doxygen Collection)  Version of the Day
view/TestAssembly.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 #include <iostream>
42 
43 // Utilities
44 #include "Teuchos_TestingHelpers.hpp"
45 #include "Teuchos_VerboseObject.hpp"
46 
47 // FENL
48 #include <BoxElemFixture.hpp>
49 #include <fenl_functors.hpp>
50 
51 struct Perf {
52  size_t global_elem_count ;
53  size_t global_node_count ;
54  double fill_time ;
55 
58  fill_time(0) {}
59 
60  void increment(const Perf& p) {
63  fill_time += p.fill_time;
64  }
65 
66  void scale(double s) {
67  fill_time *= s;
68  }
69 };
70 
71 template <typename Scalar, typename Device,
75  const int use_print ,
76  const int use_trials ,
77  const int use_nodes[] ,
78  Kokkos::View< Scalar* , Device >& residual,
80 {
81  using Teuchos::RCP;
82  using Teuchos::rcp;
83  using Teuchos::rcpFromRef;
84  using Teuchos::arrayView;
85  using Teuchos::ParameterList;
86 
88 
90  typedef typename LocalMatrixType::StaticCrsGraphType LocalGraphType ;
91 
93 
95 
96  typedef typename ElementComputationType::vector_type VectorType ;
97 
98  //------------------------------------
99 
100  // Decompose by node to avoid parallel communication in assembly
101 
102  const double bubble_x = 1.0 ;
103  const double bubble_y = 1.0 ;
104  const double bubble_z = 1.0 ;
105 
106  const FixtureType fixture( Kokkos::Example::BoxElemPart::DecomposeNode ,
107  1 , 0 ,
108  use_nodes[0] , use_nodes[1] , use_nodes[2] ,
109  bubble_x , bubble_y , bubble_z );
110 
111  //------------------------------------
112 
113  Kokkos::Impl::Timer wall_clock ;
114 
115  Perf perf_stats = Perf() ;
116 
117  for ( int itrial = 0 ; itrial < use_trials ; ++itrial ) {
118 
119  Perf perf = Perf() ;
120 
121  perf.global_elem_count = fixture.elem_count_global();
122  perf.global_node_count = fixture.node_count_global();
123 
124  //----------------------------------
125  // Create the local sparse matrix graph and element-to-graph map
126  // from the element->to->node identifier array.
127  // The graph only has rows for the owned nodes.
128 
129  typename NodeNodeGraphType::Times graph_times;
130  const NodeNodeGraphType
131  mesh_to_graph( fixture.elem_node() , fixture.node_count_owned(),
132  graph_times );
133 
134  // Create the local sparse matrix from the graph:
135  jacobian = LocalMatrixType( mesh_to_graph.graph );
136 
137  //----------------------------------
138 
139  // Allocate solution vector for each node in the mesh and residual vector for each owned node
140  VectorType solution( "solution" , fixture.node_count() );
141  residual = VectorType( "residual" , fixture.node_count_owned() );
142 
143  // Create element computation functor
144  const ElementComputationType elemcomp( fixture , solution ,
145  mesh_to_graph.elem_graph ,
146  jacobian , residual );
147 
148  Kokkos::deep_copy( solution , Scalar(1.2345) );
149 
150  //--------------------------------
151  // Element contributions to residual and jacobian
152 
153  Kokkos::deep_copy( residual , Scalar(0) );
154  Kokkos::deep_copy( jacobian.coeff , Scalar(0) );
155 
156  wall_clock.reset();
157 
158  elemcomp.apply();
159 
160  Device::fence();
161  perf.fill_time = wall_clock.seconds();
162 
163  //--------------------------------
164 
165  perf_stats.increment(perf);
166 
167  }
168 
169  return perf_stats ;
170 }
171 
172 template<class ValueType>
173 bool compareValues(const ValueType& a1,
174  const std::string& a1_name,
175  const ValueType&a2,
176  const std::string& a2_name,
177  const ValueType& rel_tol, const ValueType& abs_tol,
178  Teuchos::FancyOStream& out)
179 {
180  bool success = true;
181 
182  ValueType err = std::abs(a1 - a2);
183  ValueType tol = abs_tol + rel_tol*std::max(std::abs(a1),std::abs(a2));
184  if (err > tol) {
185  out << "\nError, relErr(" << a1_name <<","
186  << a2_name << ") = relErr(" << a1 <<"," << a2 <<") = "
187  << err << " <= tol = " << tol << ": failed!\n";
188  success = false;
189  }
190 
191  return success;
192 }
193 
194 template <typename VectorType, typename MatrixType>
195 bool check_assembly(const VectorType& analytic_residual,
196  const MatrixType& analytic_jacobian,
197  const VectorType& fad_residual,
198  const MatrixType& fad_jacobian,
199  const std::string& test_name)
200 {
201  const double tol = 1e-14;
202  bool success = true;
203  Teuchos::RCP<Teuchos::FancyOStream> out =
204  Teuchos::VerboseObjectBase::getDefaultOStream();
205  std::stringstream buf;
206  Teuchos::FancyOStream fbuf(Teuchos::rcp(&buf,false));
207 
208  typename VectorType::HostMirror host_analytic_residual =
209  Kokkos::create_mirror_view(analytic_residual);
210  typename VectorType::HostMirror host_fad_residual =
211  Kokkos::create_mirror_view(fad_residual);
212  Kokkos::deep_copy( host_analytic_residual, analytic_residual );
213  Kokkos::deep_copy( host_fad_residual, fad_residual );
214 
215  fbuf << test_name << ":" << std::endl;
216 
217  if (host_analytic_residual.extent(0) != host_fad_residual.extent(0)) {
218  fbuf << "Analytic residual dimension "
219  << host_analytic_residual.extent(0)
220  << " does not match Fad residual dimension "
221  << host_fad_residual.extent(0) << std::endl;
222  success = false;
223  }
224  else {
225  const size_t num_node = host_analytic_residual.extent(0);
226  for (size_t i=0; i<num_node; ++i) {
227  success = success && compareValues(
228  host_analytic_residual(i), "analytic residual",
229  host_fad_residual(i), "Fad residual",
230  tol, tol, fbuf );
231  }
232  }
233 
234  typename MatrixType::HostMirror host_analytic_jacobian =
235  Kokkos::create_mirror_view(analytic_jacobian);
236  typename MatrixType::HostMirror host_fad_jacobian =
237  Kokkos::create_mirror_view(fad_jacobian);
238  Kokkos::deep_copy( host_analytic_jacobian, analytic_jacobian );
239  Kokkos::deep_copy( host_fad_jacobian, fad_jacobian );
240 
241  if (host_analytic_jacobian.extent(0) != host_fad_jacobian.extent(0)) {
242  fbuf << "Analytic Jacobian dimension "
243  << host_analytic_jacobian.extent(0)
244  << " does not match Fad Jacobian dimension "
245  << host_fad_jacobian.extent(0) << std::endl;
246  success = false;
247  }
248  else {
249  const size_t num_entry = host_analytic_jacobian.extent(0);
250  for (size_t i=0; i<num_entry; ++i) {
251  success = success && compareValues(
252  host_analytic_jacobian(i), "analytic Jacobian",
253  host_fad_jacobian(i), "Fad Jacobian",
254  tol, tol, fbuf );
255  }
256  }
257 
258  if (!success)
259  *out << buf.str();
260 
261  return success;
262 }
263 
264 template <class Device>
266  const int use_print ,
267  const int use_trials ,
268  const int n_begin ,
269  const int n_end ,
270  const int n_step ,
271  const bool quadratic ,
272  const bool check )
273 {
279 
280  std::cout.precision(8);
281  std::cout << std::endl
282  << "\"Grid Size\" , "
283  << "\"FEM Size\" , "
284  << "\"Analytic Fill Time\" , "
285  << "\"Fad Element Fill Slowdown\" , "
286  << "\"Fad Optimized Element Fill Slowdown\" , "
287  << "\"Fad QP Fill Slowdown\" , "
288  << std::endl;
289 
290  typedef Kokkos::View< double* , Device > vector_type ;
292  vector_type analytic_residual, fad_residual, fad_opt_residual,
293  fad_qp_residual;
294  matrix_type analytic_jacobian, fad_jacobian, fad_opt_jacobian,
295  fad_qp_jacobian;
296 
297  for (int n=n_begin; n<=n_end; n+=n_step) {
298  const int use_nodes[] = { n, n, n };
299  Perf perf_analytic, perf_fad, perf_fad_opt, perf_fad_qp;
300 
301  if (quadratic) {
302  perf_analytic =
303  fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,Analytic>(
304  use_print, use_trials, use_nodes,
305  analytic_residual, analytic_jacobian );
306 
307  perf_fad =
308  fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadElement>(
309  use_print, use_trials, use_nodes,
310  fad_residual, fad_jacobian);
311 
312  perf_fad_opt =
313  fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadElementOptimized>(
314  use_print, use_trials, use_nodes,
315  fad_opt_residual, fad_opt_jacobian);
316 
317  perf_fad_qp =
318  fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadQuadPoint>(
319  use_print, use_trials, use_nodes,
320  fad_qp_residual, fad_qp_jacobian);
321  }
322  else {
323  perf_analytic =
324  fenl_assembly<double,Device,BoxElemPart::ElemLinear,Analytic>(
325  use_print, use_trials, use_nodes,
326  analytic_residual, analytic_jacobian );
327 
328  perf_fad =
329  fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadElement>(
330  use_print, use_trials, use_nodes,
331  fad_residual, fad_jacobian);
332 
333  perf_fad_opt =
334  fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadElementOptimized>(
335  use_print, use_trials, use_nodes,
336  fad_opt_residual, fad_opt_jacobian);
337 
338  perf_fad_qp =
339  fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadQuadPoint>(
340  use_print, use_trials, use_nodes,
341  fad_qp_residual, fad_qp_jacobian);
342  }
343  if (check) {
344  check_assembly( analytic_residual, analytic_jacobian.coeff,
345  fad_residual, fad_jacobian.coeff,
346  "Fad" );
347  check_assembly( analytic_residual, analytic_jacobian.coeff,
348  fad_opt_residual, fad_opt_jacobian.coeff,
349  "Optimized Fad" );
350  check_assembly( analytic_residual, analytic_jacobian.coeff,
351  fad_qp_residual, fad_qp_jacobian.coeff,
352  "QP Fad" );
353  }
354 
355  double s =
356  1000.0 / ( use_trials * perf_analytic.global_elem_count );
357  perf_analytic.scale(s);
358  perf_fad.scale(s);
359  perf_fad_opt.scale(s);
360  perf_fad_qp.scale(s);
361 
362  std::cout.precision(3);
363  std::cout << n << " , "
364  << perf_analytic.global_node_count << " , "
365  << std::setw(2)
366  << std::scientific
367  << perf_analytic.fill_time << " , "
368  << std::fixed << std::setw(6)
369  << perf_fad.fill_time / perf_analytic.fill_time << " , "
370  << perf_fad_opt.fill_time / perf_analytic.fill_time << " , "
371  << perf_fad_qp.fill_time / perf_analytic.fill_time << " , "
372  << std::endl;
373  }
374 }
size_t global_node_count
abs(expr.val())
double fill_time
Perf fenl_assembly(const int use_print, const int use_trials, const int use_nodes[], Kokkos::View< Scalar *, Device > &residual, Kokkos::Example::FENL::CrsMatrix< Scalar, Device > &jacobian)
int check(Epetra_CrsGraph &A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int *MyGlobalElements, bool verbose)
bool check_assembly(const VectorType &analytic_residual, const MatrixType &analytic_jacobian, const VectorType &fad_residual, const MatrixType &fad_jacobian, const std::string &test_name)
size_t global_elem_count
void increment(const Perf &p)
bool compareValues(const ValueType &a1, const std::string &a1_name, const ValueType &a2, const std::string &a2_name, const ValueType &rel_tol, const ValueType &abs_tol, Teuchos::FancyOStream &out)
void scale(double s)
const double tol
SimpleFad< ValueT > max(const SimpleFad< ValueT > &a, const SimpleFad< ValueT > &b)
Partition a box of hexahedral elements among subdomains.
void performance_test_driver(const int use_print, const int use_trials, const int n_begin, const int n_end, const int n_step, const bool quadratic, const bool check)
Generate a distributed unstructured finite element mesh from a partitioned NX*NY*NZ box of elements...