Loading...
Searching...
No Matches
epsilon_divdiv_kerngen_v01_initial.hpp
Go to the documentation of this file.
1#pragma once
2
3#include "../../../quadrature/quadrature.hpp"
5#include "dense/vec.hpp"
9#include "linalg/operator.hpp"
12#include "linalg/vector.hpp"
13#include "linalg/vector_q1.hpp"
14#include "util/timer.hpp"
15
17
21
22template < typename ScalarT, int VecDim = 3 >
24{
25 public:
28 using ScalarType = ScalarT;
30
31 private:
32 bool storeLMatrices_ =
33 false; // set to let apply_impl() know, that it should store the local matrices after assembling them
34 bool applyStoredLMatrices_ =
35 false; // set to make apply_impl() load and use the stored LMatrices for the operator application
36 Grid4DDataLocalMatrices lmatrices_;
37 bool single_quadpoint_ = true;
38
40
45 BoundaryConditions bcs_;
46
47 bool treat_boundary_;
48 bool diagonal_;
49
50 linalg::OperatorApplyMode operator_apply_mode_;
51 linalg::OperatorCommunicationMode operator_communication_mode_;
52 linalg::OperatorStoredMatrixMode operator_stored_matrix_mode_;
53
56
59
60 public:
67 BoundaryConditions bcs,
68 bool diagonal,
70 linalg::OperatorCommunicationMode operator_communication_mode =
73 : domain_( domain )
74 , grid_( grid )
75 , radii_( radii )
76 , k_( k )
77 , mask_( mask )
78 , treat_boundary_( true )
79 , diagonal_( diagonal )
80 , operator_apply_mode_( operator_apply_mode )
81 , operator_communication_mode_( operator_communication_mode )
82 , operator_stored_matrix_mode_( operator_stored_matrix_mode )
83 // TODO: we can reuse the send and recv buffers and pass in from the outside somehow
84 , send_buffers_( domain )
85 , recv_buffers_( domain )
86 {
87 bcs_[0] = bcs[0];
88 bcs_[1] = bcs[1];
89 }
90
92
93 /// @brief Getter for domain member (const reference to the distributed spherical-shell domain this operator was constructed with).
94 const grid::shell::DistributedDomain& get_domain() const { return domain_; }
95
96 /// @brief Getter for radii member
98
99 /// @brief Getter for grid member
101
102 /// @brief Retrieves the local matrix stored in the operator
103 KOKKOS_INLINE_FUNCTION
105 const int local_subdomain_id,
106 const int x_cell,
107 const int y_cell,
108 const int r_cell,
109 const int wedge,
110 const int dimi,
111 const int dimj ) const
112 {
113 assert( lmatrices_.data() != nullptr );
115 for ( int i = 0; i < 6; ++i )
116 {
117 for ( int j = 0; j < 6; ++j )
118 {
119 ijslice( i, j ) =
120 lmatrices_( local_subdomain_id, x_cell, y_cell, r_cell, wedge )( i + dimi * 6, j + dimj * 6 );
121 }
122 }
123 return ijslice;
124 }
125
126 /// @brief Set the local matrix stored in the operator
127 KOKKOS_INLINE_FUNCTION
129 const int local_subdomain_id,
130 const int x_cell,
131 const int y_cell,
132 const int r_cell,
133 const int wedge,
134 const int dimi,
135 const int dimj,
137 {
138 assert( lmatrices_.data() != nullptr );
139 for ( int i = 0; i < 6; ++i )
140 {
141 for ( int j = 0; j < 6; ++j )
142 {
143 lmatrices_( local_subdomain_id, x_cell, y_cell, r_cell, wedge )( i + dimi * 6, j + dimj * 6 ) =
144 mat( i, j );
145 }
146 }
147 }
148
149 /// @brief Setter for applyStoredLMatrices_: when true, apply_impl() uses the stored local matrices instead of reassembling them
150 void setApplyStoredLMatrices( bool v ) { applyStoredLMatrices_ = v; }
151
152 /// @brief Setter for diagonal_ member: when true, the kernel computes/applies only the diagonal of the local element matrices
153 void set_diagonal( bool v ) { diagonal_ = v; }
154
155 /// @brief
156 /// allocates memory for the local matrices
157 /// calls kernel with storeLMatrices_ = true to assemble and store the local matrices
158 /// sets applyStoredLMatrices_, such that future applies use the stored local matrices
160 {
161 storeLMatrices_ = true;
162 if ( lmatrices_.data() == nullptr )
163 {
164 lmatrices_ = Grid4DDataLocalMatrices(
165 "LaplaceSimple::lmatrices_",
166 domain_.subdomains().size(),
170 Kokkos::parallel_for(
171 "assemble_store_lmatrices", grid::shell::local_domain_md_range_policy_cells( domain_ ), *this );
172 Kokkos::fence();
173 }
174 storeLMatrices_ = false;
175 applyStoredLMatrices_ = true;
176 }
177
179 const linalg::OperatorApplyMode operator_apply_mode,
180 const linalg::OperatorCommunicationMode operator_communication_mode )
181 {
182 operator_apply_mode_ = operator_apply_mode;
183 operator_communication_mode_ = operator_communication_mode;
184 }
185
186 void apply_impl( const SrcVectorType& src, DstVectorType& dst )
187 {
188 util::Timer timer_apply( "vector_laplace_apply" );
189 if ( storeLMatrices_ or applyStoredLMatrices_ )
190 assert( lmatrices_.data() != nullptr );
191
192 if ( operator_apply_mode_ == linalg::OperatorApplyMode::Replace )
193 {
194 assign( dst, 0 );
195 }
196
197 src_ = src.grid_data();
198 dst_ = dst.grid_data();
199
200 if ( src_.extent( 0 ) != dst_.extent( 0 ) || src_.extent( 1 ) != dst_.extent( 1 ) ||
201 src_.extent( 2 ) != dst_.extent( 2 ) || src_.extent( 3 ) != dst_.extent( 3 ) )
202 {
203 throw std::runtime_error( "VectorLaplace: src/dst mismatch" );
204 }
205
206 if ( src_.extent( 1 ) != grid_.extent( 1 ) || src_.extent( 2 ) != grid_.extent( 2 ) )
207 {
208 throw std::runtime_error( "VectorLaplace: src/dst mismatch" );
209 }
210
211 util::Timer timer_kernel( "vector_laplace_kernel" );
212 Kokkos::parallel_for( "matvec", grid::shell::local_domain_md_range_policy_cells( domain_ ), *this );
213 Kokkos::fence();
214 timer_kernel.stop();
215
216 if ( operator_communication_mode_ == linalg::OperatorCommunicationMode::CommunicateAdditively )
217 {
218 util::Timer timer_comm( "vector_laplace_comm" );
219
221 domain_, dst_, send_buffers_, recv_buffers_ );
223 }
224 }
225
226 KOKKOS_INLINE_FUNCTION void
227 operator()( const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell ) const
228 {
229 // Compute the local element matrix.
231
232 if ( !applyStoredLMatrices_ )
233 {
234 double wedge_surf_phy_coords[2][3][3];
235 double quad_surface_coords[2][2][3];
236 quad_surface_coords[0][0][0] = grid_( local_subdomain_id, x_cell, y_cell, 0 );
237 quad_surface_coords[0][0][1] = grid_( local_subdomain_id, x_cell, y_cell, 1 );
238 quad_surface_coords[0][0][2] = grid_( local_subdomain_id, x_cell, y_cell, 2 );
239 quad_surface_coords[0][1][0] = grid_( local_subdomain_id, x_cell, y_cell + 1, 0 );
240 quad_surface_coords[0][1][1] = grid_( local_subdomain_id, x_cell, y_cell + 1, 1 );
241 quad_surface_coords[0][1][2] = grid_( local_subdomain_id, x_cell, y_cell + 1, 2 );
242 quad_surface_coords[1][0][0] = grid_( local_subdomain_id, x_cell + 1, y_cell, 0 );
243 quad_surface_coords[1][0][1] = grid_( local_subdomain_id, x_cell + 1, y_cell, 1 );
244 quad_surface_coords[1][0][2] = grid_( local_subdomain_id, x_cell + 1, y_cell, 2 );
245 quad_surface_coords[1][1][0] = grid_( local_subdomain_id, x_cell + 1, y_cell + 1, 0 );
246 quad_surface_coords[1][1][1] = grid_( local_subdomain_id, x_cell + 1, y_cell + 1, 1 );
247 quad_surface_coords[1][1][2] = grid_( local_subdomain_id, x_cell + 1, y_cell + 1, 2 );
248 wedge_surf_phy_coords[0][0][0] = quad_surface_coords[0][0][0];
249 wedge_surf_phy_coords[0][0][1] = quad_surface_coords[0][0][1];
250 wedge_surf_phy_coords[0][0][2] = quad_surface_coords[0][0][2];
251 wedge_surf_phy_coords[0][1][0] = quad_surface_coords[1][0][0];
252 wedge_surf_phy_coords[0][1][1] = quad_surface_coords[1][0][1];
253 wedge_surf_phy_coords[0][1][2] = quad_surface_coords[1][0][2];
254 wedge_surf_phy_coords[0][2][0] = quad_surface_coords[0][1][0];
255 wedge_surf_phy_coords[0][2][1] = quad_surface_coords[0][1][1];
256 wedge_surf_phy_coords[0][2][2] = quad_surface_coords[0][1][2];
257 wedge_surf_phy_coords[1][0][0] = quad_surface_coords[1][1][0];
258 wedge_surf_phy_coords[1][0][1] = quad_surface_coords[1][1][1];
259 wedge_surf_phy_coords[1][0][2] = quad_surface_coords[1][1][2];
260 wedge_surf_phy_coords[1][1][0] = quad_surface_coords[0][1][0];
261 wedge_surf_phy_coords[1][1][1] = quad_surface_coords[0][1][1];
262 wedge_surf_phy_coords[1][1][2] = quad_surface_coords[0][1][2];
263 wedge_surf_phy_coords[1][2][0] = quad_surface_coords[1][0][0];
264 wedge_surf_phy_coords[1][2][1] = quad_surface_coords[1][0][1];
265 wedge_surf_phy_coords[1][2][2] = quad_surface_coords[1][0][2];
266 double r_0 = radii_( local_subdomain_id, r_cell );
267 double r_1 = radii_( local_subdomain_id, r_cell + 1 );
268 double src_local_hex[3][2][6];
269 int dim;
270 for ( dim = 0; dim < 3; dim += 1 )
271 {
272 src_local_hex[dim][0][0] = src_( local_subdomain_id, x_cell, y_cell, r_cell, dim );
273 src_local_hex[dim][0][1] = src_( local_subdomain_id, x_cell + 1, y_cell, r_cell, dim );
274 src_local_hex[dim][0][2] = src_( local_subdomain_id, x_cell, y_cell + 1, r_cell, dim );
275 src_local_hex[dim][0][3] = src_( local_subdomain_id, x_cell, y_cell, r_cell + 1, dim );
276 src_local_hex[dim][0][4] = src_( local_subdomain_id, x_cell + 1, y_cell, r_cell + 1, dim );
277 src_local_hex[dim][0][5] = src_( local_subdomain_id, x_cell, y_cell + 1, r_cell + 1, dim );
278 src_local_hex[dim][1][0] = src_( local_subdomain_id, x_cell + 1, y_cell + 1, r_cell, dim );
279 src_local_hex[dim][1][1] = src_( local_subdomain_id, x_cell, y_cell + 1, r_cell, dim );
280 src_local_hex[dim][1][2] = src_( local_subdomain_id, x_cell + 1, y_cell, r_cell, dim );
281 src_local_hex[dim][1][3] = src_( local_subdomain_id, x_cell + 1, y_cell + 1, r_cell + 1, dim );
282 src_local_hex[dim][1][4] = src_( local_subdomain_id, x_cell, y_cell + 1, r_cell + 1, dim );
283 src_local_hex[dim][1][5] = src_( local_subdomain_id, x_cell + 1, y_cell, r_cell + 1, dim );
284 };
285 double k_local_hex[2][6];
286 k_local_hex[0][0] = k_( local_subdomain_id, x_cell, y_cell, r_cell );
287 k_local_hex[0][1] = k_( local_subdomain_id, x_cell + 1, y_cell, r_cell );
288 k_local_hex[0][2] = k_( local_subdomain_id, x_cell, y_cell + 1, r_cell );
289 k_local_hex[0][3] = k_( local_subdomain_id, x_cell, y_cell, r_cell + 1 );
290 k_local_hex[0][4] = k_( local_subdomain_id, x_cell + 1, y_cell, r_cell + 1 );
291 k_local_hex[0][5] = k_( local_subdomain_id, x_cell, y_cell + 1, r_cell + 1 );
292 k_local_hex[1][0] = k_( local_subdomain_id, x_cell + 1, y_cell + 1, r_cell );
293 k_local_hex[1][1] = k_( local_subdomain_id, x_cell, y_cell + 1, r_cell );
294 k_local_hex[1][2] = k_( local_subdomain_id, x_cell + 1, y_cell, r_cell );
295 k_local_hex[1][3] = k_( local_subdomain_id, x_cell + 1, y_cell + 1, r_cell + 1 );
296 k_local_hex[1][4] = k_( local_subdomain_id, x_cell, y_cell + 1, r_cell + 1 );
297 k_local_hex[1][5] = k_( local_subdomain_id, x_cell + 1, y_cell, r_cell + 1 );
298 double qp_array[6][3];
299 double qw_array[6];
300 qp_array[0][0] = 0.66666666666666663;
301 qp_array[1][0] = 0.16666666666666671;
302 qp_array[2][0] = 0.16666666666666671;
303 qp_array[3][0] = 0.66666666666666663;
304 qp_array[4][0] = 0.16666666666666671;
305 qp_array[5][0] = 0.16666666666666671;
306 qp_array[0][1] = 0.16666666666666671;
307 qp_array[1][1] = 0.66666666666666663;
308 qp_array[2][1] = 0.16666666666666671;
309 qp_array[3][1] = 0.16666666666666671;
310 qp_array[4][1] = 0.66666666666666663;
311 qp_array[5][1] = 0.16666666666666671;
312 qp_array[0][2] = -0.57735026918962573;
313 qp_array[1][2] = -0.57735026918962573;
314 qp_array[2][2] = -0.57735026918962573;
315 qp_array[3][2] = 0.57735026918962573;
316 qp_array[4][2] = 0.57735026918962573;
317 qp_array[5][2] = 0.57735026918962573;
318 qw_array[0] = 0.16666666666666671;
319 qw_array[1] = 0.16666666666666671;
320 qw_array[2] = 0.16666666666666671;
321 qw_array[3] = 0.16666666666666671;
322 qw_array[4] = 0.16666666666666671;
323 qw_array[5] = 0.16666666666666671;
324 int cmb_shift = ( ( treat_boundary_ && diagonal_ == false && r_cell == 0 ) ? ( 3 ) : ( 0 ) );
325 int max_rad = -1 + radii_.extent( 1 );
326 int surface_shift = ( ( treat_boundary_ && diagonal_ == false && max_rad == r_cell + 1 ) ? ( 3 ) : ( 0 ) );
327 double dst_array[3][2][6] = { 0 };
328 int w = 0;
329 for ( w = 0; w < 2; w += 1 )
330 {
331 int q = 0;
332 for ( q = 0; q < 6; q += 1 )
333 {
334 /* Coefficient evaluation on current wedge w */;
335 double tmpcse_k_eval_0 = ( 1.0 / 2.0 ) * qp_array[q][2];
336 double tmpcse_k_eval_1 = 1.0 / 2.0 - tmpcse_k_eval_0;
337 double tmpcse_k_eval_2 = tmpcse_k_eval_0 + 1.0 / 2.0;
338 double tmpcse_k_eval_3 = -qp_array[q][0] - qp_array[q][1] + 1;
339 double k_eval = tmpcse_k_eval_1 * tmpcse_k_eval_3 * k_local_hex[w][0] +
340 tmpcse_k_eval_1 * k_local_hex[w][1] * qp_array[q][0] +
341 tmpcse_k_eval_1 * k_local_hex[w][2] * qp_array[q][1] +
342 tmpcse_k_eval_2 * tmpcse_k_eval_3 * k_local_hex[w][3] +
343 tmpcse_k_eval_2 * k_local_hex[w][4] * qp_array[q][0] +
344 tmpcse_k_eval_2 * k_local_hex[w][5] * qp_array[q][1];
345 /* Computation + Inversion of the Jacobian */;
346 double tmpcse_J_0 = -1.0 / 2.0 * r_0 + ( 1.0 / 2.0 ) * r_1;
347 double tmpcse_J_1 = r_0 + tmpcse_J_0 * ( qp_array[q][2] + 1 );
348 double tmpcse_J_2 = -qp_array[q][0] - qp_array[q][1] + 1;
349 double J_0_0 = tmpcse_J_1 * ( -wedge_surf_phy_coords[w][0][0] + wedge_surf_phy_coords[w][1][0] );
350 double J_0_1 = tmpcse_J_1 * ( -wedge_surf_phy_coords[w][0][0] + wedge_surf_phy_coords[w][2][0] );
351 double J_0_2 = tmpcse_J_0 * ( tmpcse_J_2 * wedge_surf_phy_coords[w][0][0] +
352 qp_array[q][0] * wedge_surf_phy_coords[w][1][0] +
353 qp_array[q][1] * wedge_surf_phy_coords[w][2][0] );
354 double J_1_0 = tmpcse_J_1 * ( -wedge_surf_phy_coords[w][0][1] + wedge_surf_phy_coords[w][1][1] );
355 double J_1_1 = tmpcse_J_1 * ( -wedge_surf_phy_coords[w][0][1] + wedge_surf_phy_coords[w][2][1] );
356 double J_1_2 = tmpcse_J_0 * ( tmpcse_J_2 * wedge_surf_phy_coords[w][0][1] +
357 qp_array[q][0] * wedge_surf_phy_coords[w][1][1] +
358 qp_array[q][1] * wedge_surf_phy_coords[w][2][1] );
359 double J_2_0 = tmpcse_J_1 * ( -wedge_surf_phy_coords[w][0][2] + wedge_surf_phy_coords[w][1][2] );
360 double J_2_1 = tmpcse_J_1 * ( -wedge_surf_phy_coords[w][0][2] + wedge_surf_phy_coords[w][2][2] );
361 double J_2_2 = tmpcse_J_0 * ( tmpcse_J_2 * wedge_surf_phy_coords[w][0][2] +
362 qp_array[q][0] * wedge_surf_phy_coords[w][1][2] +
363 qp_array[q][1] * wedge_surf_phy_coords[w][2][2] );
364 double J_det = J_0_0 * J_1_1 * J_2_2 - J_0_0 * J_1_2 * J_2_1 - J_0_1 * J_1_0 * J_2_2 +
365 J_0_1 * J_1_2 * J_2_0 + J_0_2 * J_1_0 * J_2_1 - J_0_2 * J_1_1 * J_2_0;
366 double tmpcse_J_invT_0 = 1.0 / J_det;
367 double J_invT_cse_0_0 = tmpcse_J_invT_0 * ( J_1_1 * J_2_2 - J_1_2 * J_2_1 );
368 double J_invT_cse_0_1 = tmpcse_J_invT_0 * ( -J_1_0 * J_2_2 + J_1_2 * J_2_0 );
369 double J_invT_cse_0_2 = tmpcse_J_invT_0 * ( J_1_0 * J_2_1 - J_1_1 * J_2_0 );
370 double J_invT_cse_1_0 = tmpcse_J_invT_0 * ( -J_0_1 * J_2_2 + J_0_2 * J_2_1 );
371 double J_invT_cse_1_1 = tmpcse_J_invT_0 * ( J_0_0 * J_2_2 - J_0_2 * J_2_0 );
372 double J_invT_cse_1_2 = tmpcse_J_invT_0 * ( -J_0_0 * J_2_1 + J_0_1 * J_2_0 );
373 double J_invT_cse_2_0 = tmpcse_J_invT_0 * ( J_0_1 * J_1_2 - J_0_2 * J_1_1 );
374 double J_invT_cse_2_1 = tmpcse_J_invT_0 * ( -J_0_0 * J_1_2 + J_0_2 * J_1_0 );
375 double J_invT_cse_2_2 = tmpcse_J_invT_0 * ( J_0_0 * J_1_1 - J_0_1 * J_1_0 );
376
377 double scalar_grad[6][3] = { 0 };
378 double tmpcse_grad_i_0 = ( 1.0 / 2.0 ) * qp_array[q][2];
379 double tmpcse_grad_i_1 = tmpcse_grad_i_0 - 1.0 / 2.0;
380 double tmpcse_grad_i_2 = ( 1.0 / 2.0 ) * qp_array[q][0];
381 double tmpcse_grad_i_3 = ( 1.0 / 2.0 ) * qp_array[q][1];
382 double tmpcse_grad_i_4 = tmpcse_grad_i_2 + tmpcse_grad_i_3 - 1.0 / 2.0;
383 double tmpcse_grad_i_5 = J_invT_cse_0_2 * tmpcse_grad_i_2;
384 double tmpcse_grad_i_6 = -tmpcse_grad_i_1;
385 double tmpcse_grad_i_7 = J_invT_cse_1_2 * tmpcse_grad_i_2;
386 double tmpcse_grad_i_8 = J_invT_cse_2_2 * tmpcse_grad_i_2;
387 double tmpcse_grad_i_9 = J_invT_cse_0_2 * tmpcse_grad_i_3;
388 double tmpcse_grad_i_10 = J_invT_cse_1_2 * tmpcse_grad_i_3;
389 double tmpcse_grad_i_11 = J_invT_cse_2_2 * tmpcse_grad_i_3;
390 double tmpcse_grad_i_12 = tmpcse_grad_i_0 + 1.0 / 2.0;
391 double tmpcse_grad_i_13 = -tmpcse_grad_i_12;
392 double tmpcse_grad_i_14 = -tmpcse_grad_i_4;
393 scalar_grad[0][0] = J_invT_cse_0_0 * tmpcse_grad_i_1 + J_invT_cse_0_1 * tmpcse_grad_i_1 +
394 J_invT_cse_0_2 * tmpcse_grad_i_4;
395 scalar_grad[0][1] = J_invT_cse_1_0 * tmpcse_grad_i_1 + J_invT_cse_1_1 * tmpcse_grad_i_1 +
396 J_invT_cse_1_2 * tmpcse_grad_i_4;
397 scalar_grad[0][2] = J_invT_cse_2_0 * tmpcse_grad_i_1 + J_invT_cse_2_1 * tmpcse_grad_i_1 +
398 J_invT_cse_2_2 * tmpcse_grad_i_4;
399 scalar_grad[1][0] = J_invT_cse_0_0 * tmpcse_grad_i_6 - tmpcse_grad_i_5;
400 scalar_grad[1][1] = J_invT_cse_1_0 * tmpcse_grad_i_6 - tmpcse_grad_i_7;
401 scalar_grad[1][2] = J_invT_cse_2_0 * tmpcse_grad_i_6 - tmpcse_grad_i_8;
402 scalar_grad[2][0] = J_invT_cse_0_1 * tmpcse_grad_i_6 - tmpcse_grad_i_9;
403 scalar_grad[2][1] = J_invT_cse_1_1 * tmpcse_grad_i_6 - tmpcse_grad_i_10;
404 scalar_grad[2][2] = J_invT_cse_2_1 * tmpcse_grad_i_6 - tmpcse_grad_i_11;
405 scalar_grad[3][0] = J_invT_cse_0_0 * tmpcse_grad_i_13 + J_invT_cse_0_1 * tmpcse_grad_i_13 +
406 J_invT_cse_0_2 * tmpcse_grad_i_14;
407 scalar_grad[3][1] = J_invT_cse_1_0 * tmpcse_grad_i_13 + J_invT_cse_1_1 * tmpcse_grad_i_13 +
408 J_invT_cse_1_2 * tmpcse_grad_i_14;
409 scalar_grad[3][2] = J_invT_cse_2_0 * tmpcse_grad_i_13 + J_invT_cse_2_1 * tmpcse_grad_i_13 +
410 J_invT_cse_2_2 * tmpcse_grad_i_14;
411 scalar_grad[4][0] = J_invT_cse_0_0 * tmpcse_grad_i_12 + tmpcse_grad_i_5;
412 scalar_grad[4][1] = J_invT_cse_1_0 * tmpcse_grad_i_12 + tmpcse_grad_i_7;
413 scalar_grad[4][2] = J_invT_cse_2_0 * tmpcse_grad_i_12 + tmpcse_grad_i_8;
414 scalar_grad[5][0] = J_invT_cse_0_1 * tmpcse_grad_i_12 + tmpcse_grad_i_9;
415 scalar_grad[5][1] = J_invT_cse_1_1 * tmpcse_grad_i_12 + tmpcse_grad_i_10;
416 scalar_grad[5][2] = J_invT_cse_2_1 * tmpcse_grad_i_12 + tmpcse_grad_i_11;
417 int dimi;
418 int dimj;
419 for ( dimi = 0; dimi < 3; dimi += 1 )
420 {
421 for ( dimj = 0; dimj < 3; dimj += 1 )
422 {
423 if ( diagonal_ == false )
424 {
425 double grad_u[3][3] = { 0 };
426 double div_u = 0.0;
427 int node_idx;
428 for ( node_idx = cmb_shift; node_idx < 6 - surface_shift; node_idx += 1 )
429 {
430 double E_grad_trial[3][3] = { 0 };
431 E_grad_trial[0][dimj] = scalar_grad[node_idx][0];
432 E_grad_trial[1][dimj] = scalar_grad[node_idx][1];
433 E_grad_trial[2][dimj] = scalar_grad[node_idx][2];
434 double tmpcse_symgrad_trial_0 = 0.5 * E_grad_trial[0][1] + 0.5 * E_grad_trial[1][0];
435 double tmpcse_symgrad_trial_1 = 0.5 * E_grad_trial[0][2] + 0.5 * E_grad_trial[2][0];
436 double tmpcse_symgrad_trial_2 = 0.5 * E_grad_trial[1][2] + 0.5 * E_grad_trial[2][1];
437 grad_u[0][0] =
438 1.0 * E_grad_trial[0][0] * src_local_hex[dimj][w][node_idx] + grad_u[0][0];
439 grad_u[0][1] =
440 tmpcse_symgrad_trial_0 * src_local_hex[dimj][w][node_idx] + grad_u[0][1];
441 grad_u[0][2] =
442 tmpcse_symgrad_trial_1 * src_local_hex[dimj][w][node_idx] + grad_u[0][2];
443 grad_u[1][0] =
444 tmpcse_symgrad_trial_0 * src_local_hex[dimj][w][node_idx] + grad_u[1][0];
445 grad_u[1][1] =
446 1.0 * E_grad_trial[1][1] * src_local_hex[dimj][w][node_idx] + grad_u[1][1];
447 grad_u[1][2] =
448 tmpcse_symgrad_trial_2 * src_local_hex[dimj][w][node_idx] + grad_u[1][2];
449 grad_u[2][0] =
450 tmpcse_symgrad_trial_1 * src_local_hex[dimj][w][node_idx] + grad_u[2][0];
451 grad_u[2][1] =
452 tmpcse_symgrad_trial_2 * src_local_hex[dimj][w][node_idx] + grad_u[2][1];
453 grad_u[2][2] =
454 1.0 * E_grad_trial[2][2] * src_local_hex[dimj][w][node_idx] + grad_u[2][2];
455 div_u = div_u + E_grad_trial[dimj][dimj] * src_local_hex[dimj][w][node_idx];
456 };
457 for ( node_idx = cmb_shift; node_idx < 6 - surface_shift; node_idx += 1 )
458 {
459 double E_grad_test[3][3] = { 0 };
460 E_grad_test[0][dimi] = scalar_grad[node_idx][0];
461 E_grad_test[1][dimi] = scalar_grad[node_idx][1];
462 E_grad_test[2][dimi] = scalar_grad[node_idx][2];
463 double tmpcse_symgrad_test_0 = 0.5 * E_grad_test[0][1] + 0.5 * E_grad_test[1][0];
464 double tmpcse_symgrad_test_1 = 0.5 * E_grad_test[0][2] + 0.5 * E_grad_test[2][0];
465 double tmpcse_symgrad_test_2 = 0.5 * E_grad_test[1][2] + 0.5 * E_grad_test[2][1];
466 double tmpcse_pairing_0 = 2 * tmpcse_symgrad_test_0;
467 double tmpcse_pairing_1 = 2 * tmpcse_symgrad_test_1;
468 double tmpcse_pairing_2 = 2 * tmpcse_symgrad_test_2;
469 dst_array[dimi][w][node_idx] =
470 k_eval *
471 ( -0.66666666666666663 * div_u * E_grad_test[dimi][dimi] +
472 tmpcse_pairing_0 * grad_u[0][1] + tmpcse_pairing_0 * grad_u[1][0] +
473 tmpcse_pairing_1 * grad_u[0][2] + tmpcse_pairing_1 * grad_u[2][0] +
474 tmpcse_pairing_2 * grad_u[1][2] + tmpcse_pairing_2 * grad_u[2][1] +
475 2.0 * E_grad_test[0][0] * grad_u[0][0] +
476 2.0 * E_grad_test[1][1] * grad_u[1][1] +
477 2.0 * E_grad_test[2][2] * grad_u[2][2] ) *
478 fabs( J_det ) * qw_array[q] +
479 dst_array[dimi][w][node_idx];
480 };
481 };
482 if ( dimi == dimj &&
483 ( diagonal_ || treat_boundary_ && ( r_cell == 0 || r_cell + 1 == max_rad ) ) )
484 {
485 int node_idx;
486 for ( node_idx = surface_shift; node_idx < 6 - cmb_shift; node_idx += 1 )
487 {
488 double E_grad_test[3][3] = { 0 };
489 E_grad_test[0][dimj] = scalar_grad[node_idx][0];
490 E_grad_test[1][dimj] = scalar_grad[node_idx][1];
491 E_grad_test[2][dimj] = scalar_grad[node_idx][2];
492
493 double grad_u_diag[3][3] = { 0 };
494 double tmpcse_symgrad_test_0 = 0.5 * E_grad_test[0][1] + 0.5 * E_grad_test[1][0];
495 double tmpcse_symgrad_test_1 = 0.5 * E_grad_test[0][2] + 0.5 * E_grad_test[2][0];
496 double tmpcse_symgrad_test_2 = 0.5 * E_grad_test[1][2] + 0.5 * E_grad_test[2][1];
497 grad_u_diag[0][0] = 1.0 * E_grad_test[0][0] * src_local_hex[dimj][w][node_idx];
498 grad_u_diag[0][1] = tmpcse_symgrad_test_0 * src_local_hex[dimj][w][node_idx];
499 grad_u_diag[0][2] = tmpcse_symgrad_test_1 * src_local_hex[dimj][w][node_idx];
500 grad_u_diag[1][0] = tmpcse_symgrad_test_0 * src_local_hex[dimj][w][node_idx];
501 grad_u_diag[1][1] = 1.0 * E_grad_test[1][1] * src_local_hex[dimj][w][node_idx];
502 grad_u_diag[1][2] = tmpcse_symgrad_test_2 * src_local_hex[dimj][w][node_idx];
503 grad_u_diag[2][0] = tmpcse_symgrad_test_1 * src_local_hex[dimj][w][node_idx];
504 grad_u_diag[2][1] = tmpcse_symgrad_test_2 * src_local_hex[dimj][w][node_idx];
505 grad_u_diag[2][2] = 1.0 * E_grad_test[2][2] * src_local_hex[dimj][w][node_idx];
506 double tmpcse_pairing_0 = 4 * src_local_hex[dimj][w][node_idx];
507 double tmpcse_pairing_1 = 2.0 * src_local_hex[dimj][w][node_idx];
508 dst_array[dimi][w][node_idx] =
509 k_eval *
510 ( tmpcse_pairing_0 * pow( tmpcse_symgrad_test_0, 2 ) +
511 tmpcse_pairing_0 * pow( tmpcse_symgrad_test_1, 2 ) +
512 tmpcse_pairing_0 * pow( tmpcse_symgrad_test_2, 2 ) +
513 tmpcse_pairing_1 * pow( E_grad_test[0][0], 2 ) +
514 tmpcse_pairing_1 * pow( E_grad_test[1][1], 2 ) +
515 tmpcse_pairing_1 * pow( E_grad_test[2][2], 2 ) -
516 0.66666666666666663 * E_grad_test[dimi][dimi] * E_grad_test[dimj][dimj] *
517 src_local_hex[dimj][w][node_idx] ) *
518 fabs( J_det ) * qw_array[q] +
519 dst_array[dimi][w][node_idx];
520 };
521 };
522 };
523 };
524 };
525 };
526 int dim_add;
527 for ( dim_add = 0; dim_add < 3; dim_add += 1 )
528 {
529 Kokkos::atomic_add(
530 &dst_( local_subdomain_id, x_cell, y_cell, r_cell, dim_add ), dst_array[dim_add][0][0] );
531 Kokkos::atomic_add(
532 &dst_( local_subdomain_id, x_cell + 1, y_cell, r_cell, dim_add ),
533 dst_array[dim_add][0][1] + dst_array[dim_add][1][2] );
534 Kokkos::atomic_add(
535 &dst_( local_subdomain_id, x_cell, y_cell + 1, r_cell, dim_add ),
536 dst_array[dim_add][0][2] + dst_array[dim_add][1][1] );
537 Kokkos::atomic_add(
538 &dst_( local_subdomain_id, x_cell, y_cell, r_cell + 1, dim_add ), dst_array[dim_add][0][3] );
539 Kokkos::atomic_add(
540 &dst_( local_subdomain_id, x_cell + 1, y_cell, r_cell + 1, dim_add ),
541 dst_array[dim_add][0][4] + dst_array[dim_add][1][5] );
542 Kokkos::atomic_add(
543 &dst_( local_subdomain_id, x_cell, y_cell + 1, r_cell + 1, dim_add ),
544 dst_array[dim_add][0][5] + dst_array[dim_add][1][4] );
545 Kokkos::atomic_add(
546 &dst_( local_subdomain_id, x_cell + 1, y_cell + 1, r_cell, dim_add ), dst_array[dim_add][1][0] );
547 Kokkos::atomic_add(
548 &dst_( local_subdomain_id, x_cell + 1, y_cell + 1, r_cell + 1, dim_add ),
549 dst_array[dim_add][1][3] );
550 };
551 }
552 else
553 {
554 // load LMatrix for both local wedges
555 A[0] = lmatrices_( local_subdomain_id, x_cell, y_cell, r_cell, 0 );
556 A[1] = lmatrices_( local_subdomain_id, x_cell, y_cell, r_cell, 1 );
557 }
558
559 if ( diagonal_ )
560 {
561 A[0] = A[0].diagonal();
562 A[1] = A[1].diagonal();
563 }
564
565 if ( storeLMatrices_ )
566 {
567 // write local matrices to mem
568 lmatrices_( local_subdomain_id, x_cell, y_cell, r_cell, 0 ) = A[0];
569 lmatrices_( local_subdomain_id, x_cell, y_cell, r_cell, 1 ) = A[1];
570 }
571 else
572 {
574 for ( int dimj = 0; dimj < 3; dimj++ )
575 {
578 src_d, local_subdomain_id, x_cell, y_cell, r_cell, dimj, src_ );
579
580 for ( int wedge = 0; wedge < num_wedges_per_hex_cell; wedge++ )
581 {
582 for ( int i = 0; i < num_nodes_per_wedge; i++ )
583 {
584 src[wedge]( dimj * num_nodes_per_wedge + i ) = src_d[wedge]( i );
585 }
586 }
587 }
588 //extract_local_wedge_vector_coefficients( src, local_subdomain_id, x_cell, y_cell, r_cell, dimj, src_ );
589
591
592 dst[0] = A[0] * src[0];
593 dst[1] = A[1] * src[1];
594
595 //atomically_add_local_wedge_vector_coefficients( dst_, local_subdomain_id, x_cell, y_cell, r_cell, dimi, dst );
596 for ( int dimi = 0; dimi < 3; dimi++ )
597 {
599 dst_d[0] = dst[0].template slice< 6 >( dimi * num_nodes_per_wedge );
600 dst_d[1] = dst[1].template slice< 6 >( dimi * num_nodes_per_wedge );
601
603 dst_, local_subdomain_id, x_cell, y_cell, r_cell, dimi, dst_d );
604 }
605 }
606 }
607};
608
611
612} // namespace terra::fe::wedge::operators::shell::epsdivdiv_history
Send and receive buffers for all process-local subdomain boundaries.
Definition communication.hpp:56
void set_lmatrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge, const int dimi, const int dimj, dense::Mat< ScalarT, 6, 6 > mat) const
Set the local matrix stored in the operator.
Definition epsilon_divdiv_kerngen_v01_initial.hpp:128
void set_operator_apply_and_communication_modes(const linalg::OperatorApplyMode operator_apply_mode, const linalg::OperatorCommunicationMode operator_communication_mode)
Definition epsilon_divdiv_kerngen_v01_initial.hpp:178
ScalarT ScalarType
Definition epsilon_divdiv_kerngen_v01_initial.hpp:28
const grid::shell::DistributedDomain & get_domain() const
Getter for domain member.
Definition epsilon_divdiv_kerngen_v01_initial.hpp:94
grid::Grid2DDataScalar< ScalarT > get_radii() const
Getter for radii member.
Definition epsilon_divdiv_kerngen_v01_initial.hpp:97
grid::Grid3DDataVec< ScalarT, 3 > get_grid() const
Getter for grid member.
Definition epsilon_divdiv_kerngen_v01_initial.hpp:100
terra::grid::Grid4DDataMatrices< ScalarType, 18, 18, 2 > Grid4DDataLocalMatrices
Definition epsilon_divdiv_kerngen_v01_initial.hpp:29
EpsilonDivDivKerngenV01Initial(const grid::shell::DistributedDomain &domain, const grid::Grid3DDataVec< ScalarT, 3 > &grid, const grid::Grid2DDataScalar< ScalarT > &radii, const grid::Grid4DDataScalar< grid::shell::ShellBoundaryFlag > &mask, const grid::Grid4DDataScalar< ScalarT > &k, BoundaryConditions bcs, bool diagonal, linalg::OperatorApplyMode operator_apply_mode=linalg::OperatorApplyMode::Replace, linalg::OperatorCommunicationMode operator_communication_mode=linalg::OperatorCommunicationMode::CommunicateAdditively, linalg::OperatorStoredMatrixMode operator_stored_matrix_mode=linalg::OperatorStoredMatrixMode::Off)
Definition epsilon_divdiv_kerngen_v01_initial.hpp:61
void setApplyStoredLMatrices(bool v)
Setter for applyStoredLMatrices_: enables usage of the stored local matrices during apply.
Definition epsilon_divdiv_kerngen_v01_initial.hpp:150
void set_diagonal(bool v)
Setter for diagonal member.
Definition epsilon_divdiv_kerngen_v01_initial.hpp:153
dense::Mat< ScalarT, 6, 6 > get_lmatrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge, const int dimi, const int dimj) const
Retrieves the local matrix stored in the operator.
Definition epsilon_divdiv_kerngen_v01_initial.hpp:104
void operator()(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell) const
Definition epsilon_divdiv_kerngen_v01_initial.hpp:227
const grid::Grid4DDataScalar< ScalarType > & k_grid_data()
Definition epsilon_divdiv_kerngen_v01_initial.hpp:91
void store_lmatrices()
Allocates memory for the local matrices, calls the kernel with storeLMatrices_ = true to assemble and store them, and sets applyStoredLMatrices_ so that future applies use the stored local matrices.
Definition epsilon_divdiv_kerngen_v01_initial.hpp:159
void apply_impl(const SrcVectorType &src, DstVectorType &dst)
Definition epsilon_divdiv_kerngen_v01_initial.hpp:186
Parallel data structure organizing the thick spherical shell metadata for distributed (MPI parallel) ...
Definition spherical_shell.hpp:2518
const std::map< SubdomainInfo, std::tuple< LocalSubdomainIdx, SubdomainNeighborhood > > & subdomains() const
Definition spherical_shell.hpp:2650
const DomainInfo & domain_info() const
Returns a const reference.
Definition spherical_shell.hpp:2647
int subdomain_num_nodes_radially() const
Equivalent to calling subdomain_num_nodes_radially( subdomain_refinement_level() )
Definition spherical_shell.hpp:861
int subdomain_num_nodes_per_side_laterally() const
Equivalent to calling subdomain_num_nodes_per_side_laterally( subdomain_refinement_level() )
Definition spherical_shell.hpp:852
Static assertion: VectorQ1Scalar satisfies VectorLike concept.
Definition vector_q1.hpp:168
const grid::Grid4DDataVec< ScalarType, VecDim > & grid_data() const
Get const reference to grid data.
Definition vector_q1.hpp:288
Timer supporting RAII scope or manual stop.
Definition timer.hpp:342
void stop()
Stop the timer and record elapsed time.
Definition timer.hpp:364
Concept for types that behave like linear operators.
Definition operator.hpp:57
void unpack_and_reduce_local_subdomain_boundaries(const grid::shell::DistributedDomain &domain, const GridDataType &data, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_recv_buffers, CommunicationReduction reduction=CommunicationReduction::SUM)
Unpacks and reduces local subdomain boundaries.
Definition communication.hpp:672
void pack_send_and_recv_local_subdomain_boundaries(const grid::shell::DistributedDomain &domain, const GridDataType &data, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_send_buffers, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_recv_buffers)
Packs, sends and recvs local subdomain boundaries using two sets of buffers.
Definition communication.hpp:242
Definition epsilon_divdiv_kerngen_v01_initial.hpp:16
void atomically_add_local_wedge_vector_coefficients(const grid::Grid4DDataVec< T, VecDim > &global_coefficients, const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int d, const dense::Vec< T, 6 > local_coefficients[2])
Performs an atomic add of the two local wedge coefficient vectors of a hex cell into the global coeff...
Definition kernel_helpers.hpp:465
constexpr int num_wedges_per_hex_cell
Definition kernel_helpers.hpp:5
void extract_local_wedge_vector_coefficients(dense::Vec< T, 6 >(&local_coefficients)[2], const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int d, const grid::Grid4DDataVec< T, VecDim > &global_coefficients)
Extracts the local vector coefficients for the two wedges of a hex cell from the global coefficient v...
Definition kernel_helpers.hpp:356
constexpr int num_nodes_per_wedge
Definition kernel_helpers.hpp:7
BoundaryConditionMapping[2] BoundaryConditions
Definition shell/bit_masks.hpp:37
ShellBoundaryFlag
FlagLike that indicates boundary types for the thick spherical shell.
Definition shell/bit_masks.hpp:12
Kokkos::MDRangePolicy< Kokkos::Rank< 4 > > local_domain_md_range_policy_cells(const DistributedDomain &distributed_domain)
Definition spherical_shell.hpp:2739
BoundaryConditionFlag
FlagLike that indicates the type of boundary condition
Definition shell/bit_masks.hpp:25
Kokkos::View< dense::Mat< ScalarType, Rows, Cols > ****[NumMatrices], Layout > Grid4DDataMatrices
Definition grid_types.hpp:173
Kokkos::View< ScalarType ***[VecDim], Layout > Grid3DDataVec
Definition grid_types.hpp:42
Kokkos::View< ScalarType ****, Layout > Grid4DDataScalar
Definition grid_types.hpp:27
Kokkos::View< ScalarType **, Layout > Grid2DDataScalar
Definition grid_types.hpp:21
OperatorApplyMode
Modes for applying an operator to a vector.
Definition operator.hpp:30
@ Replace
Overwrite the destination vector.
OperatorStoredMatrixMode
Modes for applying stored matrices.
Definition operator.hpp:47
@ Off
Do not use stored matrices.
OperatorCommunicationMode
Modes for communication during operator application.
Definition operator.hpp:40
@ CommunicateAdditively
Communicate and add results.
Definition mat.hpp:10
constexpr Mat diagonal() const
Definition mat.hpp:377
Definition vec.hpp:9
SoA (Structure-of-Arrays) 4D vector grid data.
Definition grid_types.hpp:51
auto extent(int i) const
Definition grid_types.hpp:75