Loading...
Searching...
No Matches
epsilon_divdiv_kerngen_v02_split_dimij.hpp
Go to the documentation of this file.
1#pragma once
2
3#include "../../../quadrature/quadrature.hpp"
5#include "dense/vec.hpp"
9#include "linalg/operator.hpp"
12#include "linalg/vector.hpp"
13#include "linalg/vector_q1.hpp"
14#include "util/timer.hpp"
15
17
21
22template < typename ScalarT, int VecDim = 3 >
24{
25 public:
28 using ScalarType = ScalarT;
30
31 private:
32 bool storeLMatrices_ =
33 false; // set to let apply_impl() know, that it should store the local matrices after assembling them
34 bool applyStoredLMatrices_ =
35 false; // set to make apply_impl() load and use the stored LMatrices for the operator application
36 Grid4DDataLocalMatrices lmatrices_;
37 bool single_quadpoint_ = true;
38
40
45 BoundaryConditions bcs_;
46
47 bool treat_boundary_;
48 bool diagonal_;
49
50 linalg::OperatorApplyMode operator_apply_mode_;
51 linalg::OperatorCommunicationMode operator_communication_mode_;
52 linalg::OperatorStoredMatrixMode operator_stored_matrix_mode_;
53
56
59
60 public:
67 BoundaryConditions bcs,
68 bool diagonal,
70 linalg::OperatorCommunicationMode operator_communication_mode =
73 : domain_( domain )
74 , grid_( grid )
75 , radii_( radii )
76 , k_( k )
77 , mask_( mask )
78 , treat_boundary_( true )
79 , diagonal_( diagonal )
80 , operator_apply_mode_( operator_apply_mode )
81 , operator_communication_mode_( operator_communication_mode )
82 , operator_stored_matrix_mode_( operator_stored_matrix_mode )
83 // TODO: we can reuse the send and recv buffers and pass in from the outside somehow
84 , send_buffers_( domain )
85 , recv_buffers_( domain )
86 {
87 bcs_[0] = bcs[0];
88 bcs_[1] = bcs[1];
89 }
90
92
93 /// @brief Getter for domain member
94 const grid::shell::DistributedDomain& get_domain() const { return domain_; }
95
96 /// @brief Getter for radii member
98
99 /// @brief Getter for grid member
101
102 /// @brief Retrives the local matrix stored in the operator
103 KOKKOS_INLINE_FUNCTION
105 const int local_subdomain_id,
106 const int x_cell,
107 const int y_cell,
108 const int r_cell,
109 const int wedge,
110 const int dimi,
111 const int dimj ) const
112 {
113 assert( lmatrices_.data() != nullptr );
115 for ( int i = 0; i < 6; ++i )
116 {
117 for ( int j = 0; j < 6; ++j )
118 {
119 ijslice( i, j ) =
120 lmatrices_( local_subdomain_id, x_cell, y_cell, r_cell, wedge )( i + dimi * 6, j + dimj * 6 );
121 }
122 }
123 return ijslice;
124 }
125
126 /// @brief Set the local matrix stored in the operator
127 KOKKOS_INLINE_FUNCTION
129 const int local_subdomain_id,
130 const int x_cell,
131 const int y_cell,
132 const int r_cell,
133 const int wedge,
134 const int dimi,
135 const int dimj,
137 {
138 assert( lmatrices_.data() != nullptr );
139 for ( int i = 0; i < 6; ++i )
140 {
141 for ( int j = 0; j < 6; ++j )
142 {
143 lmatrices_( local_subdomain_id, x_cell, y_cell, r_cell, wedge )( i + dimi * 6, j + dimj * 6 ) =
144 mat( i, j );
145 }
146 }
147 }
148
149 /// @brief Setter/Getter for app applyStoredLMatrices_: usage of stored local matrices during apply
150 void setApplyStoredLMatrices( bool v ) { applyStoredLMatrices_ = v; }
151
152 /// @brief S/Getter for diagonal member
153 void set_diagonal( bool v ) { diagonal_ = v; }
154
155 /// @brief
156 /// allocates memory for the local matrices
157 /// calls kernel with storeLMatrices_ = true to assemble and store the local matrices
158 /// sets applyStoredLMatrices_, such that future applies use the stored local matrices
// NOTE(review): the signature line (`void store_lmatrices()` per the index) and the
// remaining extent arguments of the Grid4DDataLocalMatrices allocation are missing
// from this extracted listing — restore from the repository before editing.
160 {
// The functor is captured by value in parallel_for, so this flag must be set before
// the launch; it makes operator() store the assembled matrices instead of applying.
161 storeLMatrices_ = true;
// Allocate and assemble only once; later calls merely re-enable stored-matrix apply.
162 if ( lmatrices_.data() == nullptr )
163 {
164 lmatrices_ = Grid4DDataLocalMatrices(
// NOTE(review): the label says "LaplaceSimple" — presumably copy-pasted from another
// operator; confirm and rename for accurate profiling attribution.
165 "LaplaceSimple::lmatrices_",
166 domain_.subdomains().size(),
170 Kokkos::parallel_for(
171 "assemble_store_lmatrices", grid::shell::local_domain_md_range_policy_cells( domain_ ), *this );
172 Kokkos::fence();
173 }
174 storeLMatrices_ = false;
// All subsequent apply_impl() calls will use the stored local matrices.
175 applyStoredLMatrices_ = true;
176 }
177
179 const linalg::OperatorApplyMode operator_apply_mode,
180 const linalg::OperatorCommunicationMode operator_communication_mode )
181 {
182 operator_apply_mode_ = operator_apply_mode;
183 operator_communication_mode_ = operator_communication_mode;
184 }
185
/// @brief Applies the operator: dst (+)= A * src, assembling element matrices on the
/// fly (or using stored ones), then communicating subdomain-boundary contributions.
/// NOTE(review): embedded lines 220 and 222 (the comm pack/send and unpack/reduce
/// call names, per the index) are missing from this extracted listing.
186 void apply_impl( const SrcVectorType& src, DstVectorType& dst )
187 {
188 util::Timer timer_apply( "vector_laplace_apply" );
// Stored matrices must exist whenever we intend to write to or read from them.
189 if ( storeLMatrices_ or applyStoredLMatrices_ )
190 assert( lmatrices_.data() != nullptr );
191
// Replace mode: zero dst first, since the kernel accumulates additively.
192 if ( operator_apply_mode_ == linalg::OperatorApplyMode::Replace )
193 {
194 assign( dst, 0 );
195 }
196
197 src_ = src.grid_data();
198 dst_ = dst.grid_data();
199
200 if ( src_.extent( 0 ) != dst_.extent( 0 ) || src_.extent( 1 ) != dst_.extent( 1 ) ||
201 src_.extent( 2 ) != dst_.extent( 2 ) || src_.extent( 3 ) != dst_.extent( 3 ) )
202 {
203 throw std::runtime_error( "VectorLaplace: src/dst mismatch" );
204 }
205
// NOTE(review): this check compares src against the grid, but reuses the
// "src/dst mismatch" message — consider "src/grid mismatch" for clearer diagnostics.
206 if ( src_.extent( 1 ) != grid_.extent( 1 ) || src_.extent( 2 ) != grid_.extent( 2 ) )
207 {
208 throw std::runtime_error( "VectorLaplace: src/dst mismatch" );
209 }
210
211 util::Timer timer_kernel( "vector_laplace_kernel" );
// The operator object itself is the Kokkos functor (see operator()).
212 Kokkos::parallel_for( "matvec", grid::shell::local_domain_md_range_policy_cells( domain_ ), *this );
213 Kokkos::fence();
214 timer_kernel.stop();
215
// Sum contributions on nodes shared between neighboring subdomains.
216 if ( operator_communication_mode_ == linalg::OperatorCommunicationMode::CommunicateAdditively )
217 {
218 util::Timer timer_comm( "vector_laplace_comm" );
219
221 domain_, dst_, send_buffers_, recv_buffers_ );
223 }
224 }
225
226 KOKKOS_INLINE_FUNCTION void
227 operator()( const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell ) const
228 {
229 // Compute the local element matrix.
231
232 if ( !applyStoredLMatrices_ )
233 {
234
235double wedge_surf_phy_coords[2][3][3];
236double quad_surface_coords[2][2][3];
237quad_surface_coords[0][0][0] = grid_(local_subdomain_id, x_cell, y_cell, 0);
238quad_surface_coords[0][0][1] = grid_(local_subdomain_id, x_cell, y_cell, 1);
239quad_surface_coords[0][0][2] = grid_(local_subdomain_id, x_cell, y_cell, 2);
240quad_surface_coords[0][1][0] = grid_(local_subdomain_id, x_cell, y_cell + 1, 0);
241quad_surface_coords[0][1][1] = grid_(local_subdomain_id, x_cell, y_cell + 1, 1);
242quad_surface_coords[0][1][2] = grid_(local_subdomain_id, x_cell, y_cell + 1, 2);
243quad_surface_coords[1][0][0] = grid_(local_subdomain_id, x_cell + 1, y_cell, 0);
244quad_surface_coords[1][0][1] = grid_(local_subdomain_id, x_cell + 1, y_cell, 1);
245quad_surface_coords[1][0][2] = grid_(local_subdomain_id, x_cell + 1, y_cell, 2);
246quad_surface_coords[1][1][0] = grid_(local_subdomain_id, x_cell + 1, y_cell + 1, 0);
247quad_surface_coords[1][1][1] = grid_(local_subdomain_id, x_cell + 1, y_cell + 1, 1);
248quad_surface_coords[1][1][2] = grid_(local_subdomain_id, x_cell + 1, y_cell + 1, 2);
249wedge_surf_phy_coords[0][0][0] = quad_surface_coords[0][0][0];
250wedge_surf_phy_coords[0][0][1] = quad_surface_coords[0][0][1];
251wedge_surf_phy_coords[0][0][2] = quad_surface_coords[0][0][2];
252wedge_surf_phy_coords[0][1][0] = quad_surface_coords[1][0][0];
253wedge_surf_phy_coords[0][1][1] = quad_surface_coords[1][0][1];
254wedge_surf_phy_coords[0][1][2] = quad_surface_coords[1][0][2];
255wedge_surf_phy_coords[0][2][0] = quad_surface_coords[0][1][0];
256wedge_surf_phy_coords[0][2][1] = quad_surface_coords[0][1][1];
257wedge_surf_phy_coords[0][2][2] = quad_surface_coords[0][1][2];
258wedge_surf_phy_coords[1][0][0] = quad_surface_coords[1][1][0];
259wedge_surf_phy_coords[1][0][1] = quad_surface_coords[1][1][1];
260wedge_surf_phy_coords[1][0][2] = quad_surface_coords[1][1][2];
261wedge_surf_phy_coords[1][1][0] = quad_surface_coords[0][1][0];
262wedge_surf_phy_coords[1][1][1] = quad_surface_coords[0][1][1];
263wedge_surf_phy_coords[1][1][2] = quad_surface_coords[0][1][2];
264wedge_surf_phy_coords[1][2][0] = quad_surface_coords[1][0][0];
265wedge_surf_phy_coords[1][2][1] = quad_surface_coords[1][0][1];
266wedge_surf_phy_coords[1][2][2] = quad_surface_coords[1][0][2];
267double r_0 = radii_(local_subdomain_id, r_cell);
268double r_1 = radii_(local_subdomain_id, r_cell + 1);
269double src_local_hex[3][2][6];
270int dim;
271for (dim = 0; dim < 3; dim += 1) {
272 src_local_hex[dim][0][0] = src_(local_subdomain_id, x_cell, y_cell, r_cell, dim);
273 src_local_hex[dim][0][1] = src_(local_subdomain_id, x_cell + 1, y_cell, r_cell, dim);
274 src_local_hex[dim][0][2] = src_(local_subdomain_id, x_cell, y_cell + 1, r_cell, dim);
275 src_local_hex[dim][0][3] = src_(local_subdomain_id, x_cell, y_cell, r_cell + 1, dim);
276 src_local_hex[dim][0][4] = src_(local_subdomain_id, x_cell + 1, y_cell, r_cell + 1, dim);
277 src_local_hex[dim][0][5] = src_(local_subdomain_id, x_cell, y_cell + 1, r_cell + 1, dim);
278 src_local_hex[dim][1][0] = src_(local_subdomain_id, x_cell + 1, y_cell + 1, r_cell, dim);
279 src_local_hex[dim][1][1] = src_(local_subdomain_id, x_cell, y_cell + 1, r_cell, dim);
280 src_local_hex[dim][1][2] = src_(local_subdomain_id, x_cell + 1, y_cell, r_cell, dim);
281 src_local_hex[dim][1][3] = src_(local_subdomain_id, x_cell + 1, y_cell + 1, r_cell + 1, dim);
282 src_local_hex[dim][1][4] = src_(local_subdomain_id, x_cell, y_cell + 1, r_cell + 1, dim);
283 src_local_hex[dim][1][5] = src_(local_subdomain_id, x_cell + 1, y_cell, r_cell + 1, dim);
284};
285double k_local_hex[2][6];
286k_local_hex[0][0] = k_(local_subdomain_id, x_cell, y_cell, r_cell);
287k_local_hex[0][1] = k_(local_subdomain_id, x_cell + 1, y_cell, r_cell);
288k_local_hex[0][2] = k_(local_subdomain_id, x_cell, y_cell + 1, r_cell);
289k_local_hex[0][3] = k_(local_subdomain_id, x_cell, y_cell, r_cell + 1);
290k_local_hex[0][4] = k_(local_subdomain_id, x_cell + 1, y_cell, r_cell + 1);
291k_local_hex[0][5] = k_(local_subdomain_id, x_cell, y_cell + 1, r_cell + 1);
292k_local_hex[1][0] = k_(local_subdomain_id, x_cell + 1, y_cell + 1, r_cell);
293k_local_hex[1][1] = k_(local_subdomain_id, x_cell, y_cell + 1, r_cell);
294k_local_hex[1][2] = k_(local_subdomain_id, x_cell + 1, y_cell, r_cell);
295k_local_hex[1][3] = k_(local_subdomain_id, x_cell + 1, y_cell + 1, r_cell + 1);
296k_local_hex[1][4] = k_(local_subdomain_id, x_cell, y_cell + 1, r_cell + 1);
297k_local_hex[1][5] = k_(local_subdomain_id, x_cell + 1, y_cell, r_cell + 1);
298double qp_array[6][3];
299double qw_array[6];
300qp_array[0][0] = 0.66666666666666663;
301qp_array[1][0] = 0.16666666666666671;
302qp_array[2][0] = 0.16666666666666671;
303qp_array[3][0] = 0.66666666666666663;
304qp_array[4][0] = 0.16666666666666671;
305qp_array[5][0] = 0.16666666666666671;
306qp_array[0][1] = 0.16666666666666671;
307qp_array[1][1] = 0.66666666666666663;
308qp_array[2][1] = 0.16666666666666671;
309qp_array[3][1] = 0.16666666666666671;
310qp_array[4][1] = 0.66666666666666663;
311qp_array[5][1] = 0.16666666666666671;
312qp_array[0][2] = -0.57735026918962573;
313qp_array[1][2] = -0.57735026918962573;
314qp_array[2][2] = -0.57735026918962573;
315qp_array[3][2] = 0.57735026918962573;
316qp_array[4][2] = 0.57735026918962573;
317qp_array[5][2] = 0.57735026918962573;
318qw_array[0] = 0.16666666666666671;
319qw_array[1] = 0.16666666666666671;
320qw_array[2] = 0.16666666666666671;
321qw_array[3] = 0.16666666666666671;
322qw_array[4] = 0.16666666666666671;
323qw_array[5] = 0.16666666666666671;
324int cmb_shift = ((treat_boundary_ && diagonal_ == false && r_cell == 0) ? (
325 3
326)
327: (
328 0
329));
330int max_rad = radii_.extent(1) - 1;
331int surface_shift = ((treat_boundary_ && diagonal_ == false && max_rad == r_cell + 1) ? (
332 3
333)
334: (
335 0
336));
337double dst_array[3][2][6] = {0};
338int w = 0;
339for (w = 0; w < 2; w += 1) {
340 int q = 0;
341 for (q = 0; q < 6; q += 1) {
342 /* Coefficient evaluation on current wedge w */;
343 double tmpcse_k_eval_0 = (1.0/2.0)*qp_array[q][2];
344 double tmpcse_k_eval_1 = 1.0/2.0 - tmpcse_k_eval_0;
345 double tmpcse_k_eval_2 = tmpcse_k_eval_0 + 1.0/2.0;
346 double tmpcse_k_eval_3 = -qp_array[q][0] - qp_array[q][1] + 1;
347 double k_eval = tmpcse_k_eval_1*tmpcse_k_eval_3*k_local_hex[w][0] + tmpcse_k_eval_1*k_local_hex[w][1]*qp_array[q][0] + tmpcse_k_eval_1*k_local_hex[w][2]*qp_array[q][1] + tmpcse_k_eval_2*tmpcse_k_eval_3*k_local_hex[w][3] + tmpcse_k_eval_2*k_local_hex[w][4]*qp_array[q][0] + tmpcse_k_eval_2*k_local_hex[w][5]*qp_array[q][1];
348 /* Computation + Inversion of the Jacobian */;
349 double tmpcse_J_0 = -1.0/2.0*r_0 + (1.0/2.0)*r_1;
350 double tmpcse_J_1 = r_0 + tmpcse_J_0*(qp_array[q][2] + 1);
351 double tmpcse_J_2 = -qp_array[q][0] - qp_array[q][1] + 1;
352 double J_0_0 = tmpcse_J_1*(-wedge_surf_phy_coords[w][0][0] + wedge_surf_phy_coords[w][1][0]);
353 double J_0_1 = tmpcse_J_1*(-wedge_surf_phy_coords[w][0][0] + wedge_surf_phy_coords[w][2][0]);
354 double J_0_2 = tmpcse_J_0*(tmpcse_J_2*wedge_surf_phy_coords[w][0][0] + qp_array[q][0]*wedge_surf_phy_coords[w][1][0] + qp_array[q][1]*wedge_surf_phy_coords[w][2][0]);
355 double J_1_0 = tmpcse_J_1*(-wedge_surf_phy_coords[w][0][1] + wedge_surf_phy_coords[w][1][1]);
356 double J_1_1 = tmpcse_J_1*(-wedge_surf_phy_coords[w][0][1] + wedge_surf_phy_coords[w][2][1]);
357 double J_1_2 = tmpcse_J_0*(tmpcse_J_2*wedge_surf_phy_coords[w][0][1] + qp_array[q][0]*wedge_surf_phy_coords[w][1][1] + qp_array[q][1]*wedge_surf_phy_coords[w][2][1]);
358 double J_2_0 = tmpcse_J_1*(-wedge_surf_phy_coords[w][0][2] + wedge_surf_phy_coords[w][1][2]);
359 double J_2_1 = tmpcse_J_1*(-wedge_surf_phy_coords[w][0][2] + wedge_surf_phy_coords[w][2][2]);
360 double J_2_2 = tmpcse_J_0*(tmpcse_J_2*wedge_surf_phy_coords[w][0][2] + qp_array[q][0]*wedge_surf_phy_coords[w][1][2] + qp_array[q][1]*wedge_surf_phy_coords[w][2][2]);
361 double J_det = J_0_0*J_1_1*J_2_2 - J_0_0*J_1_2*J_2_1 - J_0_1*J_1_0*J_2_2 + J_0_1*J_1_2*J_2_0 + J_0_2*J_1_0*J_2_1 - J_0_2*J_1_1*J_2_0;
362 double tmpcse_J_invT_0 = 1.0/J_det;
363 double J_invT_cse_0_0 = tmpcse_J_invT_0*(J_1_1*J_2_2 - J_1_2*J_2_1);
364 double J_invT_cse_0_1 = tmpcse_J_invT_0*(-J_1_0*J_2_2 + J_1_2*J_2_0);
365 double J_invT_cse_0_2 = tmpcse_J_invT_0*(J_1_0*J_2_1 - J_1_1*J_2_0);
366 double J_invT_cse_1_0 = tmpcse_J_invT_0*(-J_0_1*J_2_2 + J_0_2*J_2_1);
367 double J_invT_cse_1_1 = tmpcse_J_invT_0*(J_0_0*J_2_2 - J_0_2*J_2_0);
368 double J_invT_cse_1_2 = tmpcse_J_invT_0*(-J_0_0*J_2_1 + J_0_1*J_2_0);
369 double J_invT_cse_2_0 = tmpcse_J_invT_0*(J_0_1*J_1_2 - J_0_2*J_1_1);
370 double J_invT_cse_2_1 = tmpcse_J_invT_0*(-J_0_0*J_1_2 + J_0_2*J_1_0);
371 double J_invT_cse_2_2 = tmpcse_J_invT_0*(J_0_0*J_1_1 - J_0_1*J_1_0);
372
373 double scalar_grad[6][3] = {0};
374 double tmpcse_grad_i_0 = (1.0/2.0)*qp_array[q][2];
375 double tmpcse_grad_i_1 = tmpcse_grad_i_0 - 1.0/2.0;
376 double tmpcse_grad_i_2 = (1.0/2.0)*qp_array[q][0];
377 double tmpcse_grad_i_3 = (1.0/2.0)*qp_array[q][1];
378 double tmpcse_grad_i_4 = tmpcse_grad_i_2 + tmpcse_grad_i_3 - 1.0/2.0;
379 double tmpcse_grad_i_5 = J_invT_cse_0_2*tmpcse_grad_i_2;
380 double tmpcse_grad_i_6 = -tmpcse_grad_i_1;
381 double tmpcse_grad_i_7 = J_invT_cse_1_2*tmpcse_grad_i_2;
382 double tmpcse_grad_i_8 = J_invT_cse_2_2*tmpcse_grad_i_2;
383 double tmpcse_grad_i_9 = J_invT_cse_0_2*tmpcse_grad_i_3;
384 double tmpcse_grad_i_10 = J_invT_cse_1_2*tmpcse_grad_i_3;
385 double tmpcse_grad_i_11 = J_invT_cse_2_2*tmpcse_grad_i_3;
386 double tmpcse_grad_i_12 = tmpcse_grad_i_0 + 1.0/2.0;
387 double tmpcse_grad_i_13 = -tmpcse_grad_i_12;
388 double tmpcse_grad_i_14 = -tmpcse_grad_i_4;
389 scalar_grad[0][0] = J_invT_cse_0_0*tmpcse_grad_i_1 + J_invT_cse_0_1*tmpcse_grad_i_1 + J_invT_cse_0_2*tmpcse_grad_i_4;
390 scalar_grad[0][1] = J_invT_cse_1_0*tmpcse_grad_i_1 + J_invT_cse_1_1*tmpcse_grad_i_1 + J_invT_cse_1_2*tmpcse_grad_i_4;
391 scalar_grad[0][2] = J_invT_cse_2_0*tmpcse_grad_i_1 + J_invT_cse_2_1*tmpcse_grad_i_1 + J_invT_cse_2_2*tmpcse_grad_i_4;
392 scalar_grad[1][0] = J_invT_cse_0_0*tmpcse_grad_i_6 - tmpcse_grad_i_5;
393 scalar_grad[1][1] = J_invT_cse_1_0*tmpcse_grad_i_6 - tmpcse_grad_i_7;
394 scalar_grad[1][2] = J_invT_cse_2_0*tmpcse_grad_i_6 - tmpcse_grad_i_8;
395 scalar_grad[2][0] = J_invT_cse_0_1*tmpcse_grad_i_6 - tmpcse_grad_i_9;
396 scalar_grad[2][1] = J_invT_cse_1_1*tmpcse_grad_i_6 - tmpcse_grad_i_10;
397 scalar_grad[2][2] = J_invT_cse_2_1*tmpcse_grad_i_6 - tmpcse_grad_i_11;
398 scalar_grad[3][0] = J_invT_cse_0_0*tmpcse_grad_i_13 + J_invT_cse_0_1*tmpcse_grad_i_13 + J_invT_cse_0_2*tmpcse_grad_i_14;
399 scalar_grad[3][1] = J_invT_cse_1_0*tmpcse_grad_i_13 + J_invT_cse_1_1*tmpcse_grad_i_13 + J_invT_cse_1_2*tmpcse_grad_i_14;
400 scalar_grad[3][2] = J_invT_cse_2_0*tmpcse_grad_i_13 + J_invT_cse_2_1*tmpcse_grad_i_13 + J_invT_cse_2_2*tmpcse_grad_i_14;
401 scalar_grad[4][0] = J_invT_cse_0_0*tmpcse_grad_i_12 + tmpcse_grad_i_5;
402 scalar_grad[4][1] = J_invT_cse_1_0*tmpcse_grad_i_12 + tmpcse_grad_i_7;
403 scalar_grad[4][2] = J_invT_cse_2_0*tmpcse_grad_i_12 + tmpcse_grad_i_8;
404 scalar_grad[5][0] = J_invT_cse_0_1*tmpcse_grad_i_12 + tmpcse_grad_i_9;
405 scalar_grad[5][1] = J_invT_cse_1_1*tmpcse_grad_i_12 + tmpcse_grad_i_10;
406 scalar_grad[5][2] = J_invT_cse_2_1*tmpcse_grad_i_12 + tmpcse_grad_i_11;
407 int dimj;
408
409 double grad_u[3][3] = {0};
410 double div_u = 0.0;
411 int node_idx;
412 for (dimj = 0; dimj < 3; dimj += 1) {
413 if (diagonal_ == false) {
414 for (node_idx = cmb_shift; node_idx < 6 - surface_shift; node_idx += 1) {
415
416 double E_grad_trial[3][3] = {0};
417 E_grad_trial[0][dimj] = scalar_grad[node_idx][0];
418 E_grad_trial[1][dimj] = scalar_grad[node_idx][1];
419 E_grad_trial[2][dimj] = scalar_grad[node_idx][2];
420 double tmpcse_symgrad_trial_0 = 0.5*E_grad_trial[0][1] + 0.5*E_grad_trial[1][0];
421 double tmpcse_symgrad_trial_1 = 0.5*E_grad_trial[0][2] + 0.5*E_grad_trial[2][0];
422 double tmpcse_symgrad_trial_2 = 0.5*E_grad_trial[1][2] + 0.5*E_grad_trial[2][1];
423 grad_u[0][0] = 1.0*E_grad_trial[0][0]*src_local_hex[dimj][w][node_idx] + grad_u[0][0];
424 grad_u[0][1] = tmpcse_symgrad_trial_0*src_local_hex[dimj][w][node_idx] + grad_u[0][1];
425 grad_u[0][2] = tmpcse_symgrad_trial_1*src_local_hex[dimj][w][node_idx] + grad_u[0][2];
426 grad_u[1][0] = tmpcse_symgrad_trial_0*src_local_hex[dimj][w][node_idx] + grad_u[1][0];
427 grad_u[1][1] = 1.0*E_grad_trial[1][1]*src_local_hex[dimj][w][node_idx] + grad_u[1][1];
428 grad_u[1][2] = tmpcse_symgrad_trial_2*src_local_hex[dimj][w][node_idx] + grad_u[1][2];
429 grad_u[2][0] = tmpcse_symgrad_trial_1*src_local_hex[dimj][w][node_idx] + grad_u[2][0];
430 grad_u[2][1] = tmpcse_symgrad_trial_2*src_local_hex[dimj][w][node_idx] + grad_u[2][1];
431 grad_u[2][2] = 1.0*E_grad_trial[2][2]*src_local_hex[dimj][w][node_idx] + grad_u[2][2];
432 div_u = div_u + E_grad_trial[dimj][dimj]*src_local_hex[dimj][w][node_idx];
433 };
434 };
435 };
436 int dimi;
437 for (dimi = 0; dimi < 3; dimi += 1) {
438 if (diagonal_ == false) {
439 for (node_idx = cmb_shift; node_idx < 6 - surface_shift; node_idx += 1) {
440
441 double E_grad_test[3][3] = {0};
442 E_grad_test[0][dimi] = scalar_grad[node_idx][0];
443 E_grad_test[1][dimi] = scalar_grad[node_idx][1];
444 E_grad_test[2][dimi] = scalar_grad[node_idx][2];
445 double tmpcse_symgrad_test_0 = 0.5*E_grad_test[0][1] + 0.5*E_grad_test[1][0];
446 double tmpcse_symgrad_test_1 = 0.5*E_grad_test[0][2] + 0.5*E_grad_test[2][0];
447 double tmpcse_symgrad_test_2 = 0.5*E_grad_test[1][2] + 0.5*E_grad_test[2][1];
448 double tmpcse_pairing_0 = 2*tmpcse_symgrad_test_0;
449 double tmpcse_pairing_1 = 2*tmpcse_symgrad_test_1;
450 double tmpcse_pairing_2 = 2*tmpcse_symgrad_test_2;
451 dst_array[dimi][w][node_idx] = k_eval*(-0.66666666666666663*div_u*E_grad_test[dimi][dimi] + tmpcse_pairing_0*grad_u[0][1] + tmpcse_pairing_0*grad_u[1][0] + tmpcse_pairing_1*grad_u[0][2] + tmpcse_pairing_1*grad_u[2][0] + tmpcse_pairing_2*grad_u[1][2] + tmpcse_pairing_2*grad_u[2][1] + 2.0*E_grad_test[0][0]*grad_u[0][0] + 2.0*E_grad_test[1][1]*grad_u[1][1] + 2.0*E_grad_test[2][2]*grad_u[2][2])*fabs(J_det)*qw_array[q] + dst_array[dimi][w][node_idx];
452 };
453 };
454 };
455 int dim_diagBC;
456 for (dim_diagBC = 0; dim_diagBC < 3; dim_diagBC += 1) {
457 if (diagonal_ || treat_boundary_ && (r_cell == 0 || r_cell + 1 == max_rad)) {
458 int node_idx;
459 for (node_idx = surface_shift; node_idx < 6 - cmb_shift; node_idx += 1) {
460
461 double E_grad_test[3][3] = {0};
462 E_grad_test[0][dim_diagBC] = scalar_grad[node_idx][0];
463 E_grad_test[1][dim_diagBC] = scalar_grad[node_idx][1];
464 E_grad_test[2][dim_diagBC] = scalar_grad[node_idx][2];
465
466 double grad_u_diag[3][3] = {0};
467 double tmpcse_symgrad_test_0 = 0.5*E_grad_test[0][1] + 0.5*E_grad_test[1][0];
468 double tmpcse_symgrad_test_1 = 0.5*E_grad_test[0][2] + 0.5*E_grad_test[2][0];
469 double tmpcse_symgrad_test_2 = 0.5*E_grad_test[1][2] + 0.5*E_grad_test[2][1];
470 grad_u_diag[0][0] = 1.0*E_grad_test[0][0]*src_local_hex[dim_diagBC][w][node_idx];
471 grad_u_diag[0][1] = tmpcse_symgrad_test_0*src_local_hex[dim_diagBC][w][node_idx];
472 grad_u_diag[0][2] = tmpcse_symgrad_test_1*src_local_hex[dim_diagBC][w][node_idx];
473 grad_u_diag[1][0] = tmpcse_symgrad_test_0*src_local_hex[dim_diagBC][w][node_idx];
474 grad_u_diag[1][1] = 1.0*E_grad_test[1][1]*src_local_hex[dim_diagBC][w][node_idx];
475 grad_u_diag[1][2] = tmpcse_symgrad_test_2*src_local_hex[dim_diagBC][w][node_idx];
476 grad_u_diag[2][0] = tmpcse_symgrad_test_1*src_local_hex[dim_diagBC][w][node_idx];
477 grad_u_diag[2][1] = tmpcse_symgrad_test_2*src_local_hex[dim_diagBC][w][node_idx];
478 grad_u_diag[2][2] = 1.0*E_grad_test[2][2]*src_local_hex[dim_diagBC][w][node_idx];
479 double tmpcse_pairing_0 = 4*src_local_hex[dim_diagBC][w][node_idx];
480 double tmpcse_pairing_1 = 2.0*src_local_hex[dim_diagBC][w][node_idx];
481 dst_array[dim_diagBC][w][node_idx] = k_eval*(tmpcse_pairing_0*pow(tmpcse_symgrad_test_0, 2) + tmpcse_pairing_0*pow(tmpcse_symgrad_test_1, 2) + tmpcse_pairing_0*pow(tmpcse_symgrad_test_2, 2) + tmpcse_pairing_1*pow(E_grad_test[0][0], 2) + tmpcse_pairing_1*pow(E_grad_test[1][1], 2) + tmpcse_pairing_1*pow(E_grad_test[2][2], 2) - 0.66666666666666663*pow(E_grad_test[dim_diagBC][dim_diagBC], 2)*src_local_hex[dim_diagBC][w][node_idx])*fabs(J_det)*qw_array[q] + dst_array[dim_diagBC][w][node_idx];
482 };
483 };
484 };
485 };
486};
487int dim_add;
488for (dim_add = 0; dim_add < 3; dim_add += 1) {
489 Kokkos::atomic_add(&dst_(local_subdomain_id, x_cell, y_cell, r_cell, dim_add), dst_array[dim_add][0][0]);
490 Kokkos::atomic_add(&dst_(local_subdomain_id, x_cell + 1, y_cell, r_cell, dim_add), dst_array[dim_add][0][1] + dst_array[dim_add][1][2]);
491 Kokkos::atomic_add(&dst_(local_subdomain_id, x_cell, y_cell + 1, r_cell, dim_add), dst_array[dim_add][0][2] + dst_array[dim_add][1][1]);
492 Kokkos::atomic_add(&dst_(local_subdomain_id, x_cell, y_cell, r_cell + 1, dim_add), dst_array[dim_add][0][3]);
493 Kokkos::atomic_add(&dst_(local_subdomain_id, x_cell + 1, y_cell, r_cell + 1, dim_add), dst_array[dim_add][0][4] + dst_array[dim_add][1][5]);
494 Kokkos::atomic_add(&dst_(local_subdomain_id, x_cell, y_cell + 1, r_cell + 1, dim_add), dst_array[dim_add][0][5] + dst_array[dim_add][1][4]);
495 Kokkos::atomic_add(&dst_(local_subdomain_id, x_cell + 1, y_cell + 1, r_cell, dim_add), dst_array[dim_add][1][0]);
496 Kokkos::atomic_add(&dst_(local_subdomain_id, x_cell + 1, y_cell + 1, r_cell + 1, dim_add), dst_array[dim_add][1][3]);
497};
498 }
499 else
500 {
501 // load LMatrix for both local wedges
502 A[0] = lmatrices_( local_subdomain_id, x_cell, y_cell, r_cell, 0 );
503 A[1] = lmatrices_( local_subdomain_id, x_cell, y_cell, r_cell, 1 );
504 }
505
506 if ( diagonal_ )
507 {
508 A[0] = A[0].diagonal();
509 A[1] = A[1].diagonal();
510 }
511
512 if ( storeLMatrices_ )
513 {
514 // write local matrices to mem
515 lmatrices_( local_subdomain_id, x_cell, y_cell, r_cell, 0 ) = A[0];
516 lmatrices_( local_subdomain_id, x_cell, y_cell, r_cell, 1 ) = A[1];
517 }
518 else
519 {
521 for ( int dimj = 0; dimj < 3; dimj++ )
522 {
525 src_d, local_subdomain_id, x_cell, y_cell, r_cell, dimj, src_ );
526
527 for ( int wedge = 0; wedge < num_wedges_per_hex_cell; wedge++ )
528 {
529 for ( int i = 0; i < num_nodes_per_wedge; i++ )
530 {
531 src[wedge]( dimj * num_nodes_per_wedge + i ) = src_d[wedge]( i );
532 }
533 }
534 }
535 //extract_local_wedge_vector_coefficients( src, local_subdomain_id, x_cell, y_cell, r_cell, dimj, src_ );
536
538
539 dst[0] = A[0] * src[0];
540 dst[1] = A[1] * src[1];
541
542 //atomically_add_local_wedge_vector_coefficients( dst_, local_subdomain_id, x_cell, y_cell, r_cell, dimi, dst );
543 for ( int dimi = 0; dimi < 3; dimi++ )
544 {
546 dst_d[0] = dst[0].template slice< 6 >( dimi * num_nodes_per_wedge );
547 dst_d[1] = dst[1].template slice< 6 >( dimi * num_nodes_per_wedge );
548
550 dst_, local_subdomain_id, x_cell, y_cell, r_cell, dimi, dst_d );
551 }
552 }
553 }
554};
555
558
559} // namespace terra::fe::wedge::operators::shell::epsdivdiv_history
Send and receive buffers for all process-local subdomain boundaries.
Definition communication.hpp:56
void set_diagonal(bool v)
Setter for the diagonal member flag.
Definition epsilon_divdiv_kerngen_v02_split_dimij.hpp:153
void set_operator_apply_and_communication_modes(const linalg::OperatorApplyMode operator_apply_mode, const linalg::OperatorCommunicationMode operator_communication_mode)
Definition epsilon_divdiv_kerngen_v02_split_dimij.hpp:178
terra::grid::Grid4DDataMatrices< ScalarType, 18, 18, 2 > Grid4DDataLocalMatrices
Definition epsilon_divdiv_kerngen_v02_split_dimij.hpp:29
const grid::shell::DistributedDomain & get_domain() const
Getter for domain member.
Definition epsilon_divdiv_kerngen_v02_split_dimij.hpp:94
dense::Mat< ScalarT, 6, 6 > get_lmatrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge, const int dimi, const int dimj) const
Retrieves the local matrix stored in the operator.
Definition epsilon_divdiv_kerngen_v02_split_dimij.hpp:104
void setApplyStoredLMatrices(bool v)
Setter for applyStoredLMatrices_: controls the usage of stored local matrices during apply.
Definition epsilon_divdiv_kerngen_v02_split_dimij.hpp:150
ScalarT ScalarType
Definition epsilon_divdiv_kerngen_v02_split_dimij.hpp:28
grid::Grid3DDataVec< ScalarT, 3 > get_grid() const
Getter for grid member.
Definition epsilon_divdiv_kerngen_v02_split_dimij.hpp:100
void set_lmatrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge, const int dimi, const int dimj, dense::Mat< ScalarT, 6, 6 > mat) const
Set the local matrix stored in the operator.
Definition epsilon_divdiv_kerngen_v02_split_dimij.hpp:128
grid::Grid2DDataScalar< ScalarT > get_radii() const
Getter for radii member.
Definition epsilon_divdiv_kerngen_v02_split_dimij.hpp:97
void apply_impl(const SrcVectorType &src, DstVectorType &dst)
Definition epsilon_divdiv_kerngen_v02_split_dimij.hpp:186
const grid::Grid4DDataScalar< ScalarType > & k_grid_data()
Definition epsilon_divdiv_kerngen_v02_split_dimij.hpp:91
void store_lmatrices()
allocates memory for the local matrices, calls the kernel with storeLMatrices_ = true to assemble and store the local matrices, and sets applyStoredLMatrices_ so future applies use them.
Definition epsilon_divdiv_kerngen_v02_split_dimij.hpp:159
EpsilonDivDivKerngenV02SplitDimij(const grid::shell::DistributedDomain &domain, const grid::Grid3DDataVec< ScalarT, 3 > &grid, const grid::Grid2DDataScalar< ScalarT > &radii, const grid::Grid4DDataScalar< grid::shell::ShellBoundaryFlag > &mask, const grid::Grid4DDataScalar< ScalarT > &k, BoundaryConditions bcs, bool diagonal, linalg::OperatorApplyMode operator_apply_mode=linalg::OperatorApplyMode::Replace, linalg::OperatorCommunicationMode operator_communication_mode=linalg::OperatorCommunicationMode::CommunicateAdditively, linalg::OperatorStoredMatrixMode operator_stored_matrix_mode=linalg::OperatorStoredMatrixMode::Off)
Definition epsilon_divdiv_kerngen_v02_split_dimij.hpp:61
void operator()(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell) const
Definition epsilon_divdiv_kerngen_v02_split_dimij.hpp:227
Parallel data structure organizing the thick spherical shell metadata for distributed (MPI parallel) ...
Definition spherical_shell.hpp:2518
const std::map< SubdomainInfo, std::tuple< LocalSubdomainIdx, SubdomainNeighborhood > > & subdomains() const
Definition spherical_shell.hpp:2650
const DomainInfo & domain_info() const
Returns a const reference.
Definition spherical_shell.hpp:2647
int subdomain_num_nodes_radially() const
Equivalent to calling subdomain_num_nodes_radially( subdomain_refinement_level() )
Definition spherical_shell.hpp:861
int subdomain_num_nodes_per_side_laterally() const
Equivalent to calling subdomain_num_nodes_per_side_laterally( subdomain_refinement_level() )
Definition spherical_shell.hpp:852
Static assertion: VectorQ1Scalar satisfies VectorLike concept.
Definition vector_q1.hpp:168
const grid::Grid4DDataVec< ScalarType, VecDim > & grid_data() const
Get const reference to grid data.
Definition vector_q1.hpp:288
Timer supporting RAII scope or manual stop.
Definition timer.hpp:342
void stop()
Stop the timer and record elapsed time.
Definition timer.hpp:364
Concept for types that behave like linear operators.
Definition operator.hpp:57
void unpack_and_reduce_local_subdomain_boundaries(const grid::shell::DistributedDomain &domain, const GridDataType &data, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_recv_buffers, CommunicationReduction reduction=CommunicationReduction::SUM)
Unpacks and reduces local subdomain boundaries.
Definition communication.hpp:672
void pack_send_and_recv_local_subdomain_boundaries(const grid::shell::DistributedDomain &domain, const GridDataType &data, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_send_buffers, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_recv_buffers)
Packs, sends and recvs local subdomain boundaries using two sets of buffers.
Definition communication.hpp:242
Definition epsilon_divdiv_kerngen_v01_initial.hpp:16
void atomically_add_local_wedge_vector_coefficients(const grid::Grid4DDataVec< T, VecDim > &global_coefficients, const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int d, const dense::Vec< T, 6 > local_coefficients[2])
Performs an atomic add of the two local wedge coefficient vectors of a hex cell into the global coeff...
Definition kernel_helpers.hpp:465
constexpr int num_wedges_per_hex_cell
Definition kernel_helpers.hpp:5
void extract_local_wedge_vector_coefficients(dense::Vec< T, 6 >(&local_coefficients)[2], const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int d, const grid::Grid4DDataVec< T, VecDim > &global_coefficients)
Extracts the local vector coefficients for the two wedges of a hex cell from the global coefficient v...
Definition kernel_helpers.hpp:356
constexpr int num_nodes_per_wedge
Definition kernel_helpers.hpp:7
BoundaryConditionMapping[2] BoundaryConditions
Definition shell/bit_masks.hpp:37
ShellBoundaryFlag
FlagLike that indicates boundary types for the thick spherical shell.
Definition shell/bit_masks.hpp:12
Kokkos::MDRangePolicy< Kokkos::Rank< 4 > > local_domain_md_range_policy_cells(const DistributedDomain &distributed_domain)
Definition spherical_shell.hpp:2739
BoundaryConditionFlag
FlagLike that indicates the type of boundary condition
Definition shell/bit_masks.hpp:25
Kokkos::View< dense::Mat< ScalarType, Rows, Cols > ****[NumMatrices], Layout > Grid4DDataMatrices
Definition grid_types.hpp:173
Kokkos::View< ScalarType ***[VecDim], Layout > Grid3DDataVec
Definition grid_types.hpp:42
Kokkos::View< ScalarType ****, Layout > Grid4DDataScalar
Definition grid_types.hpp:27
Kokkos::View< ScalarType **, Layout > Grid2DDataScalar
Definition grid_types.hpp:21
OperatorApplyMode
Modes for applying an operator to a vector.
Definition operator.hpp:30
@ Replace
Overwrite the destination vector.
OperatorStoredMatrixMode
Modes for applying stored matrices.
Definition operator.hpp:47
@ Off
Do not use stored matrices.
OperatorCommunicationMode
Modes for communication during operator application.
Definition operator.hpp:40
@ CommunicateAdditively
Communicate and add results.
Definition mat.hpp:10
constexpr Mat diagonal() const
Definition mat.hpp:377
Definition vec.hpp:9
SoA (Structure-of-Arrays) 4D vector grid data.
Definition grid_types.hpp:51
auto extent(int i) const
Definition grid_types.hpp:75