Loading...
Searching...
No Matches
epsilon_divdiv_kerngen_v02b_single_quadpoint.hpp
Go to the documentation of this file.
1#pragma once
2
3#include "../../../quadrature/quadrature.hpp"
5#include "dense/vec.hpp"
9#include "linalg/operator.hpp"
12#include "linalg/vector.hpp"
13#include "linalg/vector_q1.hpp"
14#include "util/timer.hpp"
15
17
21
22template < typename ScalarT, int VecDim = 3 >
24{
25 public:
28 using ScalarType = ScalarT;
30
31 private:
32 bool storeLMatrices_ =
33 false; // set to let apply_impl() know, that it should store the local matrices after assembling them
34 bool applyStoredLMatrices_ =
35 false; // set to make apply_impl() load and use the stored LMatrices for the operator application
36 Grid4DDataLocalMatrices lmatrices_;
37 bool single_quadpoint_ = true;
38
40
45 BoundaryConditions bcs_;
46
47 bool treat_boundary_;
48 bool diagonal_;
49
50 linalg::OperatorApplyMode operator_apply_mode_;
51 linalg::OperatorCommunicationMode operator_communication_mode_;
52 linalg::OperatorStoredMatrixMode operator_stored_matrix_mode_;
53
56
59
60 public:
67 BoundaryConditions bcs,
68 bool diagonal,
70 linalg::OperatorCommunicationMode operator_communication_mode =
73 : domain_( domain )
74 , grid_( grid )
75 , radii_( radii )
76 , k_( k )
77 , mask_( mask )
78 , treat_boundary_( true )
79 , diagonal_( diagonal )
80 , operator_apply_mode_( operator_apply_mode )
81 , operator_communication_mode_( operator_communication_mode )
82 , operator_stored_matrix_mode_( operator_stored_matrix_mode )
83 // TODO: we can reuse the send and recv buffers and pass in from the outside somehow
84 , send_buffers_( domain )
85 , recv_buffers_( domain )
86 {
87 bcs_[0] = bcs[0];
88 bcs_[1] = bcs[1];
89 }
90
92
 93 /// @brief Getter for domain member.
 /// @return Const reference to the distributed shell domain this operator was constructed with (no copy).
 94 const grid::shell::DistributedDomain& get_domain() const { return domain_; }
95
96 /// @brief Getter for radii member
98
99 /// @brief Getter for grid member
101
 102 /// @brief Retrieves the local matrix stored in the operator.
 /// Copies the 6x6 (dimi, dimj) dimension block of the stored 18x18 wedge-local
 /// matrix for the given hex cell (x_cell, y_cell, r_cell) and wedge into a dense
 /// 6x6 matrix and returns it. Only valid after the matrices have been stored
 /// (asserted below); see store_lmatrices().
 103 KOKKOS_INLINE_FUNCTION
 105 const int local_subdomain_id,
 106 const int x_cell,
 107 const int y_cell,
 108 const int r_cell,
 109 const int wedge,
 110 const int dimi,
 111 const int dimj ) const
 112 {
 113 assert( lmatrices_.data() != nullptr );
 // NOTE(review): the return-type/name line and the declaration of 'ijslice' are
 // elided in this listing (doxygen lines 104/114) — per the member index the
 // return type is dense::Mat< ScalarT, 6, 6 >.
 115 for ( int i = 0; i < 6; ++i )
 116 {
 117 for ( int j = 0; j < 6; ++j )
 118 {
 119 ijslice( i, j ) =
 120 lmatrices_( local_subdomain_id, x_cell, y_cell, r_cell, wedge )( i + dimi * 6, j + dimj * 6 );
 121 }
 122 }
 123 return ijslice;
 124 }
125
 126 /// @brief Set the local matrix stored in the operator.
 /// Writes the given dense 6x6 matrix into the (dimi, dimj) dimension block of the
 /// stored 18x18 wedge-local matrix for the given hex cell and wedge.
 /// Requires lmatrices_ to have been allocated (asserted below).
 127 KOKKOS_INLINE_FUNCTION
 129 const int local_subdomain_id,
 130 const int x_cell,
 131 const int y_cell,
 132 const int r_cell,
 133 const int wedge,
 134 const int dimi,
 135 const int dimj,
 137 {
 138 assert( lmatrices_.data() != nullptr );
 139 for ( int i = 0; i < 6; ++i )
 140 {
 141 for ( int j = 0; j < 6; ++j )
 142 {
 143 lmatrices_( local_subdomain_id, x_cell, y_cell, r_cell, wedge )( i + dimi * 6, j + dimj * 6 ) =
 144 mat( i, j );
 145 }
 146 }
 147 }
148
149 /// @brief Setter/Getter for app applyStoredLMatrices_: usage of stored local matrices during apply
150 void setApplyStoredLMatrices( bool v ) { applyStoredLMatrices_ = v; }
151
152 /// @brief S/Getter for diagonal member
153 void set_diagonal( bool v ) { diagonal_ = v; }
154
 155 /// @brief Assembles and caches the wedge-local element matrices.
 156 /// allocates memory for the local matrices (only if not yet allocated)
 157 /// calls kernel with storeLMatrices_ = true to assemble and store the local matrices
 158 /// sets applyStoredLMatrices_, such that future applies use the stored local matrices
 /// NOTE(review): if lmatrices_ is already allocated, the assembly kernel is skipped
 /// entirely and only the flags are toggled — cached matrices are NOT refreshed.
 /// Verify this is intended when coefficients (k_) change between calls.
 160 {
 161 storeLMatrices_ = true;
 162 if ( lmatrices_.data() == nullptr )
 163 {
 // NOTE(review): the view label below reads "LaplaceSimple" — presumably
 // copy-pasted from another operator; consider renaming for profiling clarity.
 164 lmatrices_ = Grid4DDataLocalMatrices(
 165 "LaplaceSimple::lmatrices_",
 166 domain_.subdomains().size(),
 // (remaining extent arguments elided in this listing — doxygen lines 167-169)
 170 Kokkos::parallel_for(
 171 "assemble_store_lmatrices", grid::shell::local_domain_md_range_policy_cells( domain_ ), *this );
 172 Kokkos::fence();
 173 }
 174 storeLMatrices_ = false;
 175 applyStoredLMatrices_ = true;
 176 }
177
 // Stores the new apply mode (e.g. Replace) and communication mode; both are
 // read by apply_impl() on the next application. (Function name line is elided
 // in this listing — doxygen line 178.)
 179 const linalg::OperatorApplyMode operator_apply_mode,
 180 const linalg::OperatorCommunicationMode operator_communication_mode )
 181 {
 182 operator_apply_mode_ = operator_apply_mode;
 183 operator_communication_mode_ = operator_communication_mode;
 184 }
185
 /// @brief Applies the operator: dst (+)= A * src over all local cells, followed
 /// by additive subdomain-boundary communication if configured.
 186 void apply_impl( const SrcVectorType& src, DstVectorType& dst )
 187 {
 // NOTE(review): the timer labels below say "vector_laplace_*" although this is
 // the epsilon-div-div operator — presumably copy-pasted; consider renaming.
 188 util::Timer timer_apply( "vector_laplace_apply" );
 // Stored-matrix modes require the matrices to exist.
 189 if ( storeLMatrices_ or applyStoredLMatrices_ )
 190 assert( lmatrices_.data() != nullptr );
 191
 // Replace mode zeroes dst first; otherwise results accumulate into dst.
 192 if ( operator_apply_mode_ == linalg::OperatorApplyMode::Replace )
 193 {
 194 assign( dst, 0 );
 195 }
 196
 197 src_ = src.grid_data();
 198 dst_ = dst.grid_data();
 199
 200 if ( src_.extent( 0 ) != dst_.extent( 0 ) || src_.extent( 1 ) != dst_.extent( 1 ) ||
 201 src_.extent( 2 ) != dst_.extent( 2 ) || src_.extent( 3 ) != dst_.extent( 3 ) )
 202 {
 203 throw std::runtime_error( "VectorLaplace: src/dst mismatch" );
 204 }
 205
 // NOTE(review): this second check compares src against the grid extents, yet
 // the message still reads "src/dst mismatch" (and names VectorLaplace) — the
 // text is misleading when debugging; should say src/grid mismatch.
 206 if ( src_.extent( 1 ) != grid_.extent( 1 ) || src_.extent( 2 ) != grid_.extent( 2 ) )
 207 {
 208 throw std::runtime_error( "VectorLaplace: src/dst mismatch" );
 209 }
 210
 // Launch the per-cell functor (operator() below) over all local cells.
 211 util::Timer timer_kernel( "vector_laplace_kernel" );
 212 Kokkos::parallel_for( "matvec", grid::shell::local_domain_md_range_policy_cells( domain_ ), *this );
 213 Kokkos::fence();
 214 timer_kernel.stop();
 215
 // Sum contributions on subdomain-boundary nodes across subdomains.
 216 if ( operator_communication_mode_ == linalg::OperatorCommunicationMode::CommunicateAdditively )
 217 {
 218 util::Timer timer_comm( "vector_laplace_comm" );
 219
 // (pack/send/recv and unpack/reduce call lines are elided in this
 // listing — doxygen lines 220/222)
 221 domain_, dst_, send_buffers_, recv_buffers_ );
 223 }
 224 }
225
 /// @brief Kokkos per-cell functor. For each hex cell it either executes the
 /// generated single-quadrature-point epsilon-div-div kernel directly
 /// (applyStoredLMatrices_ == false) or applies the stored 18x18 wedge-local
 /// matrices; results are accumulated into dst_ via atomic adds.
 226 KOKKOS_INLINE_FUNCTION void
 227 operator()( const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell ) const
 228 {
 229 // Compute the local element matrix.
 231
 232 if ( !applyStoredLMatrices_ )
 233 {
 234
// --- Generated kernel: gather the physical coordinates of the 4 lateral grid
// nodes of this hex cell.
 235double wedge_surf_phy_coords[2][3][3];
 236double quad_surface_coords[2][2][3];
 237quad_surface_coords[0][0][0] = grid_(local_subdomain_id, x_cell, y_cell, 0);
 238quad_surface_coords[0][0][1] = grid_(local_subdomain_id, x_cell, y_cell, 1);
 239quad_surface_coords[0][0][2] = grid_(local_subdomain_id, x_cell, y_cell, 2);
 240quad_surface_coords[0][1][0] = grid_(local_subdomain_id, x_cell, y_cell + 1, 0);
 241quad_surface_coords[0][1][1] = grid_(local_subdomain_id, x_cell, y_cell + 1, 1);
 242quad_surface_coords[0][1][2] = grid_(local_subdomain_id, x_cell, y_cell + 1, 2);
 243quad_surface_coords[1][0][0] = grid_(local_subdomain_id, x_cell + 1, y_cell, 0);
 244quad_surface_coords[1][0][1] = grid_(local_subdomain_id, x_cell + 1, y_cell, 1);
 245quad_surface_coords[1][0][2] = grid_(local_subdomain_id, x_cell + 1, y_cell, 2);
 246quad_surface_coords[1][1][0] = grid_(local_subdomain_id, x_cell + 1, y_cell + 1, 0);
 247quad_surface_coords[1][1][1] = grid_(local_subdomain_id, x_cell + 1, y_cell + 1, 1);
 248quad_surface_coords[1][1][2] = grid_(local_subdomain_id, x_cell + 1, y_cell + 1, 2);
// Split the lateral quad into the two triangular wedge footprints:
// wedge 0 uses nodes (0,0),(1,0),(0,1); wedge 1 uses nodes (1,1),(0,1),(1,0).
 249wedge_surf_phy_coords[0][0][0] = quad_surface_coords[0][0][0];
 250wedge_surf_phy_coords[0][0][1] = quad_surface_coords[0][0][1];
 251wedge_surf_phy_coords[0][0][2] = quad_surface_coords[0][0][2];
 252wedge_surf_phy_coords[0][1][0] = quad_surface_coords[1][0][0];
 253wedge_surf_phy_coords[0][1][1] = quad_surface_coords[1][0][1];
 254wedge_surf_phy_coords[0][1][2] = quad_surface_coords[1][0][2];
 255wedge_surf_phy_coords[0][2][0] = quad_surface_coords[0][1][0];
 256wedge_surf_phy_coords[0][2][1] = quad_surface_coords[0][1][1];
 257wedge_surf_phy_coords[0][2][2] = quad_surface_coords[0][1][2];
 258wedge_surf_phy_coords[1][0][0] = quad_surface_coords[1][1][0];
 259wedge_surf_phy_coords[1][0][1] = quad_surface_coords[1][1][1];
 260wedge_surf_phy_coords[1][0][2] = quad_surface_coords[1][1][2];
 261wedge_surf_phy_coords[1][1][0] = quad_surface_coords[0][1][0];
 262wedge_surf_phy_coords[1][1][1] = quad_surface_coords[0][1][1];
 263wedge_surf_phy_coords[1][1][2] = quad_surface_coords[0][1][2];
 264wedge_surf_phy_coords[1][2][0] = quad_surface_coords[1][0][0];
 265wedge_surf_phy_coords[1][2][1] = quad_surface_coords[1][0][1];
 266wedge_surf_phy_coords[1][2][2] = quad_surface_coords[1][0][2];
// Inner and outer radii bounding this radial cell layer.
 267double r_0 = radii_(local_subdomain_id, r_cell);
 268double r_1 = radii_(local_subdomain_id, r_cell + 1);
// Gather the source vector coefficients at the 6 nodes of both wedges,
// separately for each of the 3 vector dimensions.
 269double src_local_hex[3][2][6];
 270int dim;
 271for (dim = 0; dim < 3; dim += 1) {
 272 src_local_hex[dim][0][0] = src_(local_subdomain_id, x_cell, y_cell, r_cell, dim);
 273 src_local_hex[dim][0][1] = src_(local_subdomain_id, x_cell + 1, y_cell, r_cell, dim);
 274 src_local_hex[dim][0][2] = src_(local_subdomain_id, x_cell, y_cell + 1, r_cell, dim);
 275 src_local_hex[dim][0][3] = src_(local_subdomain_id, x_cell, y_cell, r_cell + 1, dim);
 276 src_local_hex[dim][0][4] = src_(local_subdomain_id, x_cell + 1, y_cell, r_cell + 1, dim);
 277 src_local_hex[dim][0][5] = src_(local_subdomain_id, x_cell, y_cell + 1, r_cell + 1, dim);
 278 src_local_hex[dim][1][0] = src_(local_subdomain_id, x_cell + 1, y_cell + 1, r_cell, dim);
 279 src_local_hex[dim][1][1] = src_(local_subdomain_id, x_cell, y_cell + 1, r_cell, dim);
 280 src_local_hex[dim][1][2] = src_(local_subdomain_id, x_cell + 1, y_cell, r_cell, dim);
 281 src_local_hex[dim][1][3] = src_(local_subdomain_id, x_cell + 1, y_cell + 1, r_cell + 1, dim);
 282 src_local_hex[dim][1][4] = src_(local_subdomain_id, x_cell, y_cell + 1, r_cell + 1, dim);
 283 src_local_hex[dim][1][5] = src_(local_subdomain_id, x_cell + 1, y_cell, r_cell + 1, dim);
 284};
// Gather the scalar coefficient k at the 6 nodes of both wedges.
 285double k_local_hex[2][6];
 286k_local_hex[0][0] = k_(local_subdomain_id, x_cell, y_cell, r_cell);
 287k_local_hex[0][1] = k_(local_subdomain_id, x_cell + 1, y_cell, r_cell);
 288k_local_hex[0][2] = k_(local_subdomain_id, x_cell, y_cell + 1, r_cell);
 289k_local_hex[0][3] = k_(local_subdomain_id, x_cell, y_cell, r_cell + 1);
 290k_local_hex[0][4] = k_(local_subdomain_id, x_cell + 1, y_cell, r_cell + 1);
 291k_local_hex[0][5] = k_(local_subdomain_id, x_cell, y_cell + 1, r_cell + 1);
 292k_local_hex[1][0] = k_(local_subdomain_id, x_cell + 1, y_cell + 1, r_cell);
 293k_local_hex[1][1] = k_(local_subdomain_id, x_cell, y_cell + 1, r_cell);
 294k_local_hex[1][2] = k_(local_subdomain_id, x_cell + 1, y_cell, r_cell);
 295k_local_hex[1][3] = k_(local_subdomain_id, x_cell + 1, y_cell + 1, r_cell + 1);
 296k_local_hex[1][4] = k_(local_subdomain_id, x_cell, y_cell + 1, r_cell + 1);
 297k_local_hex[1][5] = k_(local_subdomain_id, x_cell + 1, y_cell, r_cell + 1);
// Single quadrature point: triangle barycenter (1/3, 1/3) at radial reference
// coordinate z = 0 (the layer midplane), with weight 1.
 298double qp_array[1][3];
 299double qw_array[1];
 300qp_array[0][0] = 0.33333333333333333;
 301qp_array[0][1] = 0.33333333333333333;
 302qp_array[0][2] = 0.0;
 303qw_array[0] = 1.0;
// Boundary treatment: when assembling the off-diagonal part, skip the 3 nodes
// on the CMB layer (r_cell == 0) resp. the surface layer (r_cell + 1 == max_rad);
// the complementary diagonal/BC loop further below covers exactly those nodes.
 304int cmb_shift = ((treat_boundary_ && diagonal_ == false && r_cell == 0) ? (
 305 3
 306)
 307: (
 308 0
 309));
 310int max_rad = radii_.extent(1) - 1;
 311int surface_shift = ((treat_boundary_ && diagonal_ == false && max_rad == r_cell + 1) ? (
 312 3
 313)
 314: (
 315 0
 316));
// Per-dimension, per-wedge, per-node result accumulator.
 317double dst_array[3][2][6] = {0};
 318int w = 0;
// Loop over the two wedges of the hex cell and the (single) quadrature point.
 319for (w = 0; w < 2; w += 1) {
 320 int q = 0;
 321 for (q = 0; q < 1; q += 1) {
 322 /* Coefficient evaluation on current wedge w */;
 323 double tmpcse_k_eval_0 = (1.0/2.0)*qp_array[q][2];
 324 double tmpcse_k_eval_1 = 1.0/2.0 - tmpcse_k_eval_0;
 325 double tmpcse_k_eval_2 = tmpcse_k_eval_0 + 1.0/2.0;
 326 double tmpcse_k_eval_3 = -qp_array[q][0] - qp_array[q][1] + 1;
 327 double k_eval = tmpcse_k_eval_1*tmpcse_k_eval_3*k_local_hex[w][0] + tmpcse_k_eval_1*k_local_hex[w][1]*qp_array[q][0] + tmpcse_k_eval_1*k_local_hex[w][2]*qp_array[q][1] + tmpcse_k_eval_2*tmpcse_k_eval_3*k_local_hex[w][3] + tmpcse_k_eval_2*k_local_hex[w][4]*qp_array[q][0] + tmpcse_k_eval_2*k_local_hex[w][5]*qp_array[q][1];
 328 /* Computation + Inversion of the Jacobian */;
 329 double tmpcse_J_0 = -1.0/2.0*r_0 + (1.0/2.0)*r_1;
 330 double tmpcse_J_1 = r_0 + tmpcse_J_0*(qp_array[q][2] + 1);
 331 double tmpcse_J_2 = -qp_array[q][0] - qp_array[q][1] + 1;
 332 double J_0_0 = tmpcse_J_1*(-wedge_surf_phy_coords[w][0][0] + wedge_surf_phy_coords[w][1][0]);
 333 double J_0_1 = tmpcse_J_1*(-wedge_surf_phy_coords[w][0][0] + wedge_surf_phy_coords[w][2][0]);
 334 double J_0_2 = tmpcse_J_0*(tmpcse_J_2*wedge_surf_phy_coords[w][0][0] + qp_array[q][0]*wedge_surf_phy_coords[w][1][0] + qp_array[q][1]*wedge_surf_phy_coords[w][2][0]);
 335 double J_1_0 = tmpcse_J_1*(-wedge_surf_phy_coords[w][0][1] + wedge_surf_phy_coords[w][1][1]);
 336 double J_1_1 = tmpcse_J_1*(-wedge_surf_phy_coords[w][0][1] + wedge_surf_phy_coords[w][2][1]);
 337 double J_1_2 = tmpcse_J_0*(tmpcse_J_2*wedge_surf_phy_coords[w][0][1] + qp_array[q][0]*wedge_surf_phy_coords[w][1][1] + qp_array[q][1]*wedge_surf_phy_coords[w][2][1]);
 338 double J_2_0 = tmpcse_J_1*(-wedge_surf_phy_coords[w][0][2] + wedge_surf_phy_coords[w][1][2]);
 339 double J_2_1 = tmpcse_J_1*(-wedge_surf_phy_coords[w][0][2] + wedge_surf_phy_coords[w][2][2]);
 340 double J_2_2 = tmpcse_J_0*(tmpcse_J_2*wedge_surf_phy_coords[w][0][2] + qp_array[q][0]*wedge_surf_phy_coords[w][1][2] + qp_array[q][1]*wedge_surf_phy_coords[w][2][2]);
 341 double J_det = J_0_0*J_1_1*J_2_2 - J_0_0*J_1_2*J_2_1 - J_0_1*J_1_0*J_2_2 + J_0_1*J_1_2*J_2_0 + J_0_2*J_1_0*J_2_1 - J_0_2*J_1_1*J_2_0;
 342 double tmpcse_J_invT_0 = 1.0/J_det;
 343 double J_invT_cse_0_0 = tmpcse_J_invT_0*(J_1_1*J_2_2 - J_1_2*J_2_1);
 344 double J_invT_cse_0_1 = tmpcse_J_invT_0*(-J_1_0*J_2_2 + J_1_2*J_2_0);
 345 double J_invT_cse_0_2 = tmpcse_J_invT_0*(J_1_0*J_2_1 - J_1_1*J_2_0);
 346 double J_invT_cse_1_0 = tmpcse_J_invT_0*(-J_0_1*J_2_2 + J_0_2*J_2_1);
 347 double J_invT_cse_1_1 = tmpcse_J_invT_0*(J_0_0*J_2_2 - J_0_2*J_2_0);
 348 double J_invT_cse_1_2 = tmpcse_J_invT_0*(-J_0_0*J_2_1 + J_0_1*J_2_0);
 349 double J_invT_cse_2_0 = tmpcse_J_invT_0*(J_0_1*J_1_2 - J_0_2*J_1_1);
 350 double J_invT_cse_2_1 = tmpcse_J_invT_0*(-J_0_0*J_1_2 + J_0_2*J_1_0);
 351 double J_invT_cse_2_2 = tmpcse_J_invT_0*(J_0_0*J_1_1 - J_0_1*J_1_0);
 352
// Physical gradients of the 6 scalar wedge shape functions at this point.
 353 double scalar_grad[6][3] = {0};
 354 double tmpcse_grad_i_0 = (1.0/2.0)*qp_array[q][2];
 355 double tmpcse_grad_i_1 = tmpcse_grad_i_0 - 1.0/2.0;
 356 double tmpcse_grad_i_2 = (1.0/2.0)*qp_array[q][0];
 357 double tmpcse_grad_i_3 = (1.0/2.0)*qp_array[q][1];
 358 double tmpcse_grad_i_4 = tmpcse_grad_i_2 + tmpcse_grad_i_3 - 1.0/2.0;
 359 double tmpcse_grad_i_5 = J_invT_cse_0_2*tmpcse_grad_i_2;
 360 double tmpcse_grad_i_6 = -tmpcse_grad_i_1;
 361 double tmpcse_grad_i_7 = J_invT_cse_1_2*tmpcse_grad_i_2;
 362 double tmpcse_grad_i_8 = J_invT_cse_2_2*tmpcse_grad_i_2;
 363 double tmpcse_grad_i_9 = J_invT_cse_0_2*tmpcse_grad_i_3;
 364 double tmpcse_grad_i_10 = J_invT_cse_1_2*tmpcse_grad_i_3;
 365 double tmpcse_grad_i_11 = J_invT_cse_2_2*tmpcse_grad_i_3;
 366 double tmpcse_grad_i_12 = tmpcse_grad_i_0 + 1.0/2.0;
 367 double tmpcse_grad_i_13 = -tmpcse_grad_i_12;
 368 double tmpcse_grad_i_14 = -tmpcse_grad_i_4;
 369 scalar_grad[0][0] = J_invT_cse_0_0*tmpcse_grad_i_1 + J_invT_cse_0_1*tmpcse_grad_i_1 + J_invT_cse_0_2*tmpcse_grad_i_4;
 370 scalar_grad[0][1] = J_invT_cse_1_0*tmpcse_grad_i_1 + J_invT_cse_1_1*tmpcse_grad_i_1 + J_invT_cse_1_2*tmpcse_grad_i_4;
 371 scalar_grad[0][2] = J_invT_cse_2_0*tmpcse_grad_i_1 + J_invT_cse_2_1*tmpcse_grad_i_1 + J_invT_cse_2_2*tmpcse_grad_i_4;
 372 scalar_grad[1][0] = J_invT_cse_0_0*tmpcse_grad_i_6 - tmpcse_grad_i_5;
 373 scalar_grad[1][1] = J_invT_cse_1_0*tmpcse_grad_i_6 - tmpcse_grad_i_7;
 374 scalar_grad[1][2] = J_invT_cse_2_0*tmpcse_grad_i_6 - tmpcse_grad_i_8;
 375 scalar_grad[2][0] = J_invT_cse_0_1*tmpcse_grad_i_6 - tmpcse_grad_i_9;
 376 scalar_grad[2][1] = J_invT_cse_1_1*tmpcse_grad_i_6 - tmpcse_grad_i_10;
 377 scalar_grad[2][2] = J_invT_cse_2_1*tmpcse_grad_i_6 - tmpcse_grad_i_11;
 378 scalar_grad[3][0] = J_invT_cse_0_0*tmpcse_grad_i_13 + J_invT_cse_0_1*tmpcse_grad_i_13 + J_invT_cse_0_2*tmpcse_grad_i_14;
 379 scalar_grad[3][1] = J_invT_cse_1_0*tmpcse_grad_i_13 + J_invT_cse_1_1*tmpcse_grad_i_13 + J_invT_cse_1_2*tmpcse_grad_i_14;
 380 scalar_grad[3][2] = J_invT_cse_2_0*tmpcse_grad_i_13 + J_invT_cse_2_1*tmpcse_grad_i_13 + J_invT_cse_2_2*tmpcse_grad_i_14;
 381 scalar_grad[4][0] = J_invT_cse_0_0*tmpcse_grad_i_12 + tmpcse_grad_i_5;
 382 scalar_grad[4][1] = J_invT_cse_1_0*tmpcse_grad_i_12 + tmpcse_grad_i_7;
 383 scalar_grad[4][2] = J_invT_cse_2_0*tmpcse_grad_i_12 + tmpcse_grad_i_8;
 384 scalar_grad[5][0] = J_invT_cse_0_1*tmpcse_grad_i_12 + tmpcse_grad_i_9;
 385 scalar_grad[5][1] = J_invT_cse_1_1*tmpcse_grad_i_12 + tmpcse_grad_i_10;
 386 scalar_grad[5][2] = J_invT_cse_2_1*tmpcse_grad_i_12 + tmpcse_grad_i_11;
 387 int dimj;
 388
// Accumulate the symmetric gradient eps(u) and divergence div(u) at the
// quadrature point from all trial (source) contributions.
 389 double grad_u[3][3] = {0};
 390 double div_u = 0.0;
 391 int node_idx;
 392 for (dimj = 0; dimj < 3; dimj += 1) {
 393 if (diagonal_ == false) {
 394 for (node_idx = cmb_shift; node_idx < 6 - surface_shift; node_idx += 1) {
 395
 396 double E_grad_trial[3][3] = {0};
 397 E_grad_trial[0][dimj] = scalar_grad[node_idx][0];
 398 E_grad_trial[1][dimj] = scalar_grad[node_idx][1];
 399 E_grad_trial[2][dimj] = scalar_grad[node_idx][2];
 400 double tmpcse_symgrad_trial_0 = 0.5*E_grad_trial[0][1] + 0.5*E_grad_trial[1][0];
 401 double tmpcse_symgrad_trial_1 = 0.5*E_grad_trial[0][2] + 0.5*E_grad_trial[2][0];
 402 double tmpcse_symgrad_trial_2 = 0.5*E_grad_trial[1][2] + 0.5*E_grad_trial[2][1];
 403 grad_u[0][0] = 1.0*E_grad_trial[0][0]*src_local_hex[dimj][w][node_idx] + grad_u[0][0];
 404 grad_u[0][1] = tmpcse_symgrad_trial_0*src_local_hex[dimj][w][node_idx] + grad_u[0][1];
 405 grad_u[0][2] = tmpcse_symgrad_trial_1*src_local_hex[dimj][w][node_idx] + grad_u[0][2];
 406 grad_u[1][0] = tmpcse_symgrad_trial_0*src_local_hex[dimj][w][node_idx] + grad_u[1][0];
 407 grad_u[1][1] = 1.0*E_grad_trial[1][1]*src_local_hex[dimj][w][node_idx] + grad_u[1][1];
 408 grad_u[1][2] = tmpcse_symgrad_trial_2*src_local_hex[dimj][w][node_idx] + grad_u[1][2];
 409 grad_u[2][0] = tmpcse_symgrad_trial_1*src_local_hex[dimj][w][node_idx] + grad_u[2][0];
 410 grad_u[2][1] = tmpcse_symgrad_trial_2*src_local_hex[dimj][w][node_idx] + grad_u[2][1];
 411 grad_u[2][2] = 1.0*E_grad_trial[2][2]*src_local_hex[dimj][w][node_idx] + grad_u[2][2];
 412 div_u = div_u + E_grad_trial[dimj][dimj]*src_local_hex[dimj][w][node_idx];
 413 };
 414 };
 415 };
// Pair with each test function: 2 k eps(u):eps(v) - (2/3) k div(u) div(v),
// scaled by |det J| and the quadrature weight.
 416 int dimi;
 417 for (dimi = 0; dimi < 3; dimi += 1) {
 418 if (diagonal_ == false) {
 419 for (node_idx = cmb_shift; node_idx < 6 - surface_shift; node_idx += 1) {
 420
 421 double E_grad_test[3][3] = {0};
 422 E_grad_test[0][dimi] = scalar_grad[node_idx][0];
 423 E_grad_test[1][dimi] = scalar_grad[node_idx][1];
 424 E_grad_test[2][dimi] = scalar_grad[node_idx][2];
 425 double tmpcse_symgrad_test_0 = 0.5*E_grad_test[0][1] + 0.5*E_grad_test[1][0];
 426 double tmpcse_symgrad_test_1 = 0.5*E_grad_test[0][2] + 0.5*E_grad_test[2][0];
 427 double tmpcse_symgrad_test_2 = 0.5*E_grad_test[1][2] + 0.5*E_grad_test[2][1];
 428 double tmpcse_pairing_0 = 2*tmpcse_symgrad_test_0;
 429 double tmpcse_pairing_1 = 2*tmpcse_symgrad_test_1;
 430 double tmpcse_pairing_2 = 2*tmpcse_symgrad_test_2;
 431 dst_array[dimi][w][node_idx] = k_eval*(-0.66666666666666663*div_u*E_grad_test[dimi][dimi] + tmpcse_pairing_0*grad_u[0][1] + tmpcse_pairing_0*grad_u[1][0] + tmpcse_pairing_1*grad_u[0][2] + tmpcse_pairing_1*grad_u[2][0] + tmpcse_pairing_2*grad_u[1][2] + tmpcse_pairing_2*grad_u[2][1] + 2.0*E_grad_test[0][0]*grad_u[0][0] + 2.0*E_grad_test[1][1]*grad_u[1][1] + 2.0*E_grad_test[2][2]*grad_u[2][2])*fabs(J_det)*qw_array[q] + dst_array[dimi][w][node_idx];
 432 };
 433 };
 434 };
// Diagonal and/or boundary-node contributions: with the swapped shift bounds
// this loop covers exactly the nodes excluded from the loops above, applying
// only the diagonal entry u_i * a_ii per node.
// NOTE(review): pow(x, 2) on the hot path could be x*x — verify the compiler
// folds it, or simplify in the kernel generator.
 435 int dim_diagBC;
 436 for (dim_diagBC = 0; dim_diagBC < 3; dim_diagBC += 1) {
 437 if (diagonal_ || treat_boundary_ && (r_cell == 0 || r_cell + 1 == max_rad)) {
 438 int node_idx;
 439 for (node_idx = surface_shift; node_idx < 6 - cmb_shift; node_idx += 1) {
 440
 441 double E_grad_test[3][3] = {0};
 442 E_grad_test[0][dim_diagBC] = scalar_grad[node_idx][0];
 443 E_grad_test[1][dim_diagBC] = scalar_grad[node_idx][1];
 444 E_grad_test[2][dim_diagBC] = scalar_grad[node_idx][2];
 445
 446 double grad_u_diag[3][3] = {0};
 447 double tmpcse_symgrad_test_0 = 0.5*E_grad_test[0][1] + 0.5*E_grad_test[1][0];
 448 double tmpcse_symgrad_test_1 = 0.5*E_grad_test[0][2] + 0.5*E_grad_test[2][0];
 449 double tmpcse_symgrad_test_2 = 0.5*E_grad_test[1][2] + 0.5*E_grad_test[2][1];
 450 grad_u_diag[0][0] = 1.0*E_grad_test[0][0]*src_local_hex[dim_diagBC][w][node_idx];
 451 grad_u_diag[0][1] = tmpcse_symgrad_test_0*src_local_hex[dim_diagBC][w][node_idx];
 452 grad_u_diag[0][2] = tmpcse_symgrad_test_1*src_local_hex[dim_diagBC][w][node_idx];
 453 grad_u_diag[1][0] = tmpcse_symgrad_test_0*src_local_hex[dim_diagBC][w][node_idx];
 454 grad_u_diag[1][1] = 1.0*E_grad_test[1][1]*src_local_hex[dim_diagBC][w][node_idx];
 455 grad_u_diag[1][2] = tmpcse_symgrad_test_2*src_local_hex[dim_diagBC][w][node_idx];
 456 grad_u_diag[2][0] = tmpcse_symgrad_test_1*src_local_hex[dim_diagBC][w][node_idx];
 457 grad_u_diag[2][1] = tmpcse_symgrad_test_2*src_local_hex[dim_diagBC][w][node_idx];
 458 grad_u_diag[2][2] = 1.0*E_grad_test[2][2]*src_local_hex[dim_diagBC][w][node_idx];
 459 double tmpcse_pairing_0 = 4*src_local_hex[dim_diagBC][w][node_idx];
 460 double tmpcse_pairing_1 = 2.0*src_local_hex[dim_diagBC][w][node_idx];
 461 dst_array[dim_diagBC][w][node_idx] = k_eval*(tmpcse_pairing_0*pow(tmpcse_symgrad_test_0, 2) + tmpcse_pairing_0*pow(tmpcse_symgrad_test_1, 2) + tmpcse_pairing_0*pow(tmpcse_symgrad_test_2, 2) + tmpcse_pairing_1*pow(E_grad_test[0][0], 2) + tmpcse_pairing_1*pow(E_grad_test[1][1], 2) + tmpcse_pairing_1*pow(E_grad_test[2][2], 2) - 0.66666666666666663*pow(E_grad_test[dim_diagBC][dim_diagBC], 2)*src_local_hex[dim_diagBC][w][node_idx])*fabs(J_det)*qw_array[q] + dst_array[dim_diagBC][w][node_idx];
 462 };
 463 };
 464 };
 465 };
 466};
// Scatter-add the wedge-local results into the global vector; nodes shared by
// both wedges receive both contributions. Atomics guard against concurrent
// updates from neighbouring cells.
 467int dim_add;
 468for (dim_add = 0; dim_add < 3; dim_add += 1) {
 469 Kokkos::atomic_add(&dst_(local_subdomain_id, x_cell, y_cell, r_cell, dim_add), dst_array[dim_add][0][0]);
 470 Kokkos::atomic_add(&dst_(local_subdomain_id, x_cell + 1, y_cell, r_cell, dim_add), dst_array[dim_add][0][1] + dst_array[dim_add][1][2]);
 471 Kokkos::atomic_add(&dst_(local_subdomain_id, x_cell, y_cell + 1, r_cell, dim_add), dst_array[dim_add][0][2] + dst_array[dim_add][1][1]);
 472 Kokkos::atomic_add(&dst_(local_subdomain_id, x_cell, y_cell, r_cell + 1, dim_add), dst_array[dim_add][0][3]);
 473 Kokkos::atomic_add(&dst_(local_subdomain_id, x_cell + 1, y_cell, r_cell + 1, dim_add), dst_array[dim_add][0][4] + dst_array[dim_add][1][5]);
 474 Kokkos::atomic_add(&dst_(local_subdomain_id, x_cell, y_cell + 1, r_cell + 1, dim_add), dst_array[dim_add][0][5] + dst_array[dim_add][1][4]);
 475 Kokkos::atomic_add(&dst_(local_subdomain_id, x_cell + 1, y_cell + 1, r_cell, dim_add), dst_array[dim_add][1][0]);
 476 Kokkos::atomic_add(&dst_(local_subdomain_id, x_cell + 1, y_cell + 1, r_cell + 1, dim_add), dst_array[dim_add][1][3]);
 477};
 478 }
 479 else
 480 {
 481 // load LMatrix for both local wedges
 482 A[0] = lmatrices_( local_subdomain_id, x_cell, y_cell, r_cell, 0 );
 483 A[1] = lmatrices_( local_subdomain_id, x_cell, y_cell, r_cell, 1 );
 484 }
 485
 // NOTE(review): in the generated fast path above (applyStoredLMatrices_ ==
 // false) dst_ has already been updated via atomic adds, yet control still
 // reaches the A-based diagonal/store/apply tail below, and A is not filled by
 // that path (its declaration/assembly lines are elided in this listing —
 // doxygen line 230). Verify that the tail is not executed with unassembled
 // matrices and that results are not applied twice.
 486 if ( diagonal_ )
 487 {
 488 A[0] = A[0].diagonal();
 489 A[1] = A[1].diagonal();
 490 }
 491
 492 if ( storeLMatrices_ )
 493 {
 494 // write local matrices to mem
 495 lmatrices_( local_subdomain_id, x_cell, y_cell, r_cell, 0 ) = A[0];
 496 lmatrices_( local_subdomain_id, x_cell, y_cell, r_cell, 1 ) = A[1];
 497 }
 498 else
 499 {
 // Gather the wedge-local source vectors, interleaving the 3 vector
 // dimensions into 18-entry per-wedge vectors. (Some declaration lines are
 // elided in this listing — doxygen lines 500/503-504.)
 501 for ( int dimj = 0; dimj < 3; dimj++ )
 502 {
 505 src_d, local_subdomain_id, x_cell, y_cell, r_cell, dimj, src_ );
 506
 507 for ( int wedge = 0; wedge < num_wedges_per_hex_cell; wedge++ )
 508 {
 509 for ( int i = 0; i < num_nodes_per_wedge; i++ )
 510 {
 511 src[wedge]( dimj * num_nodes_per_wedge + i ) = src_d[wedge]( i );
 512 }
 513 }
 514 }
 515 //extract_local_wedge_vector_coefficients( src, local_subdomain_id, x_cell, y_cell, r_cell, dimj, src_ );
 516
 518
 // Local matvec per wedge using the (possibly diagonalized) stored matrix.
 519 dst[0] = A[0] * src[0];
 520 dst[1] = A[1] * src[1];
 521
 522 //atomically_add_local_wedge_vector_coefficients( dst_, local_subdomain_id, x_cell, y_cell, r_cell, dimi, dst );
 // Scatter each 6-entry dimension slice back into dst_ atomically.
 523 for ( int dimi = 0; dimi < 3; dimi++ )
 524 {
 526 dst_d[0] = dst[0].template slice< 6 >( dimi * num_nodes_per_wedge );
 527 dst_d[1] = dst[1].template slice< 6 >( dimi * num_nodes_per_wedge );
 528
 530 dst_, local_subdomain_id, x_cell, y_cell, r_cell, dimi, dst_d );
 531 }
 532 }
 533 }
534};
535
538
539} // namespace terra::fe::wedge::operators::shell::epsdivdiv_history
Send and receive buffers for all process-local subdomain boundaries.
Definition communication.hpp:56
void store_lmatrices()
Allocates memory for the local matrices (if not yet allocated), runs the assembly kernel with storeLMatrices_ = true to assemble and store them, and sets applyStoredLMatrices_ so that future applies use the stored local matrices.
Definition epsilon_divdiv_kerngen_v02b_single_quadpoint.hpp:159
const grid::shell::DistributedDomain & get_domain() const
Getter for domain member.
Definition epsilon_divdiv_kerngen_v02b_single_quadpoint.hpp:94
dense::Mat< ScalarT, 6, 6 > get_lmatrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge, const int dimi, const int dimj) const
Retrieves the local matrix stored in the operator.
Definition epsilon_divdiv_kerngen_v02b_single_quadpoint.hpp:104
void apply_impl(const SrcVectorType &src, DstVectorType &dst)
Definition epsilon_divdiv_kerngen_v02b_single_quadpoint.hpp:186
ScalarT ScalarType
Definition epsilon_divdiv_kerngen_v02b_single_quadpoint.hpp:28
terra::grid::Grid4DDataMatrices< ScalarType, 18, 18, 2 > Grid4DDataLocalMatrices
Definition epsilon_divdiv_kerngen_v02b_single_quadpoint.hpp:29
void set_operator_apply_and_communication_modes(const linalg::OperatorApplyMode operator_apply_mode, const linalg::OperatorCommunicationMode operator_communication_mode)
Definition epsilon_divdiv_kerngen_v02b_single_quadpoint.hpp:178
grid::Grid3DDataVec< ScalarT, 3 > get_grid() const
Getter for grid member.
Definition epsilon_divdiv_kerngen_v02b_single_quadpoint.hpp:100
void setApplyStoredLMatrices(bool v)
Setter for applyStoredLMatrices_: enables or disables use of the stored local matrices during apply.
Definition epsilon_divdiv_kerngen_v02b_single_quadpoint.hpp:150
EpsilonDivDivKerngenV02bSingleQuadpoint(const grid::shell::DistributedDomain &domain, const grid::Grid3DDataVec< ScalarT, 3 > &grid, const grid::Grid2DDataScalar< ScalarT > &radii, const grid::Grid4DDataScalar< grid::shell::ShellBoundaryFlag > &mask, const grid::Grid4DDataScalar< ScalarT > &k, BoundaryConditions bcs, bool diagonal, linalg::OperatorApplyMode operator_apply_mode=linalg::OperatorApplyMode::Replace, linalg::OperatorCommunicationMode operator_communication_mode=linalg::OperatorCommunicationMode::CommunicateAdditively, linalg::OperatorStoredMatrixMode operator_stored_matrix_mode=linalg::OperatorStoredMatrixMode::Off)
Definition epsilon_divdiv_kerngen_v02b_single_quadpoint.hpp:61
grid::Grid2DDataScalar< ScalarT > get_radii() const
Getter for radii member.
Definition epsilon_divdiv_kerngen_v02b_single_quadpoint.hpp:97
void set_diagonal(bool v)
Setter for the diagonal member.
Definition epsilon_divdiv_kerngen_v02b_single_quadpoint.hpp:153
const grid::Grid4DDataScalar< ScalarType > & k_grid_data()
Definition epsilon_divdiv_kerngen_v02b_single_quadpoint.hpp:91
void set_lmatrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge, const int dimi, const int dimj, dense::Mat< ScalarT, 6, 6 > mat) const
Set the local matrix stored in the operator.
Definition epsilon_divdiv_kerngen_v02b_single_quadpoint.hpp:128
void operator()(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell) const
Definition epsilon_divdiv_kerngen_v02b_single_quadpoint.hpp:227
Parallel data structure organizing the thick spherical shell metadata for distributed (MPI parallel) ...
Definition spherical_shell.hpp:2518
const std::map< SubdomainInfo, std::tuple< LocalSubdomainIdx, SubdomainNeighborhood > > & subdomains() const
Definition spherical_shell.hpp:2650
const DomainInfo & domain_info() const
Returns a const reference.
Definition spherical_shell.hpp:2647
int subdomain_num_nodes_radially() const
Equivalent to calling subdomain_num_nodes_radially( subdomain_refinement_level() )
Definition spherical_shell.hpp:861
int subdomain_num_nodes_per_side_laterally() const
Equivalent to calling subdomain_num_nodes_per_side_laterally( subdomain_refinement_level() )
Definition spherical_shell.hpp:852
Static assertion: VectorQ1Scalar satisfies VectorLike concept.
Definition vector_q1.hpp:168
const grid::Grid4DDataVec< ScalarType, VecDim > & grid_data() const
Get const reference to grid data.
Definition vector_q1.hpp:288
Timer supporting RAII scope or manual stop.
Definition timer.hpp:342
void stop()
Stop the timer and record elapsed time.
Definition timer.hpp:364
Concept for types that behave like linear operators.
Definition operator.hpp:57
void unpack_and_reduce_local_subdomain_boundaries(const grid::shell::DistributedDomain &domain, const GridDataType &data, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_recv_buffers, CommunicationReduction reduction=CommunicationReduction::SUM)
Unpacks and reduces local subdomain boundaries.
Definition communication.hpp:672
void pack_send_and_recv_local_subdomain_boundaries(const grid::shell::DistributedDomain &domain, const GridDataType &data, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_send_buffers, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_recv_buffers)
Packs, sends and recvs local subdomain boundaries using two sets of buffers.
Definition communication.hpp:242
Definition epsilon_divdiv_kerngen_v01_initial.hpp:16
void atomically_add_local_wedge_vector_coefficients(const grid::Grid4DDataVec< T, VecDim > &global_coefficients, const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int d, const dense::Vec< T, 6 > local_coefficients[2])
Performs an atomic add of the two local wedge coefficient vectors of a hex cell into the global coeff...
Definition kernel_helpers.hpp:465
constexpr int num_wedges_per_hex_cell
Definition kernel_helpers.hpp:5
void extract_local_wedge_vector_coefficients(dense::Vec< T, 6 >(&local_coefficients)[2], const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int d, const grid::Grid4DDataVec< T, VecDim > &global_coefficients)
Extracts the local vector coefficients for the two wedges of a hex cell from the global coefficient v...
Definition kernel_helpers.hpp:356
constexpr int num_nodes_per_wedge
Definition kernel_helpers.hpp:7
BoundaryConditionMapping[2] BoundaryConditions
Definition shell/bit_masks.hpp:37
ShellBoundaryFlag
FlagLike that indicates boundary types for the thick spherical shell.
Definition shell/bit_masks.hpp:12
Kokkos::MDRangePolicy< Kokkos::Rank< 4 > > local_domain_md_range_policy_cells(const DistributedDomain &distributed_domain)
Definition spherical_shell.hpp:2739
BoundaryConditionFlag
FlagLike that indicates the type of boundary condition
Definition shell/bit_masks.hpp:25
Kokkos::View< dense::Mat< ScalarType, Rows, Cols > ****[NumMatrices], Layout > Grid4DDataMatrices
Definition grid_types.hpp:173
Kokkos::View< ScalarType ***[VecDim], Layout > Grid3DDataVec
Definition grid_types.hpp:42
Kokkos::View< ScalarType ****, Layout > Grid4DDataScalar
Definition grid_types.hpp:27
Kokkos::View< ScalarType **, Layout > Grid2DDataScalar
Definition grid_types.hpp:21
OperatorApplyMode
Modes for applying an operator to a vector.
Definition operator.hpp:30
@ Replace
Overwrite the destination vector.
OperatorStoredMatrixMode
Modes for applying stored matrices.
Definition operator.hpp:47
@ Off
Do not use stored matrices.
OperatorCommunicationMode
Modes for communication during operator application.
Definition operator.hpp:40
@ CommunicateAdditively
Communicate and add results.
Definition mat.hpp:10
constexpr Mat diagonal() const
Definition mat.hpp:377
Definition vec.hpp:9
SoA (Structure-of-Arrays) 4D vector grid data.
Definition grid_types.hpp:51
auto extent(int i) const
Definition grid_types.hpp:75