325 int local_subdomain_id, x_cell, y_cell, r_cell;
328 int tmp = team.league_rank();
329 const int r_block_index = tmp % blocks_per_column_;
330 tmp /= blocks_per_column_;
331 y_cell = tmp & ( hex_lat_ - 1 );
332 tmp >>= lat_refinement_level_;
333 x_cell = tmp & ( hex_lat_ - 1 );
334 tmp >>= lat_refinement_level_;
335 local_subdomain_id = tmp;
337 r_cell = r_block_index * team.team_size() + team.team_rank();
340 const bool at_cmb =
has_flag( local_subdomain_id, x_cell, y_cell, r_cell, CMB );
341 const bool at_surface =
has_flag( local_subdomain_id, x_cell, y_cell, r_cell + 1, SURFACE );
350 static constexpr int WEDGE_NODE_OFF[2][6][3] = {
351 { { 0, 0, 0 }, { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, { 1, 0, 1 }, { 0, 1, 1 } },
352 { { 1, 1, 0 }, { 0, 1, 0 }, { 1, 0, 0 }, { 1, 1, 1 }, { 0, 1, 1 }, { 1, 0, 1 } } };
355 static constexpr int WEDGE_TO_UNIQUE[2][6] = {
356 { 0, 1, 2, 3, 4, 5 },
361 constexpr double ONE_THIRD = 1.0 / 3.0;
362 constexpr double ONE_SIXTH = 1.0 / 6.0;
363 constexpr double NEG_TWO_THIRDS = -0.66666666666666663;
366 static constexpr double dN_ref[6][3] = {
367 { -0.5, -0.5, -ONE_SIXTH },
368 { 0.5, 0.0, -ONE_SIXTH },
369 { 0.0, 0.5, -ONE_SIXTH },
370 { -0.5, -0.5, ONE_SIXTH },
371 { 0.5, 0.0, ONE_SIXTH },
372 { 0.0, 0.5, ONE_SIXTH } };
381 reinterpret_cast< double*
>( team.team_shmem().get_shmem(
team_shmem_size( team.team_size() ) ) );
383 using Scratch3D = Kokkos::
384 View< double***, Kokkos::LayoutRight, typename Team::scratch_memory_space, Kokkos::MemoryUnmanaged >;
385 using Scratch3DLevels = Kokkos::
386 View< double***, Kokkos::LayoutRight, typename Team::scratch_memory_space, Kokkos::MemoryUnmanaged >;
387 using Scratch2DLevels =
388 Kokkos::View< double**, Kokkos::LayoutRight, typename Team::scratch_memory_space, Kokkos::MemoryUnmanaged >;
391 Scratch3D wedge_surf_phy_coords( shmem, 2, 3, 3 );
394 const int ts = team.team_size();
395 const int nlev = ts + 1;
398 Scratch3DLevels src_sh( shmem, nlev, 4, 3 );
399 shmem += nlev * 4 * 3;
402 Scratch2DLevels k_sh( shmem, nlev, 4 );
408 Kokkos::single( Kokkos::PerTeam( team ), [&]() {
409 const double q00x = grid_( local_subdomain_id, x_cell, y_cell, 0 );
410 const double q00y = grid_( local_subdomain_id, x_cell, y_cell, 1 );
411 const double q00z = grid_( local_subdomain_id, x_cell, y_cell, 2 );
413 const double q01x = grid_( local_subdomain_id, x_cell, y_cell + 1, 0 );
414 const double q01y = grid_( local_subdomain_id, x_cell, y_cell + 1, 1 );
415 const double q01z = grid_( local_subdomain_id, x_cell, y_cell + 1, 2 );
417 const double q10x = grid_( local_subdomain_id, x_cell + 1, y_cell, 0 );
418 const double q10y = grid_( local_subdomain_id, x_cell + 1, y_cell, 1 );
419 const double q10z = grid_( local_subdomain_id, x_cell + 1, y_cell, 2 );
421 const double q11x = grid_( local_subdomain_id, x_cell + 1, y_cell + 1, 0 );
422 const double q11y = grid_( local_subdomain_id, x_cell + 1, y_cell + 1, 1 );
423 const double q11z = grid_( local_subdomain_id, x_cell + 1, y_cell + 1, 2 );
426 wedge_surf_phy_coords( 0, 0, 0 ) = q00x;
427 wedge_surf_phy_coords( 0, 0, 1 ) = q00y;
428 wedge_surf_phy_coords( 0, 0, 2 ) = q00z;
429 wedge_surf_phy_coords( 0, 1, 0 ) = q10x;
430 wedge_surf_phy_coords( 0, 1, 1 ) = q10y;
431 wedge_surf_phy_coords( 0, 1, 2 ) = q10z;
432 wedge_surf_phy_coords( 0, 2, 0 ) = q01x;
433 wedge_surf_phy_coords( 0, 2, 1 ) = q01y;
434 wedge_surf_phy_coords( 0, 2, 2 ) = q01z;
437 wedge_surf_phy_coords( 1, 0, 0 ) = q11x;
438 wedge_surf_phy_coords( 1, 0, 1 ) = q11y;
439 wedge_surf_phy_coords( 1, 0, 2 ) = q11z;
440 wedge_surf_phy_coords( 1, 1, 0 ) = q01x;
441 wedge_surf_phy_coords( 1, 1, 1 ) = q01y;
442 wedge_surf_phy_coords( 1, 1, 2 ) = q01z;
443 wedge_surf_phy_coords( 1, 2, 0 ) = q10x;
444 wedge_surf_phy_coords( 1, 2, 1 ) = q10y;
445 wedge_surf_phy_coords( 1, 2, 2 ) = q10z;
458 const int r_base = ( ( team.league_rank() % blocks_per_column_ ) * ts );
460 auto load_level = [&](
const int level ) {
461 const int r_abs = r_base + level;
464 for (
int dy = 0; dy <= 1; ++dy )
467 for (
int dx = 0; dx <= 1; ++dx )
469 const int xy = dx + 2 * dy;
471 const int xi = x_cell + dx;
472 const int yi = y_cell + dy;
474 k_sh( level, xy ) = k_( local_subdomain_id, xi, yi, r_abs );
476 src_sh( level, xy, 0 ) = src_( local_subdomain_id, xi, yi, r_abs, 0 );
477 src_sh( level, xy, 1 ) = src_( local_subdomain_id, xi, yi, r_abs, 1 );
478 src_sh( level, xy, 2 ) = src_( local_subdomain_id, xi, yi, r_abs, 2 );
484 load_level( team.team_rank() );
487 if ( team.team_rank() == ts - 1 )
497 const double r_0 = radii_( local_subdomain_id, r_cell );
498 const double r_1 = radii_( local_subdomain_id, r_cell + 1 );
501 const bool at_boundary = at_cmb || at_surface;
502 bool treat_boundary =
false;
505 const ShellBoundaryFlag sbf = at_cmb ? CMB : SURFACE;
506 treat_boundary = ( get_boundary_condition_flag( bcs_, sbf ) == DIRICHLET );
509 const int cmb_shift = ( ( at_boundary && treat_boundary && ( !diagonal_ ) && at_cmb ) ? 3 : 0 );
510 const int surface_shift = ( ( at_boundary && treat_boundary && ( !diagonal_ ) && at_surface ) ? 3 : 0 );
513 double dst8[3][8] = { 0.0 };
516 const int lvl0 = team.team_rank();
520 for (
int w = 0; w < 2; ++w )
527 for (
int node = 0; node < 6; ++node )
529 const int dx = WEDGE_NODE_OFF[w][node][0];
530 const int dy = WEDGE_NODE_OFF[w][node][1];
531 const int dr = WEDGE_NODE_OFF[w][node][2];
533 const int xy = dx + 2 * dy;
534 const int lvl = lvl0 + dr;
536 k_sum += k_sh( lvl, xy );
538 const double k_eval = ONE_SIXTH * k_sum;
545 double i00, i01, i02;
546 double i10, i11, i12;
547 double i20, i21, i22;
550 const double half_dr = 0.5 * ( r_1 - r_0 );
551 const double r_mid = 0.5 * ( r_0 + r_1 );
553 const double J_0_0 = r_mid * ( -wedge_surf_phy_coords( w, 0, 0 ) + wedge_surf_phy_coords( w, 1, 0 ) );
554 const double J_0_1 = r_mid * ( -wedge_surf_phy_coords( w, 0, 0 ) + wedge_surf_phy_coords( w, 2, 0 ) );
556 half_dr * ( ONE_THIRD * ( wedge_surf_phy_coords( w, 0, 0 ) + wedge_surf_phy_coords( w, 1, 0 ) +
557 wedge_surf_phy_coords( w, 2, 0 ) ) );
559 const double J_1_0 = r_mid * ( -wedge_surf_phy_coords( w, 0, 1 ) + wedge_surf_phy_coords( w, 1, 1 ) );
560 const double J_1_1 = r_mid * ( -wedge_surf_phy_coords( w, 0, 1 ) + wedge_surf_phy_coords( w, 2, 1 ) );
562 half_dr * ( ONE_THIRD * ( wedge_surf_phy_coords( w, 0, 1 ) + wedge_surf_phy_coords( w, 1, 1 ) +
563 wedge_surf_phy_coords( w, 2, 1 ) ) );
565 const double J_2_0 = r_mid * ( -wedge_surf_phy_coords( w, 0, 2 ) + wedge_surf_phy_coords( w, 1, 2 ) );
566 const double J_2_1 = r_mid * ( -wedge_surf_phy_coords( w, 0, 2 ) + wedge_surf_phy_coords( w, 2, 2 ) );
568 half_dr * ( ONE_THIRD * ( wedge_surf_phy_coords( w, 0, 2 ) + wedge_surf_phy_coords( w, 1, 2 ) +
569 wedge_surf_phy_coords( w, 2, 2 ) ) );
571 const double J_det = J_0_0 * J_1_1 * J_2_2 - J_0_0 * J_1_2 * J_2_1 - J_0_1 * J_1_0 * J_2_2 +
572 J_0_1 * J_1_2 * J_2_0 + J_0_2 * J_1_0 * J_2_1 - J_0_2 * J_1_1 * J_2_0;
574 const double invJ = 1.0 / J_det;
577 i00 = invJ * ( J_1_1 * J_2_2 - J_1_2 * J_2_1 );
578 i01 = invJ * ( -J_1_0 * J_2_2 + J_1_2 * J_2_0 );
579 i02 = invJ * ( J_1_0 * J_2_1 - J_1_1 * J_2_0 );
581 i10 = invJ * ( -J_0_1 * J_2_2 + J_0_2 * J_2_1 );
582 i11 = invJ * ( J_0_0 * J_2_2 - J_0_2 * J_2_0 );
583 i12 = invJ * ( -J_0_0 * J_2_1 + J_0_1 * J_2_0 );
585 i20 = invJ * ( J_0_1 * J_1_2 - J_0_2 * J_1_1 );
586 i21 = invJ * ( -J_0_0 * J_1_2 + J_0_2 * J_1_0 );
587 i22 = invJ * ( J_0_0 * J_1_1 - J_0_1 * J_1_0 );
589 wJ = Kokkos::abs( J_det );
592 const double kwJ = k_eval * wJ;
598 double gu10 = 0.0, gu11 = 0.0;
599 double gu20 = 0.0, gu21 = 0.0, gu22 = 0.0;
606 for (
int dimj = 0; dimj < 3; ++dimj )
609 for (
int node_idx = cmb_shift; node_idx < 6 - surface_shift; ++node_idx )
611 const double gx = dN_ref[node_idx][0];
612 const double gy = dN_ref[node_idx][1];
613 const double gz = dN_ref[node_idx][2];
615 const double g0 = i00 * gx + i01 * gy + i02 * gz;
616 const double g1 = i10 * gx + i11 * gy + i12 * gz;
617 const double g2 = i20 * gx + i21 * gy + i22 * gz;
619 double E00, E11, E22, sym01, sym02, sym12, gdd;
620 column_grad_to_sym( dimj, g0, g1, g2, E00, E11, E22, sym01, sym02, sym12, gdd );
622 const int dx = WEDGE_NODE_OFF[w][node_idx][0];
623 const int dy = WEDGE_NODE_OFF[w][node_idx][1];
624 const int dr = WEDGE_NODE_OFF[w][node_idx][2];
626 const int xy = dx + 2 * dy;
627 const int lvl = lvl0 + dr;
629 const double s = src_sh( lvl, xy, dimj );
644 for (
int dimi = 0; dimi < 3; ++dimi )
647 for (
int node_idx = cmb_shift; node_idx < 6 - surface_shift; ++node_idx )
649 const double gx = dN_ref[node_idx][0];
650 const double gy = dN_ref[node_idx][1];
651 const double gz = dN_ref[node_idx][2];
653 const double g0 = i00 * gx + i01 * gy + i02 * gz;
654 const double g1 = i10 * gx + i11 * gy + i12 * gz;
655 const double g2 = i20 * gx + i21 * gy + i22 * gz;
657 double E00, E11, E22, sym01, sym02, sym12, gdd;
658 column_grad_to_sym( dimi, g0, g1, g2, E00, E11, E22, sym01, sym02, sym12, gdd );
660 const double pairing0 = 2.0 * sym01;
661 const double pairing1 = 2.0 * sym02;
662 const double pairing2 = 2.0 * sym12;
664 const int u = WEDGE_TO_UNIQUE[w][node_idx];
666 dst8[dimi][u] += kwJ * ( NEG_TWO_THIRDS * div_u * gdd + pairing0 * gu10 + pairing0 * gu10 +
667 pairing1 * gu20 + pairing1 * gu20 + pairing2 * gu21 + pairing2 * gu21 +
668 2.0 * E00 * gu00 + 2.0 * E11 * gu11 + 2.0 * E22 * gu22 );
674 if ( diagonal_ || ( treat_boundary && at_boundary ) )
677 for (
int dim_diagBC = 0; dim_diagBC < 3; ++dim_diagBC )
680 for (
int node_idx = surface_shift; node_idx < 6 - cmb_shift; ++node_idx )
682 const double gx = dN_ref[node_idx][0];
683 const double gy = dN_ref[node_idx][1];
684 const double gz = dN_ref[node_idx][2];
686 const double g0 = i00 * gx + i01 * gy + i02 * gz;
687 const double g1 = i10 * gx + i11 * gy + i12 * gz;
688 const double g2 = i20 * gx + i21 * gy + i22 * gz;
690 double E00, E11, E22, sym01, sym02, sym12, gdd;
691 column_grad_to_sym( dim_diagBC, g0, g1, g2, E00, E11, E22, sym01, sym02, sym12, gdd );
693 const int dx = WEDGE_NODE_OFF[w][node_idx][0];
694 const int dy = WEDGE_NODE_OFF[w][node_idx][1];
695 const int dr = WEDGE_NODE_OFF[w][node_idx][2];
697 const int xy = dx + 2 * dy;
698 const int lvl = lvl0 + dr;
700 const double s = src_sh( lvl, xy, dim_diagBC );
702 const double pairing0 = 4.0 * s;
703 const double pairing1 = 2.0 * s;
705 const int u = WEDGE_TO_UNIQUE[w][node_idx];
707 dst8[dim_diagBC][u] +=
708 kwJ * ( pairing0 * ( sym01 * sym01 ) + pairing0 * ( sym02 * sym02 ) +
709 pairing0 * ( sym12 * sym12 ) + pairing1 * ( E00 * E00 ) + pairing1 * ( E11 * E11 ) +
710 pairing1 * ( E22 * E22 ) + NEG_TWO_THIRDS * ( gdd * gdd ) * s );
717 for (
int dim_add = 0; dim_add < 3; ++dim_add )
719 Kokkos::atomic_add( &dst_( local_subdomain_id, x_cell, y_cell, r_cell, dim_add ), dst8[dim_add][0] );
720 Kokkos::atomic_add( &dst_( local_subdomain_id, x_cell + 1, y_cell, r_cell, dim_add ), dst8[dim_add][1] );
721 Kokkos::atomic_add( &dst_( local_subdomain_id, x_cell, y_cell + 1, r_cell, dim_add ), dst8[dim_add][2] );
722 Kokkos::atomic_add( &dst_( local_subdomain_id, x_cell, y_cell, r_cell + 1, dim_add ), dst8[dim_add][3] );
724 &dst_( local_subdomain_id, x_cell + 1, y_cell, r_cell + 1, dim_add ), dst8[dim_add][4] );
726 &dst_( local_subdomain_id, x_cell, y_cell + 1, r_cell + 1, dim_add ), dst8[dim_add][5] );
728 &dst_( local_subdomain_id, x_cell + 1, y_cell + 1, r_cell, dim_add ), dst8[dim_add][6] );
730 &dst_( local_subdomain_id, x_cell + 1, y_cell + 1, r_cell + 1, dim_add ), dst8[dim_add][7] );