Loading...
Searching...
No Matches
epsilon_divdiv_kerngen_v05_shmem_src_k.hpp
Go to the documentation of this file.
1#pragma once
2
3#include "../../../quadrature/quadrature.hpp"
5#include "dense/vec.hpp"
9#include "impl/Kokkos_Profiling.hpp"
10#include "linalg/operator.hpp"
13#include "linalg/vector.hpp"
14#include "linalg/vector_q1.hpp"
15#include "util/timer.hpp"
16
18
29
// Matrix-free epsilon/div-div operator kernel for the spherical-shell wedge
// discretization. This "v05" variant stages the source vector and the coefficient k
// in Kokkos team shared memory (see operator() below). It can alternatively apply
// precomputed local matrices (see set_stored_matrix_mode / get_local_matrix).
30template < typename ScalarT, int VecDim = 3 >
32{
33 public:
 // Scalar type used for all field data and local matrices.
36 using ScalarType = ScalarT;
 // Local element matrices are 18x18: 6 wedge nodes x 3 vector components.
37 static constexpr int LocalMatrixDim = 18;
40
41 private:
 // Optional storage of precomputed local matrices; allocated in set_stored_matrix_mode().
42 LocalMatrixStorage local_matrix_storage_;
43
45
 // Boundary conditions for the two radial boundaries; copied element-wise in the ctor.
50 BoundaryConditions bcs_;
51
 // When true, only the diagonal (and Dirichlet-boundary) contribution is applied.
52 bool diagonal_;
53
54 linalg::OperatorApplyMode operator_apply_mode_;
55 linalg::OperatorCommunicationMode operator_communication_mode_;
56 linalg::OperatorStoredMatrixMode operator_stored_matrix_mode_;
57
60
63
64 // Quadrature points.
65 const int num_quad_points = quadrature::quad_felippa_1x1_num_quad_points;
66
69
 // Kernel launch geometry, derived from the domain in the constructor.
70 int local_subdomains_;
 // Lateral cells per subdomain side (nodes per side - 1).
71 int hex_lat_;
 // Radial cells per subdomain (radial nodes - 1).
72 int hex_rad_;
73 int lat_refinement_level_;
 // Team size (threads per radial-column block), capped at 128 in the ctor.
74 int block_size_;
75 int blocks_per_column_;
 // Total number of teams launched by apply_impl().
76 int blocks_;
77
 // Outermost / innermost shell radii (set from DomainInfo::radii() in the ctor).
78 ScalarT r_max_;
79 ScalarT r_min_;
81 public:
 // Constructor: stores the domain/grid/radii/mask/coefficient handles, copies the
 // boundary conditions, and derives the Kokkos launch geometry (team size, number
 // of teams) from the domain metadata. (Several signature lines are elided in this
 // listing; see the cross-reference below for the full parameter list.)
88 BoundaryConditions bcs,
89 bool diagonal,
91 linalg::OperatorCommunicationMode operator_communication_mode =
94 : domain_( domain )
95 , grid_( grid )
96 , radii_( radii )
97 , mask_( mask )
98 , k_( k )
99 , diagonal_( diagonal )
100 , operator_apply_mode_( operator_apply_mode )
101 , operator_communication_mode_( operator_communication_mode )
102 , operator_stored_matrix_mode_( operator_stored_matrix_mode )
103 , send_buffers_( domain )
104 , recv_buffers_( domain )
105 {
 // Copy the two radial-boundary conditions (CMB and surface).
106 bcs_[0] = bcs[0];
107 bcs_[1] = bcs[1];
110 const grid::shell::DomainInfo& domain_info = domain_.domain_info();
111 local_subdomains_ = domain_.subdomains().size();
 // Cell counts are node counts minus one in each direction.
112 hex_lat_ = domain_info.subdomain_num_nodes_per_side_laterally() - 1;
113 hex_rad_ = domain_info.subdomain_num_nodes_radially() - 1;
114 lat_refinement_level_ = domain_info.diamond_lateral_refinement_level();
 // One thread per radial cell in a column; split columns into blocks of <= 128 threads.
115 const int threads_per_column = hex_rad_;
116 block_size_ = std::min( 128, threads_per_column );
117 blocks_per_column_ = ( threads_per_column + block_size_ - 1 ) / block_size_;
 // One team per (subdomain, lateral cell, radial block).
118 blocks_ = local_subdomains_ * hex_lat_ * hex_lat_ * blocks_per_column_;
119 r_min_ = domain_info.radii()[0];
120 r_max_ = domain_info.radii()[domain_info.radii().size() - 1];
 // NOTE(review): the label opens "(" but the values are printed without a matching
 // "(" before them, so the logged line has unbalanced parentheses. Cosmetic only.
121 util::logroot << "[EpsilonDivDiv] (threads_per_column, block_size_, blocks_per_column_) = "
122 << threads_per_column << ", " << block_size_ << ", " << blocks_per_column_ << ")" << std::endl;
123 }
124
 /// @brief Setter for the apply mode (Replace vs. accumulate) and the
 ///        communication mode used by apply_impl().
126 const linalg::OperatorApplyMode operator_apply_mode,
127 const linalg::OperatorCommunicationMode operator_communication_mode )
128 {
129 operator_apply_mode_ = operator_apply_mode;
130 operator_communication_mode_ = operator_communication_mode;
131 }
132
133 /// @brief Setter for the diagonal-only flag (see diagonal_).
134 void set_diagonal( bool v ) { diagonal_ = v; }
135
136 /// @brief Getter for coefficient
138
139 /// @brief Getter for domain member
140 const grid::shell::DistributedDomain& get_domain() const { return domain_; }
141
142 /// @brief Getter for radii member
144
145 /// @brief Getter for grid member
147
148 /// @brief Returns true if the given boundary flag is set in the mask at the given cell.
149 KOKKOS_INLINE_FUNCTION
151 const int local_subdomain_id,
152 const int x_cell,
153 const int y_cell,
154 const int r_cell,
156 {
157 return util::has_flag( mask_( local_subdomain_id, x_cell, y_cell, r_cell ), flag );
158 }
159
160 /// @brief Allocates memory for the local matrices (when the mode is not Off)
161 /// and records the stored-matrix mode used by get_local_matrix().
162 linalg::OperatorStoredMatrixMode operator_stored_matrix_mode,
163 int level_range,
165 {
166 operator_stored_matrix_mode_ = operator_stored_matrix_mode;
167
168 // allocate storage if necessary
169 if ( operator_stored_matrix_mode_ != linalg::OperatorStoredMatrixMode::Off )
170 {
172 domain_, operator_stored_matrix_mode_, level_range, GCAElements );
173 }
174 }
175
 /// @brief Returns the currently configured stored-matrix mode.
176 linalg::OperatorStoredMatrixMode get_stored_matrix_mode() { return operator_stored_matrix_mode_; }
177
178 /// @brief Set the local matrix stored in the operator for the given element/wedge.
179 /// Requires a stored-matrix mode other than Off (asserted below).
179 KOKKOS_INLINE_FUNCTION
181 const int local_subdomain_id,
182 const int x_cell,
183 const int y_cell,
184 const int r_cell,
185 const int wedge,
187 {
188 KOKKOS_ASSERT( operator_stored_matrix_mode_ != linalg::OperatorStoredMatrixMode::Off );
189 local_matrix_storage_.set_matrix( local_subdomain_id, x_cell, y_cell, r_cell, wedge, mat );
190 }
191
192 /// @brief Retrieves the local matrix
193 /// if there are stored local matrices, the desired local matrix is loaded and returned
194 /// if not, the local matrix is assembled on-the-fly
195 KOKKOS_INLINE_FUNCTION
197 const int local_subdomain_id,
198 const int x_cell,
199 const int y_cell,
200 const int r_cell,
201 const int wedge ) const
202 {
203 // request from storage
204 if ( operator_stored_matrix_mode_ != linalg::OperatorStoredMatrixMode::Off )
205 {
 // Fail hard if the matrix was never stored for this element: device-side abort.
206 if ( !local_matrix_storage_.has_matrix( local_subdomain_id, x_cell, y_cell, r_cell, wedge ) )
207 {
208 Kokkos::abort( "No matrix found at that spatial index." );
209 }
210 return local_matrix_storage_.get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, wedge );
211 }
212 else
213 {
 // No storage configured: assemble the element matrix on-the-fly.
214 return assemble_local_matrix( local_subdomain_id, x_cell, y_cell, r_cell, wedge );
215 }
216 }
217
218 void apply_impl( const SrcVectorType& src, DstVectorType& dst )
219 {
220 util::Timer timer_apply( "epsilon_divdiv_apply" );
221
222 if ( operator_apply_mode_ == linalg::OperatorApplyMode::Replace )
223 {
224 assign( dst, 0 );
225 }
226
227 src_ = src.grid_data();
228 dst_ = dst.grid_data();
229
230 if ( src_.extent( 0 ) != dst_.extent( 0 ) || src_.extent( 1 ) != dst_.extent( 1 ) ||
231 src_.extent( 2 ) != dst_.extent( 2 ) || src_.extent( 3 ) != dst_.extent( 3 ) )
232 {
233 throw std::runtime_error( "EpsilonDivDiv: src/dst mismatch" );
234 }
235
236 if ( src_.extent( 1 ) != grid_.extent( 1 ) || src_.extent( 2 ) != grid_.extent( 2 ) )
237 {
238 throw std::runtime_error( "EpsilonDivDiv: src/dst mismatch" );
239 }
240
241 util::Timer timer_kernel( "epsilon_divdiv_kernel" );
242 Kokkos::TeamPolicy<> policy( blocks_, block_size_ );
243 Kokkos::parallel_for( "matvec", policy, *this );
244 // grid::shell::local_domain_md_range_policy_cells( domain_ ),
245 //s *this );
246 Kokkos::fence();
247 timer_kernel.stop();
248
249 if ( operator_communication_mode_ == linalg::OperatorCommunicationMode::CommunicateAdditively )
250 {
251 util::Timer timer_comm( "epsilon_divdiv_comm" );
252
254 domain_, dst_, send_buffers_, recv_buffers_ );
256 }
257 }
258
 /// @brief Builds the non-zero entries of the symmetric gradient of a vector-valued
 ///        basis function whose only non-zero component is `dim`, given the physical
 ///        gradient (g0, g1, g2) of the scalar shape function.
 ///        Outputs the diagonal entries E00/E11/E22, the symmetric off-diagonal
 ///        entries sym01/sym02/sym12 (each 0.5*(E_ij + E_ji)), and gdd = E[dim][dim],
 ///        which is the divergence contribution used in the div-div term.
259 KOKKOS_INLINE_FUNCTION
261 const int dim,
262 const double g0,
263 const double g1,
264 const double g2,
265 double& E00,
266 double& E11,
267 double& E22,
268 double& sym01,
269 double& sym02,
270 double& sym12,
271 double& gdd ) const
272 {
 // Reset all outputs; only the entries relevant for `dim` are filled below.
273 E00 = 0.0;
274 E11 = 0.0;
275 E22 = 0.0;
276 sym01 = 0.0;
277 sym02 = 0.0;
278 sym12 = 0.0;
279 gdd = 0.0;
280
281 // dim selects which COLUMN is populated:
282 // dim==0: E[0][0]=g0, E[1][0]=g1, E[2][0]=g2
283 // dim==1: E[0][1]=g0, E[1][1]=g1, E[2][1]=g2
284 // dim==2: E[0][2]=g0, E[1][2]=g1, E[2][2]=g2
285 switch ( dim )
286 {
287 case 0:
288 E00 = g0;
289 gdd = g0; // E[0][0]
290 sym01 = 0.5 * g1; // 0.5*(E[0][1]=0 + E[1][0]=g1)
291 sym02 = 0.5 * g2; // 0.5*(E[0][2]=0 + E[2][0]=g2)
292 sym12 = 0.0;
293 break;
294
295 case 1:
296 E11 = g1;
297 gdd = g1; // E[1][1]
298 sym01 = 0.5 * g0; // 0.5*(E[0][1]=g0 + E[1][0]=0)
299 sym02 = 0.0;
300 sym12 = 0.5 * g2; // 0.5*(E[1][2]=0 + E[2][1]=g2)
301 break;
302
303 default: // case 2
304 E22 = g2;
305 gdd = g2; // E[2][2]
306 sym01 = 0.0;
307 sym02 = 0.5 * g0; // 0.5*(E[0][2]=g0 + E[2][0]=0)
308 sym12 = 0.5 * g1; // 0.5*(E[1][2]=g1 + E[2][1]=0)
309 break;
310 }
311 }
312
 // Kokkos team handle type used by operator() and team_shmem_size().
313 using Team = Kokkos::TeamPolicy<>::member_type;
314
 /// @brief Per-team scratch (shared-memory) requirement in bytes, given team size ts:
 ///        18 doubles of wedge surface coordinates ([2 wedges][3 nodes][3 coords])
 ///        plus (ts + 1) radial levels of 4*3 src components and 4 k values.
 ///        Must match the scratch layout carved out in operator().
315 KOKKOS_INLINE_FUNCTION
316 static size_t team_shmem_size( const int ts/*team_size*/ )
317 {
318 // We store wedge_surf_phy_coords[2][3][3], src, k
319 return sizeof( double ) * (2 * 3 * 3 + (ts + 1) * (12 + 4));
320 }
321
 /// @brief Team-parallel matvec kernel body. Each team handles one lateral cell
 ///        column block of one subdomain; each thread handles one radial cell.
 ///        The source dofs and coefficient k for the team's radial slab are staged
 ///        in team shared memory once, then each thread assembles and applies the
 ///        two wedge element contributions and scatters them with atomics.
322 KOKKOS_INLINE_FUNCTION
323 void operator()( const Team& team ) const
324 {
325 int local_subdomain_id, x_cell, y_cell, r_cell;
326
 // Decode (subdomain, x_cell, y_cell, radial block) from the league rank.
 // The bitmask/shift decode assumes hex_lat_ == 2^lat_refinement_level_
 // (a power of two) — consistent with the diamond refinement; verify if changed.
327 {
328 int tmp = team.league_rank();
329 const int r_block_index = tmp % blocks_per_column_;
330 tmp /= blocks_per_column_;
331 y_cell = tmp & ( hex_lat_ - 1 );
332 tmp >>= lat_refinement_level_;
333 x_cell = tmp & ( hex_lat_ - 1 );
334 tmp >>= lat_refinement_level_;
335 local_subdomain_id = tmp;
336
 // NOTE(review): no guard against r_cell >= hex_rad_; this relies on
 // hex_rad_ being an exact multiple of the team size — TODO confirm.
337 r_cell = r_block_index * team.team_size() + team.team_rank();
338 }
339
 // Boundary flags of the lower (r_cell) and upper (r_cell+1) radial node layers.
340 const bool at_cmb = has_flag( local_subdomain_id, x_cell, y_cell, r_cell, CMB );
341 const bool at_surface = has_flag( local_subdomain_id, x_cell, y_cell, r_cell + 1, SURFACE );
342
343 // ----- FAST PATH (DCA) -----
344 // - Load ALL source dofs (and k) for the team's r-slab into TEAM SHARED MEMORY once.
345 // - Each thread then reads the dofs for its corresponding r_cell (team_rank) from the shared arrays.
346 //
347 // Team covers radial levels: r_base ... r_base + team_size
348 // (need team_size+1 levels because each thread needs r and r+1).
349 //
 // Node offsets (dx, dy, dr) of the 6 nodes of each of the two wedges in a hex cell.
350 static constexpr int WEDGE_NODE_OFF[2][6][3] = {
351 { { 0, 0, 0 }, { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, { 1, 0, 1 }, { 0, 1, 1 } },
352 { { 1, 1, 0 }, { 0, 1, 0 }, { 1, 0, 0 }, { 1, 1, 1 }, { 0, 1, 1 }, { 1, 0, 1 } } };
353
354 // Map wedge-local node (w, node) -> unique node id in [0..7] that matches your final scatter pattern
355 static constexpr int WEDGE_TO_UNIQUE[2][6] = {
356 { 0, 1, 2, 3, 4, 5 }, // wedge 0
357 { 6, 2, 1, 7, 5, 4 } // wedge 1
358 };
359
360 // ---- single quadrature point collapsed: qp0=qp1=1/3, qp2=0, qw=1 ----
361 constexpr double ONE_THIRD = 1.0 / 3.0;
362 constexpr double ONE_SIXTH = 1.0 / 6.0;
363 constexpr double NEG_TWO_THIRDS = -0.66666666666666663;
364
365 // Reference gradients at qp0=qp1=1/3, qp2=0 (constexpr)
366 static constexpr double dN_ref[6][3] = {
367 { -0.5, -0.5, -ONE_SIXTH },
368 { 0.5, 0.0, -ONE_SIXTH },
369 { 0.0, 0.5, -ONE_SIXTH },
370 { -0.5, -0.5, ONE_SIXTH },
371 { 0.5, 0.0, ONE_SIXTH },
372 { 0.0, 0.5, ONE_SIXTH } };
373
374 // ---------------------------------------------------------
375 // TEAM SCRATCH LAYOUT
376 // wedge_surf_phy_coords: [2][3][3]
377 // src_sh: [team_size+1][4][3] (levels, xy, dim)
378 // k_sh: [team_size+1][4] (levels, xy)
379 // ---------------------------------------------------------
 // Sizes below must stay in sync with team_shmem_size().
380 double* shmem =
381 reinterpret_cast< double* >( team.team_shmem().get_shmem( team_shmem_size( team.team_size() ) ) );
382
383 using Scratch3D = Kokkos::
384 View< double***, Kokkos::LayoutRight, typename Team::scratch_memory_space, Kokkos::MemoryUnmanaged >;
385 using Scratch3DLevels = Kokkos::
386 View< double***, Kokkos::LayoutRight, typename Team::scratch_memory_space, Kokkos::MemoryUnmanaged >;
387 using Scratch2DLevels =
388 Kokkos::View< double**, Kokkos::LayoutRight, typename Team::scratch_memory_space, Kokkos::MemoryUnmanaged >;
389
390 // wedge coords
391 Scratch3D wedge_surf_phy_coords( shmem, 2, 3, 3 );
392 shmem += 2 * 3 * 3;
393
394 const int ts = team.team_size();
395 const int nlev = ts + 1;
396
397 // src_sh(level, xy, dim)
398 Scratch3DLevels src_sh( shmem, nlev, 4, 3 );
399 shmem += nlev * 4 * 3;
400
401 // k_sh(level, xy)
402 Scratch2DLevels k_sh( shmem, nlev, 4 );
403 shmem += nlev * 4;
404
405 // ---------------------------------------------------------
406 // (1) Load surface xy geometry (once per team) as before
407 // ---------------------------------------------------------
408 Kokkos::single( Kokkos::PerTeam( team ), [&]() {
409 const double q00x = grid_( local_subdomain_id, x_cell, y_cell, 0 );
410 const double q00y = grid_( local_subdomain_id, x_cell, y_cell, 1 );
411 const double q00z = grid_( local_subdomain_id, x_cell, y_cell, 2 );
412
413 const double q01x = grid_( local_subdomain_id, x_cell, y_cell + 1, 0 );
414 const double q01y = grid_( local_subdomain_id, x_cell, y_cell + 1, 1 );
415 const double q01z = grid_( local_subdomain_id, x_cell, y_cell + 1, 2 );
416
417 const double q10x = grid_( local_subdomain_id, x_cell + 1, y_cell, 0 );
418 const double q10y = grid_( local_subdomain_id, x_cell + 1, y_cell, 1 );
419 const double q10z = grid_( local_subdomain_id, x_cell + 1, y_cell, 2 );
420
421 const double q11x = grid_( local_subdomain_id, x_cell + 1, y_cell + 1, 0 );
422 const double q11y = grid_( local_subdomain_id, x_cell + 1, y_cell + 1, 1 );
423 const double q11z = grid_( local_subdomain_id, x_cell + 1, y_cell + 1, 2 );
424
425 // wedge 0: (q00,q10,q01)
426 wedge_surf_phy_coords( 0, 0, 0 ) = q00x;
427 wedge_surf_phy_coords( 0, 0, 1 ) = q00y;
428 wedge_surf_phy_coords( 0, 0, 2 ) = q00z;
429 wedge_surf_phy_coords( 0, 1, 0 ) = q10x;
430 wedge_surf_phy_coords( 0, 1, 1 ) = q10y;
431 wedge_surf_phy_coords( 0, 1, 2 ) = q10z;
432 wedge_surf_phy_coords( 0, 2, 0 ) = q01x;
433 wedge_surf_phy_coords( 0, 2, 1 ) = q01y;
434 wedge_surf_phy_coords( 0, 2, 2 ) = q01z;
435
436 // wedge 1: (q11,q01,q10)
437 wedge_surf_phy_coords( 1, 0, 0 ) = q11x;
438 wedge_surf_phy_coords( 1, 0, 1 ) = q11y;
439 wedge_surf_phy_coords( 1, 0, 2 ) = q11z;
440 wedge_surf_phy_coords( 1, 1, 0 ) = q01x;
441 wedge_surf_phy_coords( 1, 1, 1 ) = q01y;
442 wedge_surf_phy_coords( 1, 1, 2 ) = q01z;
443 wedge_surf_phy_coords( 1, 2, 0 ) = q10x;
444 wedge_surf_phy_coords( 1, 2, 1 ) = q10y;
445 wedge_surf_phy_coords( 1, 2, 2 ) = q10z;
446 } );
447
448 // ---------------------------------------------------------
449 // (2) Load ALL k/src dofs for the team's radial slab into shared memory
450 //
451 // Layout:
452 // level = 0..team_size (absolute r = r_base + level)
453 // xy = 0..3 mapped by (dx + 2*dy)
454 //
455 // Each thread loads its own level = team_rank
456 // and the last thread additionally loads level = team_size.
457 // ---------------------------------------------------------
458 const int r_base = ( ( team.league_rank() % blocks_per_column_ ) * ts ); // same as r_block_index * ts
459
460 auto load_level = [&]( const int level ) {
461 const int r_abs = r_base + level;
462
463#pragma unroll
464 for ( int dy = 0; dy <= 1; ++dy )
465 {
466#pragma unroll
467 for ( int dx = 0; dx <= 1; ++dx )
468 {
469 const int xy = dx + 2 * dy;
470
471 const int xi = x_cell + dx;
472 const int yi = y_cell + dy;
473
474 k_sh( level, xy ) = k_( local_subdomain_id, xi, yi, r_abs );
475
476 src_sh( level, xy, 0 ) = src_( local_subdomain_id, xi, yi, r_abs, 0 );
477 src_sh( level, xy, 1 ) = src_( local_subdomain_id, xi, yi, r_abs, 1 );
478 src_sh( level, xy, 2 ) = src_( local_subdomain_id, xi, yi, r_abs, 2 );
479 }
480 }
481 };
482
483 // each thread loads its own level
484 load_level( team.team_rank() );
485
486 // one extra level (team_size) needed for r+1 of last thread
487 if ( team.team_rank() == ts - 1 )
488 {
489 load_level( ts );
490 }
491
 // All threads must see the fully populated scratch views before reading them.
492 team.team_barrier();
493
494 // ---------------------------------------------------------
495 // Thread-private radii (depends on r_cell)
496 // ---------------------------------------------------------
497 const double r_0 = radii_( local_subdomain_id, r_cell );
498 const double r_1 = radii_( local_subdomain_id, r_cell + 1 );
499
500 // Boundary treatment flags (guard the BC query)
501 const bool at_boundary = at_cmb || at_surface;
502 bool treat_boundary = false;
503 if ( at_boundary )
504 {
505 const ShellBoundaryFlag sbf = at_cmb ? CMB : SURFACE;
506 treat_boundary = ( get_boundary_condition_flag( bcs_, sbf ) == DIRICHLET );
507 }
508
 // A shift of 3 skips the three wedge nodes on the Dirichlet radial layer
 // (nodes 0..2 sit at r_cell, nodes 3..5 at r_cell+1; see WEDGE_NODE_OFF).
509 const int cmb_shift = ( ( at_boundary && treat_boundary && ( !diagonal_ ) && at_cmb ) ? 3 : 0 );
510 const int surface_shift = ( ( at_boundary && treat_boundary && ( !diagonal_ ) && at_surface ) ? 3 : 0 );
511
512 // (1a) unique-node accumulation: 8 nodes per dim
513 double dst8[3][8] = { 0.0 };
514
515 // Local level index for this thread
516 const int lvl0 = team.team_rank(); // corresponds to r_cell
517 // lvl1 = lvl0 + 1 corresponds to r_cell+1, always valid because we loaded nlev = ts+1
518
519#pragma unroll
520 for ( int w = 0; w < 2; ++w )
521 {
522 // -------------------------
523 // (A) k_eval collapsed from shared memory
524 // -------------------------
 // Coefficient evaluated at the single quadrature point: mean of the 6 node values.
525 double k_sum = 0.0;
526#pragma unroll
527 for ( int node = 0; node < 6; ++node )
528 {
529 const int dx = WEDGE_NODE_OFF[w][node][0];
530 const int dy = WEDGE_NODE_OFF[w][node][1];
531 const int dr = WEDGE_NODE_OFF[w][node][2];
532
533 const int xy = dx + 2 * dy;
534 const int lvl = lvl0 + dr;
535
536 k_sum += k_sh( lvl, xy );
537 }
538 const double k_eval = ONE_SIXTH * k_sum;
539
540 // -------------------------
541 // (B) Jacobian + inv(J)^T
542 // -------------------------
543 double wJ = 0.0;
544
545 double i00, i01, i02;
546 double i10, i11, i12;
547 double i20, i21, i22;
548
549 {
550 const double half_dr = 0.5 * ( r_1 - r_0 );
551 const double r_mid = 0.5 * ( r_0 + r_1 );
552
553 const double J_0_0 = r_mid * ( -wedge_surf_phy_coords( w, 0, 0 ) + wedge_surf_phy_coords( w, 1, 0 ) );
554 const double J_0_1 = r_mid * ( -wedge_surf_phy_coords( w, 0, 0 ) + wedge_surf_phy_coords( w, 2, 0 ) );
555 const double J_0_2 =
556 half_dr * ( ONE_THIRD * ( wedge_surf_phy_coords( w, 0, 0 ) + wedge_surf_phy_coords( w, 1, 0 ) +
557 wedge_surf_phy_coords( w, 2, 0 ) ) );
558
559 const double J_1_0 = r_mid * ( -wedge_surf_phy_coords( w, 0, 1 ) + wedge_surf_phy_coords( w, 1, 1 ) );
560 const double J_1_1 = r_mid * ( -wedge_surf_phy_coords( w, 0, 1 ) + wedge_surf_phy_coords( w, 2, 1 ) );
561 const double J_1_2 =
562 half_dr * ( ONE_THIRD * ( wedge_surf_phy_coords( w, 0, 1 ) + wedge_surf_phy_coords( w, 1, 1 ) +
563 wedge_surf_phy_coords( w, 2, 1 ) ) );
564
565 const double J_2_0 = r_mid * ( -wedge_surf_phy_coords( w, 0, 2 ) + wedge_surf_phy_coords( w, 1, 2 ) );
566 const double J_2_1 = r_mid * ( -wedge_surf_phy_coords( w, 0, 2 ) + wedge_surf_phy_coords( w, 2, 2 ) );
567 const double J_2_2 =
568 half_dr * ( ONE_THIRD * ( wedge_surf_phy_coords( w, 0, 2 ) + wedge_surf_phy_coords( w, 1, 2 ) +
569 wedge_surf_phy_coords( w, 2, 2 ) ) );
570
571 const double J_det = J_0_0 * J_1_1 * J_2_2 - J_0_0 * J_1_2 * J_2_1 - J_0_1 * J_1_0 * J_2_2 +
572 J_0_1 * J_1_2 * J_2_0 + J_0_2 * J_1_0 * J_2_1 - J_0_2 * J_1_1 * J_2_0;
573
574 const double invJ = 1.0 / J_det;
575
576 // inv(J)^T
577 i00 = invJ * ( J_1_1 * J_2_2 - J_1_2 * J_2_1 );
578 i01 = invJ * ( -J_1_0 * J_2_2 + J_1_2 * J_2_0 );
579 i02 = invJ * ( J_1_0 * J_2_1 - J_1_1 * J_2_0 );
580
581 i10 = invJ * ( -J_0_1 * J_2_2 + J_0_2 * J_2_1 );
582 i11 = invJ * ( J_0_0 * J_2_2 - J_0_2 * J_2_0 );
583 i12 = invJ * ( -J_0_0 * J_2_1 + J_0_1 * J_2_0 );
584
585 i20 = invJ * ( J_0_1 * J_1_2 - J_0_2 * J_1_1 );
586 i21 = invJ * ( -J_0_0 * J_1_2 + J_0_2 * J_1_0 );
587 i22 = invJ * ( J_0_0 * J_1_1 - J_0_1 * J_1_0 );
588
589 wJ = Kokkos::abs( J_det ); // qw=1
590 }
591
592 const double kwJ = k_eval * wJ;
593
594 // -------------------------
595 // (C) grad_u + div_u as scalars (src from shared memory)
596 // -------------------------
 // Only the lower-triangular entries of the symmetrized gradient are tracked;
 // the doubled off-diagonal terms in the pairing below account for symmetry.
597 double gu00 = 0.0;
598 double gu10 = 0.0, gu11 = 0.0;
599 double gu20 = 0.0, gu21 = 0.0, gu22 = 0.0;
600 double div_u = 0.0;
601
602 if ( !diagonal_ )
603 {
604 // Assemble gu** and div_u
605#pragma unroll
606 for ( int dimj = 0; dimj < 3; ++dimj )
607 {
608#pragma unroll
 // Dirichlet node ranges: skip nodes on the treated boundary layer.
609 for ( int node_idx = cmb_shift; node_idx < 6 - surface_shift; ++node_idx )
610 {
611 const double gx = dN_ref[node_idx][0];
612 const double gy = dN_ref[node_idx][1];
613 const double gz = dN_ref[node_idx][2];
614
615 const double g0 = i00 * gx + i01 * gy + i02 * gz;
616 const double g1 = i10 * gx + i11 * gy + i12 * gz;
617 const double g2 = i20 * gx + i21 * gy + i22 * gz;
618
619 double E00, E11, E22, sym01, sym02, sym12, gdd;
620 column_grad_to_sym( dimj, g0, g1, g2, E00, E11, E22, sym01, sym02, sym12, gdd );
621
622 const int dx = WEDGE_NODE_OFF[w][node_idx][0];
623 const int dy = WEDGE_NODE_OFF[w][node_idx][1];
624 const int dr = WEDGE_NODE_OFF[w][node_idx][2];
625
626 const int xy = dx + 2 * dy;
627 const int lvl = lvl0 + dr;
628
629 const double s = src_sh( lvl, xy, dimj );
630
631 gu00 += E00 * s;
632 gu10 += sym01 * s;
633 gu11 += E11 * s;
634 gu20 += sym02 * s;
635 gu21 += sym12 * s;
636 gu22 += E22 * s;
637
638 div_u += gdd * s;
639 }
640 }
641
642 // Pairing -> accumulate into unique-node array dst8
643#pragma unroll
644 for ( int dimi = 0; dimi < 3; ++dimi )
645 {
646#pragma unroll
647 for ( int node_idx = cmb_shift; node_idx < 6 - surface_shift; ++node_idx )
648 {
649 const double gx = dN_ref[node_idx][0];
650 const double gy = dN_ref[node_idx][1];
651 const double gz = dN_ref[node_idx][2];
652
653 const double g0 = i00 * gx + i01 * gy + i02 * gz;
654 const double g1 = i10 * gx + i11 * gy + i12 * gz;
655 const double g2 = i20 * gx + i21 * gy + i22 * gz;
656
657 double E00, E11, E22, sym01, sym02, sym12, gdd;
658 column_grad_to_sym( dimi, g0, g1, g2, E00, E11, E22, sym01, sym02, sym12, gdd );
659
660 const double pairing0 = 2.0 * sym01;
661 const double pairing1 = 2.0 * sym02;
662 const double pairing2 = 2.0 * sym12;
663
664 const int u = WEDGE_TO_UNIQUE[w][node_idx];
665
 // Each off-diagonal term appears twice (E:E symmetry), hence the repeated
 // pairing*gu terms; the diagonal terms carry the explicit factor 2.
666 dst8[dimi][u] += kwJ * ( NEG_TWO_THIRDS * div_u * gdd + pairing0 * gu10 + pairing0 * gu10 +
667 pairing1 * gu20 + pairing1 * gu20 + pairing2 * gu21 + pairing2 * gu21 +
668 2.0 * E00 * gu00 + 2.0 * E11 * gu11 + 2.0 * E22 * gu22 );
669 }
670 }
671 }
672
673 // Diagonal / BC loop -> also accumulate into dst8
 // Note the complementary node range (surface_shift .. 6 - cmb_shift): on a treated
 // Dirichlet boundary this covers exactly the nodes skipped above, so those rows
 // receive only their diagonal contribution.
674 if ( diagonal_ || ( treat_boundary && at_boundary ) )
675 {
676#pragma unroll
677 for ( int dim_diagBC = 0; dim_diagBC < 3; ++dim_diagBC )
678 {
679#pragma unroll
680 for ( int node_idx = surface_shift; node_idx < 6 - cmb_shift; ++node_idx )
681 {
682 const double gx = dN_ref[node_idx][0];
683 const double gy = dN_ref[node_idx][1];
684 const double gz = dN_ref[node_idx][2];
685
686 const double g0 = i00 * gx + i01 * gy + i02 * gz;
687 const double g1 = i10 * gx + i11 * gy + i12 * gz;
688 const double g2 = i20 * gx + i21 * gy + i22 * gz;
689
690 double E00, E11, E22, sym01, sym02, sym12, gdd;
691 column_grad_to_sym( dim_diagBC, g0, g1, g2, E00, E11, E22, sym01, sym02, sym12, gdd );
692
693 const int dx = WEDGE_NODE_OFF[w][node_idx][0];
694 const int dy = WEDGE_NODE_OFF[w][node_idx][1];
695 const int dr = WEDGE_NODE_OFF[w][node_idx][2];
696
697 const int xy = dx + 2 * dy;
698 const int lvl = lvl0 + dr;
699
700 const double s = src_sh( lvl, xy, dim_diagBC );
701
702 const double pairing0 = 4.0 * s;
703 const double pairing1 = 2.0 * s;
704
705 const int u = WEDGE_TO_UNIQUE[w][node_idx];
706
707 dst8[dim_diagBC][u] +=
708 kwJ * ( pairing0 * ( sym01 * sym01 ) + pairing0 * ( sym02 * sym02 ) +
709 pairing0 * ( sym12 * sym12 ) + pairing1 * ( E00 * E00 ) + pairing1 * ( E11 * E11 ) +
710 pairing1 * ( E22 * E22 ) + NEG_TWO_THIRDS * ( gdd * gdd ) * s );
711 }
712 }
713 }
714 } // w
715
716 // Final scatter: 8 unique nodes per dim (same result as original merged scatter)
 // Atomics are required: neighboring cells (handled by other threads/teams)
 // accumulate into the same shared grid nodes.
717 for ( int dim_add = 0; dim_add < 3; ++dim_add )
718 {
719 Kokkos::atomic_add( &dst_( local_subdomain_id, x_cell, y_cell, r_cell, dim_add ), dst8[dim_add][0] );
720 Kokkos::atomic_add( &dst_( local_subdomain_id, x_cell + 1, y_cell, r_cell, dim_add ), dst8[dim_add][1] );
721 Kokkos::atomic_add( &dst_( local_subdomain_id, x_cell, y_cell + 1, r_cell, dim_add ), dst8[dim_add][2] );
722 Kokkos::atomic_add( &dst_( local_subdomain_id, x_cell, y_cell, r_cell + 1, dim_add ), dst8[dim_add][3] );
723 Kokkos::atomic_add(
724 &dst_( local_subdomain_id, x_cell + 1, y_cell, r_cell + 1, dim_add ), dst8[dim_add][4] );
725 Kokkos::atomic_add(
726 &dst_( local_subdomain_id, x_cell, y_cell + 1, r_cell + 1, dim_add ), dst8[dim_add][5] );
727 Kokkos::atomic_add(
728 &dst_( local_subdomain_id, x_cell + 1, y_cell + 1, r_cell, dim_add ), dst8[dim_add][6] );
729 Kokkos::atomic_add(
730 &dst_( local_subdomain_id, x_cell + 1, y_cell + 1, r_cell + 1, dim_add ), dst8[dim_add][7] );
731 }
732 }
733
734 /// @brief: For both trial and test space this function sets up a vector:
735 /// each vector element holds the symmetric gradient (a 3x3 matrix) of the shape function of the corresponding dof
736 /// (if dimi == dimj, these are the same and we are on the diagonal of the vectorial diffusion operator)
737 /// Additionally, we compute the scalar factor for the numerical integral comp: determinant of the jacobian,
738 /// evaluation of the coefficient k on the element and the quadrature weight of the current quad-point.
739
740 /// The idea of this function is that the two vectors can be:
741 /// - accumulated to the result of the local matvec with 2 * num_nodes_per_wedge complexity
742 /// by scaling the dot product of the trial vec and local src dofs with each element of the test vec
743 /// (and adding to the dst dofs, this is the fused local matvec).
744 /// - propagated to the local matrix by an outer product of the two vectors
745 /// (without applying it to dofs). This is e.g. required to assemble the finest grid local
746 /// matrix on-the-fly during GCA/Galerkin coarsening.
747
748 ///
749 KOKKOS_INLINE_FUNCTION void assemble_trial_test_vecs(
750 const int wedge,
751 const dense::Vec< ScalarType, VecDim >& quad_point,
752 const ScalarType quad_weight,
753 const ScalarT r_1,
754 const ScalarT r_2,
755 dense::Vec< ScalarT, 3 > ( *wedge_phy_surf )[3],
756 const dense::Vec< ScalarT, 6 >* k_local_hex,
757 const int dimi,
758 const int dimj,
761 ScalarType& jdet_keval_quadweight ) const
762 {
 // Element Jacobian at the quadrature point, its determinant and inverse transpose.
763 dense::Mat< ScalarType, VecDim, VecDim > J = jac( wedge_phy_surf[wedge], r_1, r_2, quad_point );
764 const auto det = J.det();
765 const auto abs_det = Kokkos::abs( det );
766 const dense::Mat< ScalarType, VecDim, VecDim > J_inv_transposed = J.inv_transposed( det );
767
768 // dot of coeff dofs and element-local shape functions to evaluate the coefficient on the current element
769 ScalarType k_eval = 0.0;
770 for ( int k = 0; k < num_nodes_per_wedge; k++ )
771 {
772 k_eval += shape( k, quad_point ) * k_local_hex[wedge]( k );
773 }
774
 // Symmetric gradients of trial (dimi) and test (dimj) shape functions per node.
775 for ( int k = 0; k < num_nodes_per_wedge; k++ )
776 {
777 sym_grad_i[k] = symmetric_grad( J_inv_transposed, quad_point, k, dimi );
778 sym_grad_j[k] = symmetric_grad( J_inv_transposed, quad_point, k, dimj );
779 }
 // Combined scalar factor for the quadrature contribution: w_q * k(x_q) * |det J|.
780 jdet_keval_quadweight = quad_weight * k_eval * abs_det;
781 }
782
783 /// @brief assemble the local matrix and return it for a given element, wedge, and vectorial component
784 /// (determined by dimi, dimj)
785 KOKKOS_INLINE_FUNCTION
787 const int local_subdomain_id,
788 const int x_cell,
789 const int y_cell,
790 const int r_cell,
791 const int wedge ) const
792 {
793 // Gather surface points for each wedge.
794 // TODO gather this for only 1 wedge
796 wedge_surface_physical_coords( wedge_phy_surf, grid_, local_subdomain_id, x_cell, y_cell );
797
798 // Gather wedge radii.
799 const ScalarT r_1 = radii_( local_subdomain_id, r_cell );
800 const ScalarT r_2 = radii_( local_subdomain_id, r_cell + 1 );
801
 // Gather the coefficient k at the element's nodes.
803 extract_local_wedge_scalar_coefficients( k_local_hex, local_subdomain_id, x_cell, y_cell, r_cell, k_ );
804
805 // Compute the local element matrix.
 // Loop over the 3x3 vector-component blocks of the 18x18 local matrix.
807 for ( int dimi = 0; dimi < 3; ++dimi )
808 {
809 for ( int dimj = 0; dimj < 3; ++dimj )
810 {
811 // spatial dimensions: quadrature points and wedge
812 for ( int q = 0; q < num_quad_points; q++ )
813 {
816 ScalarType jdet_keval_quadweight = 0;
818 wedge,
819 quad_points[q],
820 quad_weights[q],
821 r_1,
822 r_2,
823 wedge_phy_surf,
824 k_local_hex,
825 dimi,
826 dimj,
827 sym_grad_i,
828 sym_grad_j,
829 jdet_keval_quadweight );
830
831 // propagate on local matrix by outer product of test and trial vecs
832 for ( int i = 0; i < num_nodes_per_wedge; i++ )
833 {
834 for ( int j = 0; j < num_nodes_per_wedge; j++ )
835 {
 // Entry = w * ( 2 eps_j : eps_i - 2/3 div_j * div_i ).
836 A( i + dimi * num_nodes_per_wedge, j + dimj * num_nodes_per_wedge ) +=
837 jdet_keval_quadweight *
838 ( 2 * sym_grad_j[j].double_contract( sym_grad_i[i] ) -
839 2.0 / 3.0 * sym_grad_j[j]( dimj, dimj ) * sym_grad_i[i]( dimi, dimi ) );
840 // for the div, we just extract the component from the gradient vector
841 }
842 }
843 }
844 }
845 }
846
847 return A;
848 }
849};
850
853
854} // namespace terra::fe::wedge::operators::shell::epsdivdiv_history
Send and receive buffers for all process-local subdomain boundaries.
Definition communication.hpp:56
void set_stored_matrix_mode(linalg::OperatorStoredMatrixMode operator_stored_matrix_mode, int level_range, grid::Grid4DDataScalar< ScalarType > GCAElements)
allocates memory for the local matrices
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:161
dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > assemble_local_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge) const
assemble the local matrix and return it for a given element, wedge, and vectorial component (determin...
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:786
void set_local_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge, const dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > &mat) const
Set the local matrix stored in the operator.
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:180
const grid::Grid4DDataScalar< ScalarType > & k_grid_data()
Getter for coefficient.
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:137
linalg::OperatorStoredMatrixMode get_stored_matrix_mode()
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:176
void assemble_trial_test_vecs(const int wedge, const dense::Vec< ScalarType, VecDim > &quad_point, const ScalarType quad_weight, const ScalarT r_1, const ScalarT r_2, dense::Vec< ScalarT, 3 >(*wedge_phy_surf)[3], const dense::Vec< ScalarT, 6 > *k_local_hex, const int dimi, const int dimj, dense::Mat< ScalarType, VecDim, VecDim > *sym_grad_i, dense::Mat< ScalarType, VecDim, VecDim > *sym_grad_j, ScalarType &jdet_keval_quadweight) const
: For both trial and test space this function sets up a vector: each vector element holds the symmetr...
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:749
grid::Grid3DDataVec< ScalarT, 3 > get_grid()
Getter for grid member.
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:146
Kokkos::TeamPolicy<>::member_type Team
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:313
grid::Grid2DDataScalar< ScalarT > get_radii() const
Getter for radii member.
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:143
const grid::shell::DistributedDomain & get_domain() const
Getter for domain member.
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:140
terra::grid::Grid4DDataMatrices< ScalarType, LocalMatrixDim, LocalMatrixDim, 2 > Grid4DDataLocalMatrices
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:38
void set_operator_apply_and_communication_modes(const linalg::OperatorApplyMode operator_apply_mode, const linalg::OperatorCommunicationMode operator_communication_mode)
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:125
void set_diagonal(bool v)
S/Getter for diagonal member.
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:134
ScalarT ScalarType
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:36
dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > get_local_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge) const
Retrieves the local matrix: if there are stored local matrices, the desired local matrix is loaded and r...
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:196
EpsilonDivDivKerngenV05ShmemSrcK(const grid::shell::DistributedDomain &domain, const grid::Grid3DDataVec< ScalarT, 3 > &grid, const grid::Grid2DDataScalar< ScalarT > &radii, const grid::Grid4DDataScalar< grid::shell::ShellBoundaryFlag > &mask, const grid::Grid4DDataScalar< ScalarT > &k, BoundaryConditions bcs, bool diagonal, linalg::OperatorApplyMode operator_apply_mode=linalg::OperatorApplyMode::Replace, linalg::OperatorCommunicationMode operator_communication_mode=linalg::OperatorCommunicationMode::CommunicateAdditively, linalg::OperatorStoredMatrixMode operator_stored_matrix_mode=linalg::OperatorStoredMatrixMode::Off)
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:82
bool has_flag(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, grid::shell::ShellBoundaryFlag flag) const
Getter for mask member.
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:150
void apply_impl(const SrcVectorType &src, DstVectorType &dst)
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:218
void column_grad_to_sym(const int dim, const double g0, const double g1, const double g2, double &E00, double &E11, double &E22, double &sym01, double &sym02, double &sym12, double &gdd) const
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:260
void operator()(const Team &team) const
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:323
static size_t team_shmem_size(const int ts)
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:316
static constexpr int LocalMatrixDim
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:37
Parallel data structure organizing the thick spherical shell metadata for distributed (MPI parallel) ...
Definition spherical_shell.hpp:2518
const std::map< SubdomainInfo, std::tuple< LocalSubdomainIdx, SubdomainNeighborhood > > & subdomains() const
Definition spherical_shell.hpp:2650
const DomainInfo & domain_info() const
Returns a const reference.
Definition spherical_shell.hpp:2647
Information about the thick spherical shell mesh.
Definition spherical_shell.hpp:780
const std::vector< double > & radii() const
Definition spherical_shell.hpp:845
int diamond_lateral_refinement_level() const
Definition spherical_shell.hpp:843
int subdomain_num_nodes_radially() const
Equivalent to calling subdomain_num_nodes_radially( subdomain_refinement_level() )
Definition spherical_shell.hpp:861
int subdomain_num_nodes_per_side_laterally() const
Equivalent to calling subdomain_num_nodes_per_side_laterally( subdomain_refinement_level() )
Definition spherical_shell.hpp:852
Static assertion: VectorQ1Scalar satisfies VectorLike concept.
Definition vector_q1.hpp:168
const grid::Grid4DDataVec< ScalarType, VecDim > & grid_data() const
Get const reference to grid data.
Definition vector_q1.hpp:288
bool has_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge) const
Checks for presence of a local matrix for a certain element.
Definition local_matrix_storage.hpp:223
dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > get_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge) const
Retrieves the local matrix; if there are stored local matrices, the desired local matrix is loaded and r...
Definition local_matrix_storage.hpp:175
void set_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge, dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > mat) const
Set the local matrix stored in the operator.
Definition local_matrix_storage.hpp:118
Timer supporting RAII scope or manual stop.
Definition timer.hpp:342
void stop()
Stop the timer and record elapsed time.
Definition timer.hpp:364
Concept for types that can be used as Galerkin coarse-grid operators in a multigrid hierarchy....
Definition operator.hpp:81
void unpack_and_reduce_local_subdomain_boundaries(const grid::shell::DistributedDomain &domain, const GridDataType &data, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_recv_buffers, CommunicationReduction reduction=CommunicationReduction::SUM)
Unpacks and reduces local subdomain boundaries.
Definition communication.hpp:672
void pack_send_and_recv_local_subdomain_boundaries(const grid::shell::DistributedDomain &domain, const GridDataType &data, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_send_buffers, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_recv_buffers)
Packs, sends and recvs local subdomain boundaries using two sets of buffers.
Definition communication.hpp:242
Definition epsilon_divdiv_kerngen_v01_initial.hpp:16
constexpr void quad_felippa_1x1_quad_points(dense::Vec< T, 3 >(&quad_points)[quad_felippa_1x1_num_quad_points])
Definition wedge/quadrature/quadrature.hpp:36
constexpr void quad_felippa_1x1_quad_weights(T(&quad_weights)[quad_felippa_1x1_num_quad_points])
Definition wedge/quadrature/quadrature.hpp:43
constexpr int quad_felippa_1x1_num_quad_points
Definition wedge/quadrature/quadrature.hpp:32
constexpr int num_nodes_per_wedge_surface
Definition kernel_helpers.hpp:6
constexpr dense::Mat< T, 3, 3 > symmetric_grad(const dense::Mat< T, 3, 3 > &J_inv_transposed, const dense::Vec< T, 3 > &quad_point, const int dof, const int dim)
Returns the symmetric gradient of the shape function of a dof at a quadrature point.
Definition integrands.hpp:685
void wedge_surface_physical_coords(dense::Vec< T, 3 >(&wedge_surf_phy_coords)[num_wedges_per_hex_cell][num_nodes_per_wedge_surface], const grid::Grid3DDataVec< T, 3 > &lateral_grid, const int local_subdomain_id, const int x_cell, const int y_cell)
Extracts the (unit sphere) surface vertex coords of the two wedges of a hex cell.
Definition kernel_helpers.hpp:26
constexpr int num_wedges_per_hex_cell
Definition kernel_helpers.hpp:5
void extract_local_wedge_scalar_coefficients(dense::Vec< T, 6 >(&local_coefficients)[2], const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const grid::Grid4DDataScalar< T > &global_coefficients)
Extracts the local vector coefficients for the two wedges of a hex cell from the global coefficient v...
Definition kernel_helpers.hpp:306
constexpr int num_nodes_per_wedge
Definition kernel_helpers.hpp:7
constexpr T shape(const int node_idx, const T xi, const T eta, const T zeta)
(Tensor-product) Shape function.
Definition integrands.hpp:146
constexpr dense::Mat< T, 3, 3 > jac(const dense::Vec< T, 3 > &p1_phy, const dense::Vec< T, 3 > &p2_phy, const dense::Vec< T, 3 > &p3_phy, const T r_1, const T r_2, const T xi, const T eta, const T zeta)
Definition integrands.hpp:657
BoundaryConditionMapping[2] BoundaryConditions
Definition shell/bit_masks.hpp:37
ShellBoundaryFlag
FlagLike that indicates boundary types for the thick spherical shell.
Definition shell/bit_masks.hpp:12
BoundaryConditionFlag get_boundary_condition_flag(const BoundaryConditions bcs, ShellBoundaryFlag sbf)
Retrieve the boundary condition flag that is associated with a location in the shell e....
Definition shell/bit_masks.hpp:42
BoundaryConditionFlag
FlagLike that indicates the type of boundary condition
Definition shell/bit_masks.hpp:25
Kokkos::View< dense::Mat< ScalarType, Rows, Cols > ****[NumMatrices], Layout > Grid4DDataMatrices
Definition grid_types.hpp:173
Kokkos::View< ScalarType ***[VecDim], Layout > Grid3DDataVec
Definition grid_types.hpp:42
Kokkos::View< ScalarType ****, Layout > Grid4DDataScalar
Definition grid_types.hpp:27
Kokkos::View< ScalarType **, Layout > Grid2DDataScalar
Definition grid_types.hpp:21
dense::Mat< ScalarType, 3, 3 > trafo_mat_cartesian_to_normal_tangential(const dense::Vec< ScalarType, 3 > &n_input)
Constructs a robust orthonormal transformation matrix from Cartesian to (normal–tangential–tangential...
Definition local_basis_trafo_normal_tangential.hpp:36
OperatorApplyMode
Modes for applying an operator to a vector.
Definition operator.hpp:30
@ Replace
Overwrite the destination vector.
OperatorStoredMatrixMode
Modes for applying stored matrices.
Definition operator.hpp:47
@ Off
Do not use stored matrices.
OperatorCommunicationMode
Modes for communication during operator application.
Definition operator.hpp:40
@ CommunicateAdditively
Communicate and add results.
constexpr bool has_flag(E mask_value, E flag) noexcept
Checks if a bitmask value contains a specific flag.
Definition bit_masking.hpp:43
detail::PrefixCout logroot([]() { return detail::log_prefix();})
std::ostream subclass that just logs on root and adds a timestamp for each line.
Definition mat.hpp:10
T double_contract(const Mat &mat)
Definition mat.hpp:226
SoA (Structure-of-Arrays) 4D vector grid data.
Definition grid_types.hpp:51
auto extent(int i) const
Definition grid_types.hpp:75