// set_constant, 2D overload. Per the cross-reference list in this listing the signature is
//   void set_constant( const grid::Grid2DDataScalar< ScalarType >& x, ScalarType value ).
// Writes `value` into every entry x( i, j ), iterating over [0, x.extent(0)) x [0, x.extent(1)).
// NOTE(review): the signature line and the dispatch call that consumes these arguments are not visible here.
11template <
typename ScalarType >
15 "set_constant (Grid2DDataScalar)",
16 Kokkos::MDRangePolicy( { 0, 0 }, { x.extent( 0 ), x.extent( 1 ) } ),
17 KOKKOS_LAMBDA(
int i,
int j ) { x( i, j ) = value; } );
// Presumably the 3D overload of set_constant (name taken from the kernel label; signature not visible).
// Writes `value` into every entry x( i, j, k ) across all three extents of x.
22template <
typename ScalarType >
26 "set_constant (Grid3DDataScalar)",
27 Kokkos::MDRangePolicy( { 0, 0, 0 }, { x.extent( 0 ), x.extent( 1 ), x.extent( 2 ) } ),
28 KOKKOS_LAMBDA(
int i,
int j,
int k ) { x( i, j, k ) = value; } );
// Presumably the 4D scalar overload of set_constant (name taken from the kernel label; signature not visible).
// Writes `value` into every entry x( subdomain, i, j, k ) across all four extents of x.
33template <
typename ScalarType >
37 "set_constant (Grid4DDataScalar)",
38 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { x.extent( 0 ), x.extent( 1 ), x.extent( 2 ), x.extent( 3 ) } ),
39 KOKKOS_LAMBDA(
int subdomain,
int i,
int j,
int k ) { x( subdomain, i, j, k ) = value; } );
// Presumably the vector-valued overload of set_constant (name taken from the kernel label; signature not visible).
// Writes the same `value` into every component x( subdomain, i, j, k, d ); the fifth loop index d runs
// over x.extent( 4 ).
44template <
typename ScalarType,
int VecDim >
48 "set_constant (Grid4DDataVec)",
49 Kokkos::MDRangePolicy(
50 { 0, 0, 0, 0, 0 }, { x.extent( 0 ), x.extent( 1 ), x.extent( 2 ), x.extent( 3 ), x.extent( 4 ) } ),
51 KOKKOS_LAMBDA(
int subdomain,
int i,
int j,
int k,
int d ) { x( subdomain, i, j, k, d ) = value; } );
// Presumably the 5D scalar overload of set_constant (name taken from the kernel label; signature not visible).
// Writes `value` into every entry x( subdomain, i, j, k, w ) across all five extents of x.
56template <
typename ScalarType >
60 "set_constant (Grid5DDataScalar)",
61 Kokkos::MDRangePolicy(
62 { 0, 0, 0, 0, 0 }, { x.extent( 0 ), x.extent( 1 ), x.extent( 2 ), x.extent( 3 ), x.extent( 4 ) } ),
63 KOKKOS_LAMBDA(
int subdomain,
int i,
int j,
int k,
int w ) { x( subdomain, i, j, k, w ) = value; } );
// scale. Per the cross-reference list in this listing the signature is
//   void scale( const grid::Grid4DDataScalar< ScalarType >& x, ScalarType value ).
// Multiplies every entry x( local_subdomain, i, j, k ) by `value` in place.
// The kernel label names Grid4DDataScalar to match the 4D iteration range and the argument type
// (it previously said Grid3DDataScalar, which mislabels the kernel in profiler output).
68template <
typename ScalarType >
72 "scale (Grid4DDataScalar)",
73 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { x.extent( 0 ), x.extent( 1 ), x.extent( 2 ), x.extent( 3 ) } ),
74 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k ) { x( local_subdomain, i, j, k ) *= value; } );
// assign_masked_else_keep_old, scalar-constant overload. Per the cross-reference list the signature is
//   void assign_masked_else_keep_old( const grid::Grid4DDataScalar< ScalarType >& dst, const ScalarType& value,
//                                     const grid::Grid4DDataScalar< FlagType >& mask_grid, const FlagType mask_value ).
// For every node: dst = value where util::has_flag( mask_grid(...), mask_value ) holds, otherwise dst is kept.
// The choice is made with a select rather than the blend `m * value + ( 1 - m ) * dst`: the blend
// evaluates 0 * NaN / 0 * Inf for the operand that is NOT chosen, so a non-finite old entry could never be
// overwritten and a non-finite `value` would corrupt unmasked entries. Every entry is still written.
79template <
typename ScalarType, util::FlagLike FlagType >
82 const ScalarType& value,
84 const FlagType mask_value )
88 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { dst.extent( 0 ), dst.extent( 1 ), dst.extent( 2 ), dst.extent( 3 ) } ),
89 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k ) {
90 const bool is_masked =
util::has_flag( mask_grid( local_subdomain, i, j, k ), mask_value );
91 dst( local_subdomain, i, j, k ) = is_masked ? value : dst( local_subdomain, i, j, k );
// Masked copy from `src` into `dst` (presumably an overload of assign_masked_else_keep_old; the
// signature line is not visible in this listing).
// For every node: dst = src where util::has_flag( mask_grid(...), mask_value ) holds, otherwise dst is kept.
// A select is used instead of the blend `m * src + ( 1 - m ) * dst` so that a NaN/Inf in the operand that is
// NOT chosen cannot leak into the result through 0 * NaN or 0 * Inf. Every entry is still written.
// NOTE(review): src is indexed with dst's extents - assumes matching shapes; confirm at call sites.
97template <
typename ScalarType, util::FlagLike FlagType >
102 const FlagType mask_value )
104 Kokkos::parallel_for(
106 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { dst.extent( 0 ), dst.extent( 1 ), dst.extent( 2 ), dst.extent( 3 ) } ),
107 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k ) {
108 const bool is_masked =
util::has_flag( mask_grid( local_subdomain, i, j, k ), mask_value );
109 dst( local_subdomain, i, j, k ) =
110 is_masked ? src( local_subdomain, i, j, k ) : dst( local_subdomain, i, j, k );
// Masked constant assignment for vector-valued data (presumably an overload of
// assign_masked_else_keep_old; the signature line is not visible in this listing).
// The mask is looked up per node ( local_subdomain, i, j, k ) and applied to every component d:
// dst(..., d) = value where the flag is set, otherwise dst(..., d) is kept.
// A select is used instead of the blend `m * value + ( 1 - m ) * dst` so that a NaN/Inf in the operand that
// is NOT chosen cannot leak into the result through 0 * NaN or 0 * Inf. Every entry is still written.
116template <
typename ScalarType,
int VecDim, util::FlagLike FlagType >
119 const ScalarType& value,
121 const FlagType mask_value )
123 Kokkos::parallel_for(
125 Kokkos::MDRangePolicy(
127 { dst.extent( 0 ), dst.extent( 1 ), dst.extent( 2 ), dst.extent( 3 ), dst.extent( 4 ) } ),
128 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k,
int d ) {
129 const bool is_masked =
util::has_flag( mask_grid( local_subdomain, i, j, k ), mask_value );
130 dst( local_subdomain, i, j, k, d ) =
131 is_masked ? value : dst( local_subdomain, i, j, k, d );
// Masked copy from `src` into `dst` for vector-valued data (presumably an overload of
// assign_masked_else_keep_old; the signature line is not visible in this listing).
// The mask is looked up per node ( local_subdomain, i, j, k ) and applied to every component d:
// dst(..., d) = src(..., d) where the flag is set, otherwise dst(..., d) is kept.
// A select is used instead of the blend `m * src + ( 1 - m ) * dst` so that a NaN/Inf in the operand that is
// NOT chosen cannot leak into the result through 0 * NaN or 0 * Inf. Every entry is still written.
// NOTE(review): src is indexed with dst's extents - assumes matching shapes; confirm at call sites.
137template <
typename ScalarType,
int VecDim, util::FlagLike FlagType >
142 const FlagType mask_value )
144 Kokkos::parallel_for(
146 Kokkos::MDRangePolicy(
148 { dst.extent( 0 ), dst.extent( 1 ), dst.extent( 2 ), dst.extent( 3 ), dst.extent( 4 ) } ),
149 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k,
int d ) {
150 const bool is_masked =
util::has_flag( mask_grid( local_subdomain, i, j, k ), mask_value );
151 dst( local_subdomain, i, j, k, d ) =
152 is_masked ? src( local_subdomain, i, j, k, d ) : dst( local_subdomain, i, j, k, d );
// Masked constant assignment to a single vector component (presumably an overload of
// assign_masked_else_keep_old; the signature line is not visible in this listing).
// Iterates over the four node indices only and touches component `vector_component` of dst:
// dst(..., vector_component) = value where the flag is set, otherwise the old entry is kept.
// A select is used instead of the blend `m * value + ( 1 - m ) * dst` so that a NaN/Inf in the operand that
// is NOT chosen cannot leak into the result through 0 * NaN or 0 * Inf. Every visited entry is still written.
// NOTE(review): no range check on vector_component is visible in this fragment.
158template <
typename ScalarType,
int VecDim, util::FlagLike FlagType >
161 const ScalarType& value,
163 const FlagType mask_value,
164 const int vector_component )
166 Kokkos::parallel_for(
168 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { dst.extent( 0 ), dst.extent( 1 ), dst.extent( 2 ), dst.extent( 3 ) } ),
169 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k ) {
170 const bool is_masked =
util::has_flag( mask_grid( local_subdomain, i, j, k ), mask_value );
171 dst( local_subdomain, i, j, k, vector_component ) =
172 is_masked ? value : dst( local_subdomain, i, j, k, vector_component );
// lincomb, one-argument scalar overload. Per the cross-reference list the signature is
//   void lincomb( const grid::Grid4DDataScalar< ScalarType >& y, ScalarType c_0, ScalarType c_1,
//                 const grid::Grid4DDataScalar< ScalarType >& x_1 ).
// Computes y = c_0 + c_1 * x_1 entry by entry; c_0 is an additive constant, not a coefficient of y.
// NOTE(review): x_1 is indexed with y's extents - assumes matching shapes; confirm at call sites.
178template <
typename ScalarType >
185 Kokkos::parallel_for(
186 "lincomb 1 arg (Grid4DDataScalar)",
187 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { y.extent( 0 ), y.extent( 1 ), y.extent( 2 ), y.extent( 3 ) } ),
188 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k ) {
189 y( local_subdomain, i, j, k ) = c_0 + c_1 * x_1( local_subdomain, i, j, k );
// Presumably the two-argument scalar overload of lincomb (name taken from the kernel label; signature not visible).
// Computes y = c_0 + c_1 * x_1 + c_2 * x_2 entry by entry over y's four extents.
// NOTE(review): x_1 and x_2 are indexed with y's extents - assumes matching shapes; confirm at call sites.
195template <
typename ScalarType >
204 Kokkos::parallel_for(
205 "lincomb 2 args (Grid4DDataScalar)",
206 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { y.extent( 0 ), y.extent( 1 ), y.extent( 2 ), y.extent( 3 ) } ),
207 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k ) {
208 y( local_subdomain, i, j, k ) =
209 c_0 + c_1 * x_1( local_subdomain, i, j, k ) + c_2 * x_2( local_subdomain, i, j, k );
// Presumably the three-argument scalar overload of lincomb (name taken from the kernel label; signature not visible).
// Computes y = c_0 + c_1 * x_1 + c_2 * x_2 + c_3 * x_3 entry by entry over y's four extents.
// NOTE(review): the x_* inputs are indexed with y's extents - assumes matching shapes; confirm at call sites.
215template <
typename ScalarType >
226 Kokkos::parallel_for(
227 "lincomb 3 args (Grid4DDataScalar)",
228 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { y.extent( 0 ), y.extent( 1 ), y.extent( 2 ), y.extent( 3 ) } ),
229 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k ) {
230 y( local_subdomain, i, j, k ) = c_0 + c_1 * x_1( local_subdomain, i, j, k ) +
231 c_2 * x_2( local_subdomain, i, j, k ) +
232 c_3 * x_3( local_subdomain, i, j, k );
// Presumably the one-argument vector-valued overload of lincomb (name taken from the kernel label;
// signature not visible).
// Computes y = c_0 + c_1 * x_1 for every component d of every node, iterating over y's five extents.
// NOTE(review): x_1 is indexed with y's extents - assumes matching shapes; confirm at call sites.
238template <
typename ScalarType,
int VecDim >
245 Kokkos::parallel_for(
246 "lincomb 1 arg (Grid4DDataVec)",
247 Kokkos::MDRangePolicy(
248 { 0, 0, 0, 0, 0 }, { y.extent( 0 ), y.extent( 1 ), y.extent( 2 ), y.extent( 3 ), y.extent( 4 ) } ),
249 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k,
int d ) {
250 y( local_subdomain, i, j, k, d ) = c_0 + c_1 * x_1( local_subdomain, i, j, k, d );
// Presumably the two-argument vector-valued overload of lincomb (name taken from the kernel label;
// signature not visible).
// Computes y = c_0 + c_1 * x_1 + c_2 * x_2 for every component d of every node, iterating over y's five extents.
// NOTE(review): x_1 and x_2 are indexed with y's extents - assumes matching shapes; confirm at call sites.
256template <
typename ScalarType,
int VecDim >
265 Kokkos::parallel_for(
266 "lincomb 2 args (Grid4DDataVec)",
267 Kokkos::MDRangePolicy(
268 { 0, 0, 0, 0, 0 }, { y.extent( 0 ), y.extent( 1 ), y.extent( 2 ), y.extent( 3 ), y.extent( 4 ) } ),
269 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k,
int d ) {
270 y( local_subdomain, i, j, k, d ) =
271 c_0 + c_1 * x_1( local_subdomain, i, j, k, d ) + c_2 * x_2( local_subdomain, i, j, k, d );
// Presumably the three-argument vector-valued overload of lincomb (name taken from the kernel label;
// signature not visible).
// Computes y = c_0 + c_1 * x_1 + c_2 * x_2 + c_3 * x_3 for every component d of every node, iterating
// over y's five extents.
// NOTE(review): the x_* inputs are indexed with y's extents - assumes matching shapes; confirm at call sites.
277template <
typename ScalarType,
int VecDim >
288 Kokkos::parallel_for(
289 "lincomb 3 args (Grid4DDataVec)",
290 Kokkos::MDRangePolicy(
291 { 0, 0, 0, 0, 0 }, { y.extent( 0 ), y.extent( 1 ), y.extent( 2 ), y.extent( 3 ), y.extent( 4 ) } ),
292 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k,
int d ) {
293 y( local_subdomain, i, j, k, d ) = c_0 + c_1 * x_1( local_subdomain, i, j, k, d ) +
294 c_2 * x_2( local_subdomain, i, j, k, d ) +
295 c_3 * x_3( local_subdomain, i, j, k, d );
// invert_inplace, scalar overload. Per the cross-reference list the signature is
//   void invert_inplace( const grid::Grid4DDataScalar< ScalarType >& y ).
// Replaces every entry of y with its reciprocal 1.0 / y.
// NOTE(review): no guard against zero entries is visible in this fragment.
301template <
typename ScalarType >
304 Kokkos::parallel_for(
306 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { y.extent( 0 ), y.extent( 1 ), y.extent( 2 ), y.extent( 3 ) } ),
307 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k ) {
308 y( local_subdomain, i, j, k ) = 1.0 / y( local_subdomain, i, j, k );
// Component-wise reciprocal for vector-valued data (presumably the Grid4DDataVec overload of
// invert_inplace; neither the signature nor the kernel label is visible in this listing).
// Replaces every entry y( local_subdomain, i, j, k, d ) with 1.0 / y( local_subdomain, i, j, k, d ).
// NOTE(review): no guard against zero entries is visible in this fragment.
314template <
typename ScalarType,
int VecDim >
317 Kokkos::parallel_for(
319 Kokkos::MDRangePolicy(
320 { 0, 0, 0, 0, 0 }, { y.extent( 0 ), y.extent( 1 ), y.extent( 2 ), y.extent( 3 ), y.extent( 4 ) } ),
321 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k,
int d ) {
322 y( local_subdomain, i, j, k, d ) = 1.0 / y( local_subdomain, i, j, k, d );
// mult_elementwise_inplace, scalar overload. Per the cross-reference list the signature is
//   void mult_elementwise_inplace( const grid::Grid4DDataScalar< ScalarType >& y,
//                                  const grid::Grid4DDataScalar< ScalarType >& x ).
// Multiplies y by x entry by entry and stores the product back into y.
// NOTE(review): x is indexed with y's extents - assumes matching shapes; confirm at call sites.
328template <
typename ScalarType >
333 Kokkos::parallel_for(
334 "mult_elementwise_inplace",
335 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { y.extent( 0 ), y.extent( 1 ), y.extent( 2 ), y.extent( 3 ) } ),
336 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k ) {
337 y( local_subdomain, i, j, k ) *= x( local_subdomain, i, j, k );
// Presumably the vector-valued overload of mult_elementwise_inplace (name taken from the kernel label;
// signature not visible).
// Multiplies y by x component by component (index d included) and stores the product back into y.
// NOTE(review): x is indexed with y's extents - assumes matching shapes; confirm at call sites.
343template <
typename ScalarType,
int VecDim >
348 Kokkos::parallel_for(
349 "mult_elementwise_inplace",
350 Kokkos::MDRangePolicy(
351 { 0, 0, 0, 0, 0 }, { y.extent( 0 ), y.extent( 1 ), y.extent( 2 ), y.extent( 3 ), y.extent( 4 ) } ),
352 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k,
int d ) {
353 y( local_subdomain, i, j, k, d ) *= x( local_subdomain, i, j, k, d );
// min_entry. Per the cross-reference list the signature is
//   ScalarType min_entry( const grid::Grid4DDataScalar< ScalarType >& x ).
// Step 1: minimum over all local entries of x via a Kokkos::Min reduction into min_val.
// Step 2: in-place MPI_Allreduce with MPI_MIN over MPI_COMM_WORLD, so the call is collective.
// NOTE(review): min_val starts at 0.0; presumably the Kokkos::Min reducer resets it to the reduction
// identity before reducing - confirm against the Kokkos reducer documentation.
359template <
typename ScalarType >
362 ScalarType min_val = 0.0;
363 Kokkos::parallel_reduce(
365 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { x.extent( 0 ), x.extent( 1 ), x.extent( 2 ), x.extent( 3 ) } ),
366 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k, ScalarType& local_min ) {
367 ScalarType val = x( local_subdomain, i, j, k );
368 local_min = Kokkos::min( local_min, val );
370 Kokkos::Min< ScalarType >( min_val ) );
374 MPI_Allreduce( MPI_IN_PLACE, &min_val, 1, mpi::mpi_datatype< ScalarType >(), MPI_MIN, MPI_COMM_WORLD );
// min_abs_entry. Per the cross-reference list the signature is
//   ScalarType min_abs_entry( const grid::Grid4DDataScalar< ScalarType >& x ).
// Step 1: minimum of Kokkos::abs( x ) over all local entries via a Kokkos::Min reduction into min_mag.
// Step 2: in-place MPI_Allreduce with MPI_MIN over MPI_COMM_WORLD, so the call is collective.
// NOTE(review): min_mag starts at 0.0; presumably the Kokkos::Min reducer resets it to the reduction
// identity before reducing - confirm against the Kokkos reducer documentation.
379template <
typename ScalarType >
382 ScalarType min_mag = 0.0;
383 Kokkos::parallel_reduce(
385 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { x.extent( 0 ), x.extent( 1 ), x.extent( 2 ), x.extent( 3 ) } ),
386 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k, ScalarType& local_min ) {
387 ScalarType val = Kokkos::abs( x( local_subdomain, i, j, k ) );
388 local_min = Kokkos::min( local_min, val );
390 Kokkos::Min< ScalarType >( min_mag ) );
394 MPI_Allreduce( MPI_IN_PLACE, &min_mag, 1, mpi::mpi_datatype< ScalarType >(), MPI_MIN, MPI_COMM_WORLD );
// max_abs_entry, scalar overload. Per the cross-reference list the signature is
//   ScalarType max_abs_entry( const grid::Grid4DDataScalar< ScalarType >& x ).
// Step 1: maximum of Kokkos::abs( x ) over all local entries via a Kokkos::Max reduction into max_mag.
// Step 2: in-place MPI_Allreduce with MPI_MAX over MPI_COMM_WORLD, so the call is collective.
399template <
typename ScalarType >
402 ScalarType max_mag = 0.0;
403 Kokkos::parallel_reduce(
405 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { x.extent( 0 ), x.extent( 1 ), x.extent( 2 ), x.extent( 3 ) } ),
406 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k, ScalarType& local_max ) {
407 ScalarType val = Kokkos::abs( x( local_subdomain, i, j, k ) );
408 local_max = Kokkos::max( local_max, val );
410 Kokkos::Max< ScalarType >( max_mag ) );
414 MPI_Allreduce( MPI_IN_PLACE, &max_mag, 1, mpi::mpi_datatype< ScalarType >(), MPI_MAX, MPI_COMM_WORLD );
// Largest absolute component of vector-valued data (presumably the Grid4DDataVec overload of
// max_abs_entry; neither the signature nor the kernel label is visible in this listing).
// Each component x( local_subdomain, i, j, k, d ) is considered on its own - this is not a vector norm.
// The local Kokkos::Max result is combined across ranks with an in-place MPI_Allreduce( MPI_MAX ) over
// MPI_COMM_WORLD, so the call is collective.
419template <
typename ScalarType,
int VecDim >
422 ScalarType max_mag = 0.0;
423 Kokkos::parallel_reduce(
425 Kokkos::MDRangePolicy(
426 { 0, 0, 0, 0, 0 }, { x.extent( 0 ), x.extent( 1 ), x.extent( 2 ), x.extent( 3 ), x.extent( 4 ) } ),
427 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k,
int d, ScalarType& local_max ) {
428 ScalarType val = Kokkos::abs( x( local_subdomain, i, j, k, d ) );
429 local_max = Kokkos::max( local_max, val );
431 Kokkos::Max< ScalarType >( max_mag ) );
435 MPI_Allreduce( MPI_IN_PLACE, &max_mag, 1, mpi::mpi_datatype< ScalarType >(), MPI_MAX, MPI_COMM_WORLD );
// Masked maximum of absolute values (function name and full signature are not visible in this listing;
// presumably a masked variant of max_abs_entry).
// Only entries for which util::has_flag( mask(...), mask_value ) holds take part in the Kokkos::Max
// reduction; all other entries are skipped.
// The local result is combined across ranks with an in-place MPI_Allreduce( MPI_MAX ) over MPI_COMM_WORLD,
// so the call is collective.
// NOTE(review): mask is indexed with x's extents - assumes matching shapes; confirm at call sites.
440template <
typename ScalarType, util::FlagLike FlagType >
444 const FlagType& mask_value )
446 ScalarType max_mag = 0.0;
447 Kokkos::parallel_reduce(
449 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { x.extent( 0 ), x.extent( 1 ), x.extent( 2 ), x.extent( 3 ) } ),
450 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k, ScalarType& local_max ) {
451 if (
util::has_flag( mask( local_subdomain, i, j, k ), mask_value ) )
453 ScalarType val = Kokkos::abs( x( local_subdomain, i, j, k ) );
454 local_max = Kokkos::max( local_max, val );
457 Kokkos::Max< ScalarType >( max_mag ) );
461 MPI_Allreduce( MPI_IN_PLACE, &max_mag, 1, mpi::mpi_datatype< ScalarType >(), MPI_MAX, MPI_COMM_WORLD );
// max_vector_magnitude. Per the cross-reference list the signature is
//   ScalarType max_vector_magnitude( const grid::Grid4DDataVec< ScalarType, VecDim >& x ).
// Per node: sums the squares of the VecDim components into `val` (its declaration is not visible in this
// listing), takes Kokkos::sqrt, and feeds the result into a Kokkos::Max reduction.
// The local maximum is combined across ranks with an in-place MPI_Allreduce( MPI_MAX ) over MPI_COMM_WORLD,
// so the call is collective.
466template <
typename ScalarType,
int VecDim >
469 ScalarType max_mag = 0.0;
470 Kokkos::parallel_reduce(
471 "max_vector_magnitude",
472 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { x.extent( 0 ), x.extent( 1 ), x.extent( 2 ), x.extent( 3 ) } ),
473 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k, ScalarType& local_max ) {
475 for (
int d = 0; d < VecDim; ++d )
477 val += x( local_subdomain, i, j, k, d ) * x( local_subdomain, i, j, k, d );
479 val = Kokkos::sqrt( val );
480 local_max = Kokkos::max( local_max, val );
482 Kokkos::Max< ScalarType >( max_mag ) );
486 MPI_Allreduce( MPI_IN_PLACE, &max_mag, 1, mpi::mpi_datatype< ScalarType >(), MPI_MAX, MPI_COMM_WORLD );
// vector_magnitude. Per the cross-reference list the signature is
//   void vector_magnitude( grid::Grid4DDataScalar< ScalarType >& magnitude_out,
//                          const grid::Grid4DDataVec< ScalarType, VecDim >& vectorial_data_in ).
// Per node: loops over the VecDim components of vectorial_data_in, forming products of each component
// with itself (the accumulation statement and the declaration of `val` are not visible in this listing),
// then stores Kokkos::sqrt( val ) in magnitude_out.
// NOTE(review): magnitude_out is indexed with the input's first four extents - assumes matching shapes.
491template <
typename ScalarType,
int VecDim >
496 Kokkos::parallel_for(
498 Kokkos::MDRangePolicy(
500 { vectorial_data_in.extent( 0 ),
501 vectorial_data_in.extent( 1 ),
502 vectorial_data_in.extent( 2 ),
503 vectorial_data_in.extent( 3 ) } ),
504 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k ) {
506 for (
int d = 0; d < VecDim; ++d )
509 vectorial_data_in( local_subdomain, i, j, k, d ) * vectorial_data_in( local_subdomain, i, j, k, d );
511 magnitude_out( local_subdomain, i, j, k ) = Kokkos::sqrt( val );
// extract_vector_component. Per the cross-reference list the signature is
//   void extract_vector_component( grid::Grid4DDataScalar< ScalarType >& component_out,
//                                  const grid::Grid4DDataVec< ScalarType, VecDim >& vectorial_data_in,
//                                  const int component ).
// Validates `component` against [0, VecDim) and calls Kokkos::abort with a message when it is out of range.
// Otherwise copies component `component` of every node into component_out.
// NOTE(review): component_out is indexed with the input's first four extents - assumes matching shapes.
517template <
typename ScalarType,
int VecDim >
521 const int component )
523 if ( component < 0 || component >= VecDim )
525 Kokkos::abort(
"Vector component invalid." );
528 Kokkos::parallel_for(
529 "extract_vector_component",
530 Kokkos::MDRangePolicy(
532 { vectorial_data_in.extent( 0 ),
533 vectorial_data_in.extent( 1 ),
534 vectorial_data_in.extent( 2 ),
535 vectorial_data_in.extent( 3 ) } ),
536 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k ) {
537 component_out( local_subdomain, i, j, k ) = vectorial_data_in( local_subdomain, i, j, k, component );
// set_vector_component. Per the cross-reference list the signature is
//   void set_vector_component( grid::Grid4DDataVec< ScalarType, VecDim >& vectorial_data,
//                              const int component, const ScalarType constant ).
// Validates `component` against [0, VecDim) and calls Kokkos::abort with a message when it is out of range.
// Otherwise writes `constant` into component `component` of every node; other components are untouched.
543template <
typename ScalarType,
int VecDim >
547 const ScalarType constant )
549 if ( component < 0 || component >= VecDim )
551 Kokkos::abort(
"Vector component invalid." );
554 Kokkos::parallel_for(
555 "set_vector_component",
556 Kokkos::MDRangePolicy(
558 { vectorial_data.extent( 0 ),
559 vectorial_data.extent( 1 ),
560 vectorial_data.extent( 2 ),
561 vectorial_data.extent( 3 ) } ),
562 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k ) {
563 vectorial_data( local_subdomain, i, j, k, component ) = constant;
// sum_of_absolutes. Per the cross-reference list the signature is
//   ScalarType sum_of_absolutes( const grid::Grid4DDataScalar< ScalarType >& x ).
// Step 1: sums Kokkos::abs( x ) over all local entries via a Kokkos::Sum reduction into sum_abs.
// Step 2: in-place MPI_Allreduce with MPI_SUM over MPI_COMM_WORLD, so the call is collective.
569template <
typename ScalarType >
572 ScalarType sum_abs = 0.0;
573 Kokkos::parallel_reduce(
575 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { x.extent( 0 ), x.extent( 1 ), x.extent( 2 ), x.extent( 3 ) } ),
576 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k, ScalarType& local_sum_abs ) {
577 ScalarType val = Kokkos::abs( x( local_subdomain, i, j, k ) );
578 local_sum_abs = local_sum_abs + val;
580 Kokkos::Sum< ScalarType >( sum_abs ) );
584 MPI_Allreduce( MPI_IN_PLACE, &sum_abs, 1, mpi::mpi_datatype< ScalarType >(), MPI_SUM, MPI_COMM_WORLD );
// count_masked. Per the cross-reference list the signature is
//   ScalarType count_masked( const grid::Grid4DDataScalar< FlagType >& mask, const FlagType& mask_value ).
// Counts the local entries of `mask` for which util::has_flag( entry, mask_value ) holds by summing 1 or 0
// (as ScalarType) in a Kokkos::Sum reduction, then adds the per-rank counts with an in-place
// MPI_Allreduce( MPI_SUM ) over MPI_COMM_WORLD, so the call is collective.
// The count is accumulated and returned in ScalarType, which callers choose via the template argument.
589template <
typename ScalarType, util::FlagLike FlagType >
592 auto count =
static_cast< ScalarType
>( 0 );
594 Kokkos::parallel_reduce(
596 Kokkos::MDRangePolicy(
597 { 0, 0, 0, 0 }, { mask.extent( 0 ), mask.extent( 1 ), mask.extent( 2 ), mask.extent( 3 ) } ),
598 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k, ScalarType& local_sum ) {
599 const ScalarType mask_val =
util::has_flag( mask( local_subdomain, i, j, k ), mask_value ) ?
600 static_cast< ScalarType
>( 1 ) :
601 static_cast< ScalarType
>( 0 );
602 local_sum = local_sum + mask_val;
604 Kokkos::Sum< ScalarType >( count ) );
608 MPI_Allreduce( MPI_IN_PLACE, &count, 1, mpi::mpi_datatype< ScalarType >(), MPI_SUM, MPI_COMM_WORLD );
// masked_sum, single-mask overload. Per the cross-reference list the signature is
//   ScalarType masked_sum( const grid::Grid4DDataScalar< ScalarType >& x,
//                          const grid::Grid4DDataScalar< FlagType >& mask, const FlagType& mask_value ).
// Sums x over the local entries for which util::has_flag( mask(...), mask_value ) holds, then adds the
// per-rank sums with an in-place MPI_Allreduce( MPI_SUM ) over MPI_COMM_WORLD (collective call).
// Masked-out entries contribute an exact zero through a select; multiplying them by 0 instead would turn
// a NaN/Inf in an excluded entry into NaN and poison the whole sum.
// NOTE(review): mask is indexed with x's extents - assumes matching shapes; confirm at call sites.
613template <
typename ScalarType, util::FlagLike FlagType >
617 const FlagType& mask_value )
619 ScalarType sum = 0.0;
621 Kokkos::parallel_reduce(
623 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { x.extent( 0 ), x.extent( 1 ), x.extent( 2 ), x.extent( 3 ) } ),
624 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k, ScalarType& local_sum ) {
625 const bool is_masked =
util::has_flag( mask( local_subdomain, i, j, k ), mask_value );
626 ScalarType val = is_masked ? x( local_subdomain, i, j, k ) : static_cast< ScalarType >( 0 );
627 local_sum = local_sum + val;
629 Kokkos::Sum< ScalarType >( sum ) );
633 MPI_Allreduce( MPI_IN_PLACE, &sum, 1, mpi::mpi_datatype< ScalarType >(), MPI_SUM, MPI_COMM_WORLD );
// Sum of x restricted to entries selected by two masks at once (presumably the two-mask overload of
// masked_sum; the signature line is not visible in this listing).
// An entry contributes only when BOTH util::has_flag( mask0(...), mask0_value ) and
// util::has_flag( mask1(...), mask1_value ) hold. The per-rank sums are added with an in-place
// MPI_Allreduce( MPI_SUM ) over MPI_COMM_WORLD (collective call).
// Excluded entries contribute an exact zero through a select; multiplying them by 0 instead would turn
// a NaN/Inf in an excluded entry into NaN and poison the whole sum.
// NOTE(review): both masks are indexed with x's extents - assumes matching shapes; confirm at call sites.
638template <
typename ScalarType, util::FlagLike FlagType0, util::FlagLike FlagType1 >
643 const FlagType0& mask0_value,
644 const FlagType1& mask1_value )
646 ScalarType sum = 0.0;
648 Kokkos::parallel_reduce(
650 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { x.extent( 0 ), x.extent( 1 ), x.extent( 2 ), x.extent( 3 ) } ),
651 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k, ScalarType& local_sum ) {
652 const bool in_both_masks =
util::has_flag( mask0( local_subdomain, i, j, k ), mask0_value ) &&
util::has_flag( mask1( local_subdomain, i, j, k ), mask1_value );
655 ScalarType val = in_both_masks ? x( local_subdomain, i, j, k ) : static_cast< ScalarType >( 0 );
656 local_sum = local_sum + val;
658 Kokkos::Sum< ScalarType >( sum ) );
662 MPI_Allreduce( MPI_IN_PLACE, &sum, 1, mpi::mpi_datatype< ScalarType >(), MPI_SUM, MPI_COMM_WORLD );
// dot_product. Per the cross-reference list the signature is
//   ScalarType dot_product( const grid::Grid4DDataScalar< ScalarType >& x,
//                           const grid::Grid4DDataScalar< ScalarType >& y ).
// Step 1: sums x * y over all local entries via a Kokkos::Sum reduction into dot_prod.
// Step 2: Kokkos::fence so the local result is complete before it is handed to MPI.
// Step 3: in-place MPI_Allreduce with MPI_SUM over MPI_COMM_WORLD, so the call is collective.
// NOTE(review): y is indexed with x's extents - assumes matching shapes; confirm at call sites.
667template <
typename ScalarType >
670 ScalarType dot_prod = 0.0;
672 Kokkos::parallel_reduce(
674 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { x.extent( 0 ), x.extent( 1 ), x.extent( 2 ), x.extent( 3 ) } ),
675 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k, ScalarType& local_dot_prod ) {
676 ScalarType val = x( local_subdomain, i, j, k ) * y( local_subdomain, i, j, k );
677 local_dot_prod = local_dot_prod + val;
679 Kokkos::Sum< ScalarType >( dot_prod ) );
681 Kokkos::fence(
"dot_product" );
683 MPI_Allreduce( MPI_IN_PLACE, &dot_prod, 1, mpi::mpi_datatype< ScalarType >(), MPI_SUM, MPI_COMM_WORLD );
// masked_dot_product, scalar overload. Per the cross-reference list the signature is
//   ScalarType masked_dot_product( const grid::Grid4DDataScalar< ScalarType >& x,
//                                  const grid::Grid4DDataScalar< ScalarType >& y,
//                                  const grid::Grid4DDataScalar< FlagType >& mask, const FlagType& mask_value ).
// Sums x * y over the local entries for which util::has_flag( mask(...), mask_value ) holds, fences, then
// adds the per-rank results with an in-place MPI_Allreduce( MPI_SUM ) over MPI_COMM_WORLD (collective call).
// Masked-out entries contribute an exact zero through a select; multiplying the product by 0 instead
// would turn a NaN/Inf in an excluded entry into NaN and poison the whole result.
// NOTE(review): y and mask are indexed with x's extents - assumes matching shapes; confirm at call sites.
688template <
typename ScalarType, util::FlagLike FlagType >
693 const FlagType& mask_value )
695 ScalarType dot_prod = 0.0;
697 Kokkos::parallel_reduce(
698 "masked_dot_product",
699 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { x.extent( 0 ), x.extent( 1 ), x.extent( 2 ), x.extent( 3 ) } ),
700 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k, ScalarType& local_dot_prod ) {
701 const bool is_masked =
util::has_flag( mask( local_subdomain, i, j, k ), mask_value );
702 ScalarType val = is_masked ? x( local_subdomain, i, j, k ) * y( local_subdomain, i, j, k ) : static_cast< ScalarType >( 0 );
703 local_dot_prod = local_dot_prod + val;
705 Kokkos::Sum< ScalarType >( dot_prod ) );
707 Kokkos::fence(
"masked_dot_product" );
709 MPI_Allreduce( MPI_IN_PLACE, &dot_prod, 1, mpi::mpi_datatype< ScalarType >(), MPI_SUM, MPI_COMM_WORLD );
// Presumably the vector-valued overload of masked_dot_product (name taken from the kernel label; the
// signature line is not visible in this listing).
// The mask is looked up per node ( local_subdomain, i, j, k ) and applies to all components d of that node.
// Sums x * y over every component of every selected node, fences, then adds the per-rank results with an
// in-place MPI_Allreduce( MPI_SUM ) over MPI_COMM_WORLD (collective call).
// Masked-out entries contribute an exact zero through a select; multiplying the product by 0 instead
// would turn a NaN/Inf in an excluded entry into NaN and poison the whole result.
// NOTE(review): y and mask are indexed with x's extents - assumes matching shapes; confirm at call sites.
714template <
typename ScalarType, util::FlagLike FlagType,
int VecDim >
719 const FlagType& mask_value )
721 ScalarType dot_prod = 0.0;
723 Kokkos::parallel_reduce(
724 "masked_dot_product",
725 Kokkos::MDRangePolicy(
726 { 0, 0, 0, 0, 0 }, { x.extent( 0 ), x.extent( 1 ), x.extent( 2 ), x.extent( 3 ), x.extent( 4 ) } ),
727 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k,
int d, ScalarType& local_dot_prod ) {
728 const bool is_masked =
util::has_flag( mask( local_subdomain, i, j, k ), mask_value );
729 ScalarType val = is_masked ? x( local_subdomain, i, j, k, d ) * y( local_subdomain, i, j, k, d ) : static_cast< ScalarType >( 0 );
730 local_dot_prod = local_dot_prod + val;
732 Kokkos::Sum< ScalarType >( dot_prod ) );
734 Kokkos::fence(
"masked_dot_product" );
736 MPI_Allreduce( MPI_IN_PLACE, &dot_prod, 1, mpi::mpi_datatype< ScalarType >(), MPI_SUM, MPI_COMM_WORLD );
// has_nan_or_inf, scalar overload. Per the cross-reference list the signature is
//   bool has_nan_or_inf( const grid::Grid4DDataScalar< ScalarType >& x ).
// Boolean reduction over all local entries: the per-thread flag becomes true as soon as an entry is NaN
// (Kokkos::isnan) or infinite (Kokkos::isinf) and stays true afterwards.
// The kernel label names this function (it previously carried the copy-pasted label
// "masked_dot_product", which mislabels the kernel in profiler output).
// NOTE(review): the reducer argument and any cross-rank combination of the flag are not visible in this listing.
741template <
typename ScalarType >
746 Kokkos::parallel_reduce(
747 "has_nan_or_inf",
748 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { x.extent( 0 ), x.extent( 1 ), x.extent( 2 ), x.extent( 3 ) } ),
749 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k,
bool& local_has_nan_or_inf ) {
750 local_has_nan_or_inf = local_has_nan_or_inf || ( Kokkos::isnan( x( local_subdomain, i, j, k ) ) ||
751 Kokkos::isinf( x( local_subdomain, i, j, k ) ) );
// NaN/Inf check for vector-valued data (presumably the Grid4DDataVec overload of has_nan_or_inf; the
// signature line is not visible in this listing).
// Boolean reduction over every component x( local_subdomain, i, j, k, d ): the per-thread flag becomes
// true as soon as a component is NaN (Kokkos::isnan) or infinite (Kokkos::isinf) and stays true afterwards.
// The kernel label names the operation (it previously carried the copy-pasted label
// "masked_dot_product", which mislabels the kernel in profiler output).
// NOTE(review): the reducer argument and any cross-rank combination of the flag are not visible in this listing.
762template <
typename ScalarType,
int VecDim >
767 Kokkos::parallel_reduce(
768 "has_nan_or_inf",
769 Kokkos::MDRangePolicy(
770 { 0, 0, 0, 0, 0 }, { x.extent( 0 ), x.extent( 1 ), x.extent( 2 ), x.extent( 3 ), x.extent( 4 ) } ),
771 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k,
int d,
bool& local_has_nan_or_inf ) {
772 local_has_nan_or_inf = local_has_nan_or_inf || ( Kokkos::isnan( x( local_subdomain, i, j, k, d ) ) ||
773 Kokkos::isinf( x( local_subdomain, i, j, k, d ) ) );
// cast. Per the cross-reference list the signature is
//   void cast( const grid::Grid4DDataScalar< ScalarTypeDst >& dst,
//              const grid::Grid4DDataScalar< ScalarTypeSrc >& src ).
// Copies src into dst entry by entry, converting each value with static_cast< ScalarTypeDst >.
// NOTE(review): src is indexed with dst's extents - assumes matching shapes; confirm at call sites.
784template <
typename ScalarTypeDst,
typename ScalarTypeSrc >
787 Kokkos::parallel_for(
789 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { dst.extent( 0 ), dst.extent( 1 ), dst.extent( 2 ), dst.extent( 3 ) } ),
790 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k ) {
791 dst( local_subdomain, i, j, k ) =
static_cast< ScalarTypeDst
>( src( local_subdomain, i, j, k ) );
// rand, scalar overload. Per the cross-reference list the signature is
//   void rand( const grid::Grid4DDataScalar< ScalarTypeDst >& dst ).
// Fills every entry of dst with generator.drand() converted to ScalarTypeDst.
// The condition/message pair below restricts ScalarTypeDst to double or float (the statement that
// consumes them - presumably a static_assert - is not visible in this listing).
// Each kernel iteration borrows a generator state from the pool and returns it after drawing one number.
// NOTE(review): the pool is seeded with the fixed value 12345, so presumably every call and every MPI rank
// starts from the same seed - confirm this reproducibility is intended.
797template <
typename ScalarTypeDst >
801 std::is_same_v< ScalarTypeDst, double > || std::is_same_v< ScalarTypeDst, float >,
802 "Random integers not implemented. But can be done easily below." );
804 Kokkos::Random_XorShift64_Pool<> random_pool( 12345 );
805 Kokkos::parallel_for(
807 Kokkos::MDRangePolicy( { 0, 0, 0, 0 }, { dst.extent( 0 ), dst.extent( 1 ), dst.extent( 2 ), dst.extent( 3 ) } ),
808 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k ) {
809 auto generator = random_pool.get_state();
810 dst( local_subdomain, i, j, k ) =
static_cast< ScalarTypeDst
>( generator.drand() );
811 random_pool.free_state( generator );
// Random fill for vector-valued data (presumably the Grid4DDataVec overload of rand; the signature line
// is not visible in this listing).
// Fills every component dst( local_subdomain, i, j, k, d ) with generator.drand() converted to ScalarTypeDst.
// The condition/message pair below restricts ScalarTypeDst to double or float (the statement that
// consumes them - presumably a static_assert - is not visible in this listing).
// Each kernel iteration borrows a generator state from the pool and returns it after drawing one number.
// NOTE(review): the pool is seeded with the fixed value 12345, so presumably every call and every MPI rank
// starts from the same seed - confirm this reproducibility is intended.
817template <
typename ScalarTypeDst,
int VecDim >
821 std::is_same_v< ScalarTypeDst, double > || std::is_same_v< ScalarTypeDst, float >,
822 "Random integers not implemented. But can be done easily below." );
824 Kokkos::Random_XorShift64_Pool<> random_pool( 12345 );
825 Kokkos::parallel_for(
827 Kokkos::MDRangePolicy(
829 { dst.extent( 0 ), dst.extent( 1 ), dst.extent( 2 ), dst.extent( 3 ), dst.extent( 4 ) } ),
830 KOKKOS_LAMBDA(
int local_subdomain,
int i,
int j,
int k,
int d ) {
831 auto generator = random_pool.get_state();
832 dst( local_subdomain, i, j, k, d ) =
static_cast< ScalarTypeDst
>( generator.drand() );
833 random_pool.free_state( generator );
Kokkos::View< ScalarType *****, Layout > Grid5DDataScalar
Definition grid_types.hpp:28
Kokkos::View< ScalarType ***, Layout > Grid3DDataScalar
Definition grid_types.hpp:22
Kokkos::View< ScalarType ****[VecDim], Layout > Grid4DDataVec
Definition grid_types.hpp:43
Kokkos::View< ScalarType ****, Layout > Grid4DDataScalar
Definition grid_types.hpp:25
Kokkos::View< ScalarType **, Layout > Grid2DDataScalar
Definition grid_types.hpp:19
Definition grid_operations.hpp:9
void extract_vector_component(grid::Grid4DDataScalar< ScalarType > &component_out, const grid::Grid4DDataVec< ScalarType, VecDim > &vectorial_data_in, const int component)
Definition grid_operations.hpp:518
ScalarType max_vector_magnitude(const grid::Grid4DDataVec< ScalarType, VecDim > &x)
Definition grid_operations.hpp:467
void invert_inplace(const grid::Grid4DDataScalar< ScalarType > &y)
Definition grid_operations.hpp:302
ScalarType count_masked(const grid::Grid4DDataScalar< FlagType > &mask, const FlagType &mask_value)
Definition grid_operations.hpp:590
ScalarType dot_product(const grid::Grid4DDataScalar< ScalarType > &x, const grid::Grid4DDataScalar< ScalarType > &y)
Definition grid_operations.hpp:668
ScalarType min_abs_entry(const grid::Grid4DDataScalar< ScalarType > &x)
Definition grid_operations.hpp:380
bool has_nan_or_inf(const grid::Grid4DDataScalar< ScalarType > &x)
Definition grid_operations.hpp:742
void cast(const grid::Grid4DDataScalar< ScalarTypeDst > &dst, const grid::Grid4DDataScalar< ScalarTypeSrc > &src)
Definition grid_operations.hpp:785
ScalarType masked_dot_product(const grid::Grid4DDataScalar< ScalarType > &x, const grid::Grid4DDataScalar< ScalarType > &y, const grid::Grid4DDataScalar< FlagType > &mask, const FlagType &mask_value)
Definition grid_operations.hpp:689
ScalarType masked_sum(const grid::Grid4DDataScalar< ScalarType > &x, const grid::Grid4DDataScalar< FlagType > &mask, const FlagType &mask_value)
Definition grid_operations.hpp:614
void scale(const grid::Grid4DDataScalar< ScalarType > &x, ScalarType value)
Definition grid_operations.hpp:69
ScalarType sum_of_absolutes(const grid::Grid4DDataScalar< ScalarType > &x)
Definition grid_operations.hpp:570
void vector_magnitude(grid::Grid4DDataScalar< ScalarType > &magnitude_out, const grid::Grid4DDataVec< ScalarType, VecDim > &vectorial_data_in)
Definition grid_operations.hpp:492
void set_constant(const grid::Grid2DDataScalar< ScalarType > &x, ScalarType value)
Definition grid_operations.hpp:12
ScalarType min_entry(const grid::Grid4DDataScalar< ScalarType > &x)
Definition grid_operations.hpp:360
void mult_elementwise_inplace(const grid::Grid4DDataScalar< ScalarType > &y, const grid::Grid4DDataScalar< ScalarType > &x)
Definition grid_operations.hpp:329
void assign_masked_else_keep_old(const grid::Grid4DDataScalar< ScalarType > &dst, const ScalarType &value, const grid::Grid4DDataScalar< FlagType > &mask_grid, const FlagType mask_value)
Definition grid_operations.hpp:80
void set_vector_component(grid::Grid4DDataVec< ScalarType, VecDim > &vectorial_data, const int component, const ScalarType constant)
Definition grid_operations.hpp:544
void rand(const grid::Grid4DDataScalar< ScalarTypeDst > &dst)
Definition grid_operations.hpp:798
void lincomb(const grid::Grid4DDataScalar< ScalarType > &y, ScalarType c_0, ScalarType c_1, const grid::Grid4DDataScalar< ScalarType > &x_1)
Definition grid_operations.hpp:179
ScalarType max_abs_entry(const grid::Grid4DDataScalar< ScalarType > &x)
Definition grid_operations.hpp:400
MPI_Datatype mpi_datatype< bool >()
Definition mpi.hpp:208
constexpr bool has_flag(E mask_value, E flag) noexcept
Checks if a bitmask value contains a specific flag.
Definition bit_masking.hpp:43