gridwise_put_element_1d.hpp Source File
gridwise_put_element_1d.hpp
Go to the documentation of this file.
Definition ck.hpp:268
__host__ __device__ constexpr auto make_multi_index(Xs &&... xs)
Definition array_multi_index.hpp:15
__device__ X atomic_max(X *p_dst, const X &x)
__host__ __device__ constexpr Y type_convert(X x)
Definition utility/type_convert.hpp:98
__global__ void kernel_put_element_1d(const InGrid1dDesc in_grid_1d_desc, const InDataType *__restrict__ p_in_global, const IndexDataType *__restrict__ p_indices_global, OutDataType *__restrict__ p_out_global, const ElementwiseOperation elementwise_op)
Definition gridwise_put_element_1d.hpp:17
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
__device__ X atomic_add(X *p_dst, const X &x)
__host__ __device__ constexpr auto make_dynamic_buffer(T *p, ElementSpaceSize element_space_size)
Definition dynamic_buffer.hpp:472
Definition gridwise_put_element_1d.hpp:36
static __device__ void Run(const InGrid1dDesc &in_grid_1d_desc, const InDataType *__restrict__ p_in_global, const IndexDataType *__restrict__ p_indices_global, OutDataType *__restrict__ p_out_global, const ElementwiseOperation &elementwise_op)
Definition gridwise_put_element_1d.hpp:42
static constexpr auto thread_buffer_desc_m
Definition gridwise_put_element_1d.hpp:39
static constexpr auto I0
Definition gridwise_put_element_1d.hpp:37
__host__ static __device__ constexpr T Lowest()
Definition numeric_limits.hpp:312
Definition utility/sequence.hpp:43
Definition static_buffer.hpp:16
Helper structure that facilitates transfer of source (grid) data to destination threads.
Definition threadwise_tensor_slice_transfer.hpp:234
Definition functional2.hpp:33