thread_group_tensor_slice_transfer_v7r3.hpp Source File
thread_group_tensor_slice_transfer_v7r3.hpp
Go to the documentation of this file.
Definition ck.hpp:268
__host__ __device__ constexpr auto make_multi_index(Xs &&... xs)
Definition array_multi_index.hpp:15
typename detail::StaticallyIndexedArrayImpl< T, N >::type StaticallyIndexedArray
Definition utility/statically_indexed_array.hpp:45
__host__ __device__ constexpr auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition tensor_description/cluster_descriptor.hpp:13
typename detail::detector< nonesuch, void, Op, Args... >::value_t is_detected
Definition is_detected.hpp:34
typename tuple_element< I, TTuple >::type tuple_element_t
Definition utility/tuple.hpp:208
__host__ __device__ constexpr auto generate_tuple(F &&f, Number< N >)
Definition tuple_helper.hpp:21
__device__ void Run(const SrcDescs &src_descs, const SrcBuffers &src_bufs, const DstDescs &dst_descs, DstBuffers dst_bufs)
Definition thread_group_tensor_slice_transfer_v7r3.hpp:183
static constexpr index_t nSrc
Definition thread_group_tensor_slice_transfer_v7r3.hpp:52
__device__ void MoveDstSliceWindow(const DstDescs &dst_descs, Number< IDst > iDst, const Index &step)
Definition thread_group_tensor_slice_transfer_v7r3.hpp:211
static constexpr auto thread_slice_lengths
Definition thread_group_tensor_slice_transfer_v7r3.hpp:57
MultiIndex< nDim > Index
Definition thread_group_tensor_slice_transfer_v7r3.hpp:55
__device__ void MoveDstSliceWindow(const DstDescs &dst_descs, const Index &step)
Definition thread_group_tensor_slice_transfer_v7r3.hpp:220
__device__ constexpr ThreadGroupTensorSliceTransfer_v7r3(const SrcDescs &src_descs, const StaticallyIndexedArray< Index, nSrc > &src_block_slice_origins, const DstDescs &dst_descs, const StaticallyIndexedArray< Index, nDst > &dst_block_slice_origins, const ElementwiseOperation &element_op)
Definition thread_group_tensor_slice_transfer_v7r3.hpp:59
__device__ void MoveSrcSliceWindow(const SrcDescs &src_descs, Number< ISrc > iSrc, const Index &step)
Definition thread_group_tensor_slice_transfer_v7r3.hpp:194
__device__ void RunWriteAndStoreVgpr(const DstDescs &dst_descs, DstBuffers dst_bufs, const DstVgprDescs &dst_vgpr_desc, DstVgprBuffers dst_vgpr_buf, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
Definition thread_group_tensor_slice_transfer_v7r3.hpp:157
decltype(std::declval< T & >().IsTuple()) is_tuple
Definition thread_group_tensor_slice_transfer_v7r3.hpp:135
__device__ void RunWrite(const DstDescs &dst_descs, DstBuffers dst_bufs, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
Definition thread_group_tensor_slice_transfer_v7r3.hpp:138
static constexpr index_t nDim
Definition thread_group_tensor_slice_transfer_v7r3.hpp:49
__device__ void RunRead(const SrcDescs &src_descs, const SrcBuffers &src_bufs, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
Definition thread_group_tensor_slice_transfer_v7r3.hpp:123
__device__ void MoveSrcSliceWindow(const SrcDescs &src_descs, const Index &step)
Definition thread_group_tensor_slice_transfer_v7r3.hpp:203
static constexpr index_t nDst
Definition thread_group_tensor_slice_transfer_v7r3.hpp:53
__device__ void RunRead(const SrcDescs &src_descs, const SrcBuffers &src_bufs, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
Definition threadwise_tensor_slice_transfer_v7r3.hpp:150
__device__ void RunWrite(const DstDescs &dst_descs, DstBuffers dst_bufs, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
Definition threadwise_tensor_slice_transfer_v7r3.hpp:501
__device__ void RunWriteAndStoreVgpr(const DstDescs &dst_descs, DstBuffers dst_bufs, const DstVgprDescs &, DstVgprBuffers dst_vgpr_buf, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
Definition threadwise_tensor_slice_transfer_v7r3.hpp:408
Definition functional2.hpp:33