blockwise_gemm_xdlops_skip_b_lds.hpp Source File
blockwise_gemm_xdlops_skip_b_lds.hpp
Go to the documentation of this file.
Definition ck.hpp:268
__host__ __device__ constexpr auto make_multi_index(Xs &&... xs)
Definition array_multi_index.hpp:15
__host__ __device__ constexpr auto make_static_buffer(Number< N >)
Definition static_buffer.hpp:186
__host__ __device__ constexpr auto make_pass_through_transform(const LowLength &low_length)
Definition multi_index_transform_helper.hpp:12
__host__ __device__ constexpr auto make_single_stage_tensor_adaptor(const Transforms &transforms, LowerDimensionOldTopIdss, UpperDimensionNewTopIdss)
Definition tensor_description/tensor_adaptor.hpp:425
__host__ __device__ constexpr auto make_merge_transform(const LowLengths &low_lengths)
Definition multi_index_transform_helper.hpp:55
__host__ __device__ constexpr auto make_merge_transform_v3_division_mod(const LowLengths &low_lengths)
Definition multi_index_transform_helper.hpp:84
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
__host__ __device__ constexpr auto transform_tensor_descriptor(const OldTensorDescriptor &old_tensor_desc, const NewTransforms &new_transforms, NewLowerDimensionOldVisibleIdss, NewUpperDimensionNewVisibleIdss)
Definition tensor_description/tensor_descriptor.hpp:319
__host__ __device__ constexpr auto make_unmerge_transform(const UpLengths &up_lengths, integral_constant< bool, Use24BitIntegerCalculation > = integral_constant< bool, false >{})
Definition multi_index_transform_helper.hpp:90
__host__ __device__ constexpr auto & GetCThreadBuffer()
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:54
static constexpr auto I2
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:30
__host__ static __device__ constexpr auto GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:158
__host__ static __device__ constexpr auto GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:171
static constexpr index_t NWaves
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:44
__host__ __device__ BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1()
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:119
static constexpr auto a_block_desc_m0_m1_m2_k
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:245
static constexpr auto I1
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:29
static constexpr index_t KPerBlock
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:33
static constexpr index_t MWaves
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:43
static constexpr index_t KPerThread
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:40
static __device__ auto CalculateCThreadOriginDataIndex(Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >)
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:92
static constexpr auto I3
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:31
static __device__ auto CalculateAThreadOriginDataIndex()
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:68
static __device__ auto CalculateBThreadOriginDataIndex()
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:79
static constexpr auto xdlops_gemm
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:38
__host__ static __device__ constexpr auto GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:145
__host__ static __device__ constexpr auto GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:132
__host__ static __device__ constexpr auto MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2(const CGridDesc_M_N &c_grid_desc_m_n)
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:188
static constexpr auto I0
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:28
static constexpr index_t K0PerThread
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:41
__device__ void Run(const ABlockBuffer &a_block_buf, const BBlockBuffer &b_thread_buf, CThreadBuffer &c_thread_buf) const
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:248
static constexpr index_t WaveSize
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:45
__device__ void ResetABlockStartWindow()
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:240
__host__ static __device__ constexpr auto MakeABlockDescriptor_M0_M1_M2_K()
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:223
__host__ static __device__ constexpr auto MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2(const CGridDesc_G_M_N &c_grid_desc_g_m_n)
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:205
__device__ void MoveABlockSliceWindow()
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:235
static constexpr index_t A_K1
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:36
StaticBufferTupleOfVector< AddressSpaceEnum::Vgpr, FloatAcc, MRepeat * NRepeat, xdlops_gemm.GetRegSizePerXdlops(), true > c_thread_buf_
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:52
static constexpr index_t A_K0
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:35
static __device__ auto GetWaveIdx()
Definition blockwise_gemm_xdlops_skip_b_lds.hpp:56
Definition utility/sequence.hpp:43
Definition static_buffer.hpp:75
Definition xdlops_gemm.hpp:1821
Definition functional2.hpp:33
Definition dtype_vector.hpp:10