#include <blockwise_gemm_wmma.hpp>
◆ ThisThreadBlock
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| using ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::ThisThreadBlock = ThisThreadBlock<BlockSize> |
◆ Tuple6
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| using ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::Tuple6 = decltype(CalculateAThreadOriginDataIndex()) |
◆ BlockwiseGemmWMMA()
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| __host__ __device__ ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::BlockwiseGemmWMMA |
( |
Tuple6 | a_origin = CalculateAThreadOriginDataIndex(), |
|
|
Tuple6 | b_origin = CalculateBThreadOriginDataIndex() ) |
|
inline |
◆ CalculateAThreadOriginDataIndex()
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| __device__ auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::CalculateAThreadOriginDataIndex |
( |
| ) |
|
|
inline static |
◆ CalculateBThreadOriginDataIndex()
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| __device__ auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::CalculateBThreadOriginDataIndex |
( |
| ) |
|
|
inline static |
◆ CalculateCThreadOriginDataIndex()
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| __device__ auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::CalculateCThreadOriginDataIndex |
( |
Number< m0 > | , |
|
|
Number< n0 > | ) |
|
inline static |
◆ CalculateCThreadOriginDataIndex7D()
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| __device__ auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::CalculateCThreadOriginDataIndex7D |
( |
Number< m0 > | , |
|
|
Number< n0 > | ) |
|
inline static |
◆ GetCBlockDescriptor_MRepeat_MWave_MSubGroup_NRepeat_NWave_NThreadPerSubGroup_MAccVgprs()
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| __host__ static __device__ constexpr auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::GetCBlockDescriptor_MRepeat_MWave_MSubGroup_NRepeat_NWave_NThreadPerSubGroup_MAccVgprs |
( |
| ) |
|
|
inline static constexpr |
◆ GetCBlockDescriptor_MRepeat_MWave_MThreadPerSubGroup_NRepeat_NWave_NSubGroup_NAccVgprs()
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| __host__ static __device__ constexpr auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::GetCBlockDescriptor_MRepeat_MWave_MThreadPerSubGroup_NRepeat_NWave_NSubGroup_NAccVgprs |
( |
| ) |
|
|
inline static constexpr |
◆ GetCThreadBuffer()
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| __host__ __device__ constexpr auto & ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::GetCThreadBuffer |
( |
| ) |
|
|
inline constexpr |
◆ GetCThreadDescriptor_MRepeat_MWave_MSubGroup_NRepeat_NWave_NThreadPerSubGroup_MAccVgprs()
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| __host__ static __device__ constexpr auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::GetCThreadDescriptor_MRepeat_MWave_MSubGroup_NRepeat_NWave_NThreadPerSubGroup_MAccVgprs |
( |
| ) |
|
|
inline static constexpr |
◆ GetCThreadDescriptor_MRepeat_MWave_MThreadPerSubGroup_NRepeat_NWave_NSubGroup_NAccVgprs()
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| __host__ static __device__ constexpr auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::GetCThreadDescriptor_MRepeat_MWave_MThreadPerSubGroup_NRepeat_NWave_NSubGroup_NAccVgprs |
( |
| ) |
|
|
inline static constexpr |
◆ GetWaveIdx()
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| __device__ auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::GetWaveIdx |
( |
| ) |
|
|
inline static |
◆ MakeCGridDescriptor_MBlockxRepeat_MWave_MSubGroup_NBlockxRepeat_NWave_NThreadPerSubGroup_MAccVgprs()
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
template<typename CGridDesc_M_N>
| __host__ static __device__ constexpr auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::MakeCGridDescriptor_MBlockxRepeat_MWave_MSubGroup_NBlockxRepeat_NWave_NThreadPerSubGroup_MAccVgprs |
( |
const CGridDesc_M_N & | c_grid_desc_m_n | ) |
|
|
inline static constexpr |
◆ Run()
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
template<typename ABlockBuffer, typename BBlockBuffer, typename CThreadBuffer>
| __device__ void ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::Run |
( |
const ABlockBuffer & | a_block_buf, |
|
|
const BBlockBuffer & | b_block_buf, |
|
|
CThreadBuffer & | c_thread_buf ) const |
|
inline |
◆ a_block_desc_k0_m0_m1_m2_k1
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| ABlockDesc ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::a_block_desc_k0_m0_m1_m2_k1 |
|
static constexpr |
◆ A_K1
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| index_t ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::A_K1 = ABlockDesc{}.GetLength(I5) |
|
static constexpr |
◆ A_KRow
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| index_t ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::A_KRow = AEnableLds ? 1 : 2 |
|
static constexpr |
◆ a_thread_copy_
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| AThreadCopySelector<AEnableLds>::type ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::a_thread_copy_ |
|
protected |
◆ a_thread_desc_
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::a_thread_desc_ |
|
static constexpr protected |
Initial value:=
__host__ __device__ constexpr auto make_naive_tensor_descriptor(const Tuple< Lengths... > &lengths, const Tuple< Strides... > &strides)
Definition tensor_descriptor_helper.hpp:49
integral_constant< index_t, N > Number
Definition number.hpp:12
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
Definition blockwise_gemm_wmma.hpp:550
static constexpr auto I1
Definition blockwise_gemm_wmma.hpp:552
◆ b_block_desc_k0_n0_n1_n2_k1
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| BBlockDesc ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::b_block_desc_k0_n0_n1_n2_k1 |
|
static constexpr |
◆ B_K1
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| index_t ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::B_K1 = BBlockDesc{}.GetLength(I5) |
|
static constexpr |
◆ B_KRow
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| index_t ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::B_KRow = BEnableLds ? 1 : 2 |
|
static constexpr |
◆ b_thread_copy_
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| BThreadCopySelector<BEnableLds>::type ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::b_thread_copy_ |
|
protected |
◆ b_thread_desc_
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::b_thread_desc_ |
|
static constexpr protected |
◆ c_thread_buf_
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| StaticBufferTupleOfVector<AddressSpaceEnum::Vgpr, FloatAcc, MRepeat * NRepeat, wmma_gemm.GetRegSizePerWmma(), true> ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::c_thread_buf_ |
◆ c_thread_desc_
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::c_thread_desc_ |
|
static constexpr protected |
Initial value:
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
static constexpr auto wmma_gemm
Definition blockwise_gemm_wmma.hpp:572
◆ I0
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::I0 = Number<0>{} |
|
static constexpr |
◆ I1
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::I1 = Number<1>{} |
|
static constexpr |
◆ I2
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::I2 = Number<2>{} |
|
static constexpr |
◆ I3
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::I3 = Number<3>{} |
|
static constexpr |
◆ I4
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::I4 = Number<4>{} |
|
static constexpr |
◆ I5
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::I5 = Number<5>{} |
|
static constexpr |
◆ MWaves
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| index_t ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::MWaves = MPerBlock / (MRepeat * MPerWMMA) |
|
static constexpr |
◆ NWaves
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| index_t ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::NWaves = NPerBlock / (NRepeat * NPerWMMA) |
|
static constexpr |
◆ WaveSize
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| index_t ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::WaveSize = 32 |
|
static constexpr |
◆ wmma_gemm
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::wmma_gemm |
|
static constexpr |
Initial value:=
Definition wmma_gemm.hpp:663
◆ WmmaK
template<
index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc,
index_t MPerBlock,
index_t NPerBlock,
index_t KPerBlock,
index_t MPerWMMA,
index_t NPerWMMA,
index_t MRepeat,
index_t NRepeat,
index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
| auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::WmmaK = Number<16>{} |
|
static constexpr |
The documentation for this struct was generated from the following file: