WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend > Struct Template Reference

WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend > Struct Template Reference

Composable Kernel: ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend > Struct Template Reference
ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend > Struct Template Reference

#include <wmma_gemm.hpp>

Public Types

using CIndex = MultiIndex<2>
using CIndex3D = MultiIndex<3>

Public Member Functions

__host__ __device__ constexpr WmmaGemm ()
template<class FloatA, class FloatB, class FloatC>
__device__ void Run (const FloatA &p_a_wave, const FloatB &p_b_wave, FloatC &p_c_thread) const

Static Public Member Functions

template<typename CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA>
__host__ static __device__ constexpr auto MakeCDesc_MBlockxRepeat_MWave_MSubGroup_NBlockxRepeat_NWave_NThreadPerSubGroup_MAccVgprs (const CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA &c_desc_mblockxrepeat_mwave_mperwmma_nblockxrepeat_nwave_nperwmma)
template<typename CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA>
__host__ static __device__ constexpr auto MakeCDesc_MBlockxRepeat_MWave_MThreadPerSubGroup_NBlockxRepeat_NWave_NSubGroup_NAccVgprs (const CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA &c_desc_mblockxrepeat_mwave_mperwmma_nblockxrepeat_nwave_nperwmma)
static __device__ constexpr index_t GetRegSizePerWmma ()
static __device__ constexpr index_t GetWaveSize ()
static __device__ auto GetLaneId ()
static __device__ auto GetSubGroupId ()
static __device__ auto GetLaneIdUnderSubGroup ()
static __device__ auto GetSwizzledLaneIdLow ()
__host__ static __device__ auto CalculateAThreadOriginDataIndex ()
__host__ static __device__ auto CalculateBThreadOriginDataIndex ()
static __device__ CIndex GetBeginOfThreadBlk ()
static __device__ CIndex3D GetBeginOfThreadBlk3D ()
__host__ static __device__ constexpr auto GetCMSubGroupNThreadPerSubGroupMAccVgprsThreadBlkLengths ()

Static Public Attributes

static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr auto I2 = Number<2>{}
static constexpr auto I3 = Number<3>{}
static constexpr auto I4 = Number<4>{}
static constexpr auto I5 = Number<5>{}
static constexpr auto wmma
static constexpr auto wmma_instr = wmma.selected_wmma

Member Typedef Documentation

◆ CIndex

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
using ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::CIndex = MultiIndex<2>

◆ CIndex3D

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
using ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::CIndex3D = MultiIndex<3>

Constructor & Destructor Documentation

◆ WmmaGemm()

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__host__ __device__ constexpr ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::WmmaGemm ( )
inline constexpr

Member Function Documentation

◆ CalculateAThreadOriginDataIndex()

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__host__ static __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::CalculateAThreadOriginDataIndex ( )
inline static

◆ CalculateBThreadOriginDataIndex()

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__host__ static __device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::CalculateBThreadOriginDataIndex ( )
inline static

◆ GetBeginOfThreadBlk()

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__device__ CIndex ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetBeginOfThreadBlk ( )
inline static

◆ GetBeginOfThreadBlk3D()

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__device__ CIndex3D ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetBeginOfThreadBlk3D ( )
inline static

◆ GetCMSubGroupNThreadPerSubGroupMAccVgprsThreadBlkLengths()

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__host__ static __device__ constexpr auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetCMSubGroupNThreadPerSubGroupMAccVgprsThreadBlkLengths ( )
inline static constexpr

◆ GetLaneId()

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetLaneId ( )
inline static

◆ GetLaneIdUnderSubGroup()

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetLaneIdUnderSubGroup ( )
inline static

◆ GetRegSizePerWmma()

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__device__ constexpr index_t ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetRegSizePerWmma ( )
inline static constexpr

◆ GetSubGroupId()

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetSubGroupId ( )
inline static

◆ GetSwizzledLaneIdLow()

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__device__ auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetSwizzledLaneIdLow ( )
inline static

◆ GetWaveSize()

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
__device__ constexpr index_t ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::GetWaveSize ( )
inline static constexpr

◆ MakeCDesc_MBlockxRepeat_MWave_MSubGroup_NBlockxRepeat_NWave_NThreadPerSubGroup_MAccVgprs()

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
template<typename CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA>
__host__ static __device__ constexpr auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::MakeCDesc_MBlockxRepeat_MWave_MSubGroup_NBlockxRepeat_NWave_NThreadPerSubGroup_MAccVgprs ( const CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA & c_desc_mblockxrepeat_mwave_mperwmma_nblockxrepeat_nwave_nperwmma)
inline static constexpr

◆ MakeCDesc_MBlockxRepeat_MWave_MThreadPerSubGroup_NBlockxRepeat_NWave_NSubGroup_NAccVgprs()

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
template<typename CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA>
__host__ static __device__ constexpr auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::MakeCDesc_MBlockxRepeat_MWave_MThreadPerSubGroup_NBlockxRepeat_NWave_NSubGroup_NAccVgprs ( const CDesc_MBlockxRepeat_MWave_MPerWMMA_NBlockxRepeat_NWave_NPerWMMA & c_desc_mblockxrepeat_mwave_mperwmma_nblockxrepeat_nwave_nperwmma)
inline static constexpr

◆ Run()

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
template<class FloatA, class FloatB, class FloatC>
__device__ void ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::Run ( const FloatA & p_a_wave,
const FloatB & p_b_wave,
FloatC & p_c_thread ) const
inline

Member Data Documentation

◆ I0

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::I0 = Number<0>{}
static constexpr

◆ I1

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::I1 = Number<1>{}
static constexpr

◆ I2

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::I2 = Number<2>{}
static constexpr

◆ I3

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::I3 = Number<3>{}
static constexpr

◆ I4

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::I4 = Number<4>{}
static constexpr

◆ I5

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::I5 = Number<5>{}
static constexpr

◆ wmma

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::wmma
static constexpr

◆ wmma_instr

template<typename src_type_a, typename src_type_b, typename dst_type, index_t MPerWmma, index_t NPerWmma, index_t KPack, bool TransposeC = false, bool AssemblyBackend = false>
auto ck::WmmaGemm< src_type_a, src_type_b, dst_type, MPerWmma, NPerWmma, KPack, TransposeC, AssemblyBackend >::wmma_instr = wmma.selected_wmma
static constexpr

The documentation for this struct was generated from the following file: