Flatmm_32x512x128_1x4x1_16x16x32_FP16 Struct Reference

Flatmm_32x512x128_1x4x1_16x16x32_FP16 Struct Reference

Composable Kernel: ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_FP16 Struct Reference
ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_FP16 Struct Reference

#include <flatmm_32x512x128_1x4x1_16x16x32.hpp>

Inheritance diagram for ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_FP16:
ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_Base

Public Types

using ADataType = fp16_t
using BDataType = fp16_t

Public Member Functions

template<typename ARes, typename ACoords, typename BRes, typename BCoords, bool Is2B = false>
CK_TILE_DEVICE auto operator() (const ARes &res_a, const ACoords &cached_coords_a, const BRes &res_b, const BCoords &cached_coords_b, CK_TILE_LDS_ADDR void *smem, index_t k, index_t tile_offset_a, index_t tile_offset_b, bool_constant< Is2B >={})

Additional Inherited Members

Static Public Member Functions inherited from ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_Base
static CK_TILE_DEVICE constexpr auto MakeCBlockDist ()
static CK_TILE_DEVICE constexpr auto MakeCBlockTile ()
static CK_TILE_HOST_DEVICE constexpr auto MakeLdsStoreDesc_A ()
static CK_TILE_HOST_DEVICE constexpr auto MakeLdsLoadDesc_A ()
static constexpr auto GetGemm_AWarpEnc ()
static CK_TILE_HOST_DEVICE constexpr ck_tile::index_t GetSmemSize ()
Static Public Attributes inherited from ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_Base
static constexpr index_t Block_M = 32
static constexpr index_t Block_N = 512
static constexpr index_t Block_K = 128
static constexpr index_t WarpPerBlock_M = 1
static constexpr index_t WarpPerBlock_N = 4
static constexpr index_t WarpPerBlock_K = 1
static constexpr index_t NumWarps = 4
static constexpr index_t Warp_M = 16
static constexpr index_t Warp_N = 16
static constexpr index_t Warp_K = 32
static constexpr index_t BlockSize = 256
static constexpr index_t SubKPacks = 2
static constexpr index_t Block_W = Warp_N * Warp_K
static constexpr index_t Block_Nr = Block_N / Warp_N
static constexpr index_t Block_Kr = Block_K / Warp_K
static constexpr index_t Repeat_M = Block_M / (Warp_M * WarpPerBlock_M)
static constexpr index_t Repeat_N = Block_N / (Warp_N * WarpPerBlock_N)
static constexpr index_t Repeat_K = Block_K / (Warp_K * WarpPerBlock_K)

Member Typedef Documentation

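◆ ADataType

using ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_FP16::ADataType = fp16_t

◆ BDataType

using ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_FP16::BDataType = fp16_t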

Member Function Documentation

◆ operator()()

template<typename ARes, typename ACoords, typename BRes, typename BCoords, bool Is2B = false>
CK_TILE_DEVICE auto ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_FP16::operator() (
    const ARes & res_a,
    const ACoords & cached_coords_a,
    const BRes & res_b,
    const BCoords & cached_coords_b,
    CK_TILE_LDS_ADDR void * smem,
    index_t k,
    index_t tile_offset_a,
    index_t tile_offset_b,
    bool_constant< Is2B > = {} )
[inline]

The documentation for this struct was generated from the following file:
flatmm_32x512x128_1x4x1_16x16x32.hpp