DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB > Struct Template Reference#
ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB > Struct Template Reference
#include <device_gemm_wmma_cshuffle_v3_b_scale.hpp>
Inheritance diagram for ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >:
Public Types | |
| using | GridwiseGemm |
| using | Argument = typename GridwiseGemm::Argument |
| using | DeviceGemmCommon |
| using | Invoker = typename DeviceGemmCommon::Invoker |
Public Member Functions | |
| bool | IsSupportedArgument (const BaseArgument *p_arg) override |
| index_t | GetKPerBlock () override |
| bool | GetPermuteB () override |
| std::unique_ptr< BaseArgument > | MakeArgumentPointer (const void *p_a, const void *p_b, void *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, index_t StrideScaleB, const void *p_b_scale, index_t KBatch, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op) override |
| std::unique_ptr< BaseInvoker > | MakeInvokerPointer () override |
| std::string | GetTypeString () const override |
| Public Member Functions inherited from ck::tensor_operation::device::BaseOperator | |
| BaseOperator ()=default | |
| BaseOperator (const BaseOperator &)=default | |
| BaseOperator & | operator= (const BaseOperator &)=default |
| virtual std::string | GetInstanceString () const |
| virtual std::string | GetTypeIdName () const |
| virtual std::optional< std::string > | GetObjectName () const |
| virtual std::optional< std::string > | GetTemplateInfo () const |
| virtual std::string | GetTypeIdHashCode () const |
| virtual size_t | GetWorkSpaceSize (const BaseArgument *) const |
| virtual void | SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const |
| virtual | ~BaseOperator () |
Static Public Member Functions | |
| static bool | IsSupportedArgument (const Argument &arg) |
| static auto | MakeArgument (const ADataType *p_a, const BDataType *p_b, CDataType *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, index_t StrideScaleB, const BScaleDataType *p_b_scale, index_t KBatch, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation cde_element_op) |
| static auto | MakeInvoker () |
Member Typedef Documentation
◆ Argument
template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| using ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Argument = typename GridwiseGemm::Argument |
◆ DeviceGemmCommon
template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| using ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::DeviceGemmCommon |
Initial value:
CDataType,
MPerBlock,
NPerBlock,
KPerBlock,
BlockSize,
AK1,
BK1,
GemmSpec,
BlkGemmPipeSched,
BlkGemmPipelineVer,
ComputeTypeA,
ComputeTypeB>
Definition utility/sequence.hpp:43
Definition utility/tuple.hpp:186
Definition utility/tuple.hpp:117
Definition device_gemm_wmma_cshuffle_v3_common.hpp:43
GridwiseGemm_wmma_cshuffle_v3< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, false, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, false, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB > GridwiseGemm
Definition device_gemm_multiple_abd_wmma_cshuffle_v3.hpp:195
◆ GridwiseGemm
template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| using ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::GridwiseGemm |
◆ Invoker
template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| using ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Invoker = typename DeviceGemmCommon::Invoker |
Member Function Documentation
◆ GetKPerBlock()
template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
|
inlineoverridevirtual |
◆ GetPermuteB()
template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
|
inlineoverridevirtual |
◆ GetTypeString()
template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
|
inlineoverridevirtual |
Reimplemented from ck::tensor_operation::device::BaseOperator.
◆ IsSupportedArgument() [1/2]
template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
|
inlinestatic |
◆ IsSupportedArgument() [2/2]
template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
|
inlineoverridevirtual |
Reimplemented from ck::tensor_operation::device::BaseOperator.
◆ MakeArgument()
template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
|
inlinestatic |
◆ MakeArgumentPointer()
template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
|
inlineoverridevirtual |
◆ MakeInvoker()
template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
|
inlinestatic |
◆ MakeInvokerPointer()
template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
|
inlineoverridevirtual |
The documentation for this struct was generated from the following file: