DeviceGroupedGemmSplitK< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation > Struct Template Reference

DeviceGroupedGemmSplitK< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation > Struct Template Reference

Composable Kernel: ck::tensor_operation::device::DeviceGroupedGemmSplitK< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation > Struct Template Reference
ck::tensor_operation::device::DeviceGroupedGemmSplitK< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation > Struct Template Reference [abstract]

#include <device_grouped_gemm_splitk.hpp>

Inheritance diagram for ck::tensor_operation::device::DeviceGroupedGemmSplitK< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation >:
ck::tensor_operation::device::DeviceGroupedGemm< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation > ck::tensor_operation::device::BaseOperator ck::tensor_operation::device::DeviceGroupedGemmFixedNK< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation >

Public Member Functions

virtual void SetKBatchSize (BaseArgument *p_arg, index_t kbatch) const =0
 Sets the k batch size.
virtual void SetKBatch (BaseArgument *p_arg, index_t kbatch) const
 Sets the k batch size.
Public Member Functions inherited from ck::tensor_operation::device::DeviceGroupedGemm< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation >
virtual std::unique_ptr< BaseArgument > MakeArgumentPointer (std::vector< const void * > &p_a, std::vector< const void * > &p_b, std::vector< std::array< const void *, NumDTensor > > &p_ds, std::vector< void * > &p_e, std::vector< GemmDesc > &gemm_desc, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op)=0
virtual std::unique_ptr< BaseInvoker > MakeInvokerPointer ()=0
virtual void SetDeviceKernelArgs (BaseArgument *p_arg, void *p_dev_kernel_args, const void *p_host_kernel_args) const
 Sets the device kernel arguments pointer and may copy data to device.
virtual void SetDeviceKernelArgs (BaseArgument *p_arg, void *p_dev_kernel_args) const
 Sets the device kernel arguments pointer and may copy data to device.
virtual size_t GetDeviceKernelArgSize (const BaseArgument *p_arg) const
 Gets the device kernel argument size.
Public Member Functions inherited from ck::tensor_operation::device::BaseOperator
 BaseOperator ()=default
 BaseOperator (const BaseOperator &)=default
BaseOperator & operator= (const BaseOperator &)=default
virtual bool IsSupportedArgument (const BaseArgument *)
virtual std::string GetTypeString () const
virtual std::string GetInstanceString () const
virtual std::string GetTypeIdName () const
virtual std::optional< std::string > GetObjectName () const
virtual std::optional< std::string > GetTemplateInfo () const
virtual std::string GetTypeIdHashCode () const
virtual size_t GetWorkSpaceSize (const BaseArgument *) const
virtual void SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const
virtual ~BaseOperator ()

Additional Inherited Members

Static Public Attributes inherited from ck::tensor_operation::device::DeviceGroupedGemm< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation >
static constexpr index_t NumDTensor = DsDataType::Size()

Member Function Documentation

◆ SetKBatch()

template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation>
virtual void ck::tensor_operation::device::DeviceGroupedGemmSplitK< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation >::SetKBatch ( BaseArgument * p_arg,
index_t kbatch ) const
inline virtual

◆ SetKBatchSize()

template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation>
virtual void ck::tensor_operation::device::DeviceGroupedGemmSplitK< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation >::SetKBatchSize ( BaseArgument * p_arg,
index_t kbatch ) const
pure virtual

Sets the k batch size.

Parameters
p_arg Pointer to the Argument we're going to change.
[in] kbatch The kbatch value.

Implemented in ck::tensor_operation::device::DeviceGroupedGemm_Xdl_Fixed_NK< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, NumPrefetch, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, PipelineVer, LoopSched, ComputeType, ALDSType, BLDSType >, ck::tensor_operation::device::DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_KBatch_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_KBatch_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, 
CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVector_NPerBlock, PipelineVer, LoopSched, ComputeDataType, >, and ck::tensor_operation::device::DeviceGroupedGemmXdlSplitKCShuffle< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, PipelineVer, LoopSched, >.


The documentation for this struct was generated from the following file: