DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector > Struct Template Reference

DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1&lt; NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector &gt; Struct Template Reference#

Composable Kernel: ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector > Struct Template Reference
ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector > Struct Template Reference

#include <device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp>

Inheritance diagram for ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >:
ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ADataType, ADataType > ck::tensor_operation::device::BaseOperator

Classes

struct  GemmArgs
struct  Argument
struct  Invoker

Public Types

using DeviceOp = DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1
using ABDataType = ADataType
using ALayoutAfterTranspose
using BLayoutAfterTranspose
using ELayoutAfterTranspose
using ConvToGemmBwdDataTransform
template<index_t NXdlPerWave_>
using GridwiseGemmBase = GridwiseGemmMultipleD_xdl_cshuffle<GridwiseGemmMultiDTemplateParams>
template<index_t NXdlPerWave_>
using GridwiseGemmCTransposeBase
using GridwiseGemm64 = GridwiseGemmBase<math::max(NXdlPerWave64, 1)>
using GridwiseGemm32 = GridwiseGemmBase<NXdlPerWave32>
using GridwiseGemmCTranspose64
using GridwiseGemmCTranspose32
using ABDsEGridDesc = decltype(GetDummyABDsEGridDescriptor(dummy_conv_to_gemm_transform))
using AGridDesc_AK0_M_AK1 = remove_cvref_t<tuple_element_t<0, ABDsEGridDesc>>
using BGridDesc_BK0_N_BK1 = remove_cvref_t<tuple_element_t<1, ABDsEGridDesc>>
using DsGridDesc_M_N = remove_cvref_t<tuple_element_t<2, ABDsEGridDesc>>
using EGridDesc_M_N = remove_cvref_t<tuple_element_t<3, ABDsEGridDesc>>
using AGridDesc_M_K = decltype(transform_k0_m_k1_to_m_k(AGridDesc_AK0_M_AK1{}))
using BGridDesc_N_K = decltype(transform_k0_m_k1_to_m_k(BGridDesc_BK0_N_BK1{}))
using DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
using EGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
using Block2ETileMap
using GroupedGemmBlock2ETileMap = OffsettedBlockToCTileMap<Block2ETileMap>
using Block2TileMapInOutElementwise = BlockToCTileMap_M00_N0_M01Adapt<NPerBlock, MPerBlock>
using Block2TileMapWeiElementwise = BlockToCTileMap_M00_N0_M01Adapt<MPerBlock, NPerBlock>
using NGCHWTransposeDescType
using NHWGCTransposeDescType
using GKCYXTransposeDescType
using GKYXCTransposeDescType
using GridwiseElementwiseInputTranspose
using GridwiseElementwiseWeightTranspose
using GridwiseElementwiseOutputTranspose

Public Member Functions

bool IsSupportedArgument (const BaseArgument *p_arg) override
std::unique_ptr< BaseArgumentMakeArgumentPointer (const void *p_a, const void *p_b, const std::array< const void *, NumDTensor > &p_ds, void *p_e, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_strides, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_strides, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_c_wis_lengths, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &e_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_n_c_wis_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads, const AElementwiseOp &a_element_op, const BElementwiseOp &b_element_op, const CDEElementwiseOp &cde_element_op, const ck::index_t split_k=1) override
std::unique_ptr< BaseInvokerMakeInvokerPointer () override
std::string GetTypeString () const override
size_t GetWorkSpaceSize (const BaseArgument *p_arg) const override
void SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const override
Public Member Functions inherited from ck::tensor_operation::device::BaseOperator
 BaseOperator ()=default
 BaseOperator (const BaseOperator &)=default
BaseOperatoroperator= (const BaseOperator &)=default
virtual std::string GetInstanceString () const
virtual std::string GetTypeIdName () const
virtual std::optional< std::string > GetObjectName () const
virtual std::optional< std::string > GetTemplateInfo () const
virtual std::string GetTypeIdHashCode () const
virtual ~BaseOperator ()

Static Public Member Functions

static auto GetDummyABDsEGridDescriptor (const ConvToGemmBwdDataTransform &conv_to_gemm_transform)
template<typename EGridDesc_M_N>
static auto MakeEGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock (const EGridDesc_M_N e_grid_desc_m_n)
template<typename Desc_K0_M_K1>
static auto transform_k0_m_k1_to_m_k (const Desc_K0_M_K1 &desc_k0_m_k1)
static bool IsSupportedArgument (const Argument &arg)
static auto MakeArgument (const void *p_a, const void *p_b, const std::array< const void *, NumDTensor > &p_ds, void *p_e, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_strides, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_strides, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_c_wis_lengths, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &e_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_n_c_wis_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads, const AElementwiseOp &a_element_op, const BElementwiseOp &b_element_op, const CDEElementwiseOp &cde_element_op, const ck::index_t split_k=1)
static auto MakeInvoker ()

Static Public Attributes

static constexpr index_t MaxGroupedGemmGroupsNum
static GET_NXDL_PER_WAVE_IMPL constexpr auto NXdlPerWave64 = GetNXdlPerWave<true>()
static constexpr auto NXdlPerWave32 = GetNXdlPerWave<false>()
static constexpr index_t NumDTensor = DsDataType::Size()
static constexpr GemmSpecialization GemmSpec = GemmSpecialization::MNKPadding
static constexpr bool IsSplitKSupported
static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr auto I2 = Number<2>{}
static constexpr auto I3 = Number<3>{}
static constexpr bool isATensorColMajor
static constexpr bool NeedTransposeKernel
static constexpr bool CTranspose
static constexpr ConvToGemmBwdDataTransform dummy_conv_to_gemm_transform
static constexpr index_t ClusterLengthMPerBlock
static constexpr index_t ClusterLengthNPerBlock
static constexpr auto conv_ngchw_to_nhwgc_transformer
static constexpr index_t TransposeTransferInScalarPerVectorAligned
static constexpr index_t TransposeTransferOutScalarPerVectorAligned
static constexpr index_t ElementwiseBlocksize = ClusterLengthMPerBlock * ClusterLengthNPerBlock
Static Public Attributes inherited from ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ADataType, ADataType >
static constexpr index_t NumDTensor

Member Typedef Documentation

◆ ABDataType

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::ABDataType = ADataType

◆ ABDsEGridDesc

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::ABDsEGridDesc = decltype(GetDummyABDsEGridDescriptor(dummy_conv_to_gemm_transform))

◆ AGridDesc_AK0_M_AK1

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::AGridDesc_AK0_M_AK1 = remove_cvref_t<tuple_element_t<0, ABDsEGridDesc>>

◆ AGridDesc_M_K

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::AGridDesc_M_K = decltype(transform_k0_m_k1_to_m_k(AGridDesc_AK0_M_AK1{}))

◆ ALayoutAfterTranspose

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::ALayoutAfterTranspose
Initial value:
std::conditional_t<
std::conditional_t<is_NGCDHW_NGKDHW<ELayout, BLayout, ALayout>() && NeedTransposeKernel,
ALayout>>
constexpr bool is_NGCHW_NGKHW()
Definition device_grouped_conv_utils.hpp:72
Definition tensor_operation/gpu/device/tensor_layout.hpp:362
Definition tensor_operation/gpu/device/tensor_layout.hpp:357
static constexpr bool NeedTransposeKernel
Definition device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:347

◆ BGridDesc_BK0_N_BK1

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::BGridDesc_BK0_N_BK1 = remove_cvref_t<tuple_element_t<1, ABDsEGridDesc>>

◆ BGridDesc_N_K

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::BGridDesc_N_K = decltype(transform_k0_m_k1_to_m_k(BGridDesc_BK0_N_BK1{}))

◆ BLayoutAfterTranspose

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::BLayoutAfterTranspose
Initial value:
std::conditional_t<
std::conditional_t<is_NGCDHW_GKCZYX_NGKDHW<ELayout, BLayout, ALayout>() &&
BLayout>>
constexpr bool is_NGCHW_GKCYX_NGKHW()
Definition device_grouped_conv_utils.hpp:64
Definition tensor_operation/gpu/device/tensor_layout.hpp:238
Definition tensor_operation/gpu/device/tensor_layout.hpp:243

◆ Block2ETileMap

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::Block2ETileMap
Initial value:
decltype(GridwiseGemmCTranspose64::MakeDefaultBlock2ETileMap(EGridDesc_M_N{}))
remove_cvref_t< decltype(MakeEGridDescriptor_M_N< ELayout >(dummy_conv_to_gemm_transformer))> EGridDesc_M_N
Definition codegen_device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp:441

◆ Block2TileMapInOutElementwise

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::Block2TileMapInOutElementwise = BlockToCTileMap_M00_N0_M01Adapt<NPerBlock, MPerBlock>

◆ Block2TileMapWeiElementwise

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::Block2TileMapWeiElementwise = BlockToCTileMap_M00_N0_M01Adapt<MPerBlock, NPerBlock>

◆ ConvToGemmBwdDataTransform

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::ConvToGemmBwdDataTransform
Initial value:
ConvBackwardDataSpecialization,
AK1,
BK1,
MPerBlock,
NPerBlock,
DoPadGemmM,
DoPadGemmN,
true,
EDataType,
1,
int32_t index_t
Definition ck.hpp:299
Definition transform_conv_bwd_data_to_gemm_v1.hpp:44
static constexpr index_t KPerBlock
Definition device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp:108
ADataType ABDataType
Definition device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:333
std::conditional_t< is_NGCHW_NGKHW< ELayout, BLayout, ALayout >() &&NeedTransposeKernel, tensor_layout::convolution::NHWGC, std::conditional_t< is_NGCDHW_NGKDHW< ELayout, BLayout, ALayout >() &&NeedTransposeKernel, tensor_layout::convolution::NDHWGC, ELayout > > ELayoutAfterTranspose
Definition device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:368
std::conditional_t< is_NGCHW_NGKHW< ELayout, BLayout, ALayout >() &&NeedTransposeKernel, tensor_layout::convolution::NHWGK, std::conditional_t< is_NGCDHW_NGKDHW< ELayout, BLayout, ALayout >() &&NeedTransposeKernel, tensor_layout::convolution::NDHWGK, ALayout > > ALayoutAfterTranspose
Definition device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:355
std::conditional_t< is_NGCHW_GKCYX_NGKHW< ELayout, BLayout, ALayout >() &&NeedTransposeKernel, tensor_layout::convolution::GKYXC, std::conditional_t< is_NGCDHW_GKCZYX_NGKDHW< ELayout, BLayout, ALayout >() && NeedTransposeKernel, tensor_layout::convolution::GKZYXC, BLayout > > BLayoutAfterTranspose
Definition device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:361
static constexpr bool CTranspose
Definition device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:351

◆ DeviceOp

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::DeviceOp = DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1

◆ DsGridDesc_M_N

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::DsGridDesc_M_N = remove_cvref_t<tuple_element_t<2, ABDsEGridDesc>>

◆ DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
Initial value:
decltype(GridwiseGemmCTranspose64::MakeDsGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(
remove_cvref_t< decltype(MakeDsGridDescriptor_M_N({}, {}, {}))> DsGridDesc_M_N
Definition gridwise_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp:348

◆ EGridDesc_M_N

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::EGridDesc_M_N = remove_cvref_t<tuple_element_t<3, ABDsEGridDesc>>

◆ EGridDesc_MBlock_MPerBlock_NBlock_NPerBlock

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::EGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
Initial value:
decltype(MakeEGridDescriptor_M_N(1, 1, 1)) EGridDesc_M_N
Definition gridwise_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp:349
static auto MakeEGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(const EGridDesc_M_N e_grid_desc_m_n)
Definition device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:489

◆ ELayoutAfterTranspose

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::ELayoutAfterTranspose
Initial value:
std::conditional_t<
std::conditional_t<is_NGCDHW_NGKDHW<ELayout, BLayout, ALayout>() && NeedTransposeKernel,
ELayout>>
Definition tensor_operation/gpu/device/tensor_layout.hpp:135
Definition tensor_operation/gpu/device/tensor_layout.hpp:130

◆ GKCYXTransposeDescType

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::GKCYXTransposeDescType
Initial value:
.template MakeGKCYXTransposeDesc<NDimSpatial>({}, {}))>
remove_cv_t< remove_reference_t< T > > remove_cvref_t
Definition type.hpp:297
static constexpr auto conv_ngchw_to_nhwgc_transformer
Definition device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:583

◆ GKYXCTransposeDescType

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::GKYXCTransposeDescType
Initial value:
.template MakeGKYXCTransposeDesc<NDimSpatial>({}, {}))>

◆ GridwiseElementwiseInputTranspose

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::GridwiseElementwiseInputTranspose
Initial value:
NPerBlock,
MPerBlock,
I1,
I0>
Definition gridwise_elementwise_2d.hpp:278
Definition utility/sequence.hpp:43
Definition utility/tuple.hpp:117
static constexpr index_t ElementwiseBlocksize
Definition device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:609
static constexpr index_t ClusterLengthNPerBlock
Definition device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:580
static constexpr auto I1
Definition device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:336
static constexpr auto I0
Definition device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:335
static constexpr index_t ClusterLengthMPerBlock
Definition device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:578
BlockToCTileMap_M00_N0_M01Adapt< NPerBlock, MPerBlock > Block2TileMapInOutElementwise
Definition device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:575
Definition tensor_operation/gpu/element/unary_element_wise_operation.hpp:340

◆ GridwiseElementwiseOutputTranspose

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::GridwiseElementwiseOutputTranspose

◆ GridwiseElementwiseWeightTranspose

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::GridwiseElementwiseWeightTranspose

◆ GridwiseGemm32

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::GridwiseGemm32 = GridwiseGemmBase<NXdlPerWave32>

◆ GridwiseGemm64

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::GridwiseGemm64 = GridwiseGemmBase<math::max(NXdlPerWave64, 1)>

◆ GridwiseGemmBase

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
template<index_t NXdlPerWave_>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::GridwiseGemmBase = GridwiseGemmMultipleD_xdl_cshuffle<GridwiseGemmMultiDTemplateParams>

◆ GridwiseGemmCTranspose32

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::GridwiseGemmCTranspose32
Initial value:
std::conditional_t<CTranspose, GridwiseGemmCTransposeBase<NXdlPerWave32>, GridwiseGemm32>
GridwiseGemmBase< NXdlPerWave32 > GridwiseGemm32
Definition device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:478

◆ GridwiseGemmCTranspose64

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::GridwiseGemmCTranspose64
Initial value:
std::conditional_t<CTranspose,
__host__ __device__ constexpr T max(T x)
Definition utility/math.hpp:84
GridwiseGemmMultipleD_xdl_cshuffle< GridwiseGemmCTransposeTemplateParameters > GridwiseGemmCTransposeBase
Definition device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:475
GridwiseGemmBase< math::max(NXdlPerWave64, 1)> GridwiseGemm64
Definition device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:477
static GET_NXDL_PER_WAVE_IMPL constexpr auto NXdlPerWave64
Definition device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:323

◆ GridwiseGemmCTransposeBase

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
template<index_t NXdlPerWave_>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::GridwiseGemmCTransposeBase

◆ GroupedGemmBlock2ETileMap

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::GroupedGemmBlock2ETileMap = OffsettedBlockToCTileMap<Block2ETileMap>

◆ NGCHWTransposeDescType

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::NGCHWTransposeDescType
Initial value:
.template MakeNGCHWTransposeDesc<NDimSpatial>({}, {}))>

◆ NHWGCTransposeDescType

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
using ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::NHWGCTransposeDescType
Initial value:
.template MakeNHWGCTransposeDesc<NDimSpatial>({}, {}))>

Member Function Documentation

◆ GetDummyABDsEGridDescriptor()

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
auto ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::GetDummyABDsEGridDescriptor ( const ConvToGemmBwdDataTransform & conv_to_gemm_transform)
inlinestatic

◆ GetTypeString()

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
std::string ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::GetTypeString ( ) const
inlineoverridevirtual

◆ GetWorkSpaceSize()

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
size_t ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::GetWorkSpaceSize ( const BaseArgument * p_arg) const
inlineoverridevirtual

◆ IsSupportedArgument() [1/2]

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
bool ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::IsSupportedArgument ( const Argument & arg)
inlinestatic

◆ IsSupportedArgument() [2/2]

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
bool ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::IsSupportedArgument ( const BaseArgument * p_arg)
inlineoverridevirtual

◆ MakeArgument()

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
auto ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::MakeArgument ( const void * p_a,
const void * p_b,
const std::array< const void *, NumDTensor > & p_ds,
void * p_e,
const std::array< index_t, NDimSpatial+3 > & a_g_n_k_wos_lengths,
const std::array< index_t, NDimSpatial+3 > & a_g_n_k_wos_strides,
const std::array< index_t, NDimSpatial+3 > & b_g_k_c_xs_lengths,
const std::array< index_t, NDimSpatial+3 > & b_g_k_c_xs_strides,
const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > & ds_g_n_c_wis_lengths,
const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > & ds_g_n_c_wis_strides,
const std::array< index_t, NDimSpatial+3 > & e_g_n_c_wis_lengths,
const std::array< index_t, NDimSpatial+3 > & e_g_n_c_wis_strides,
const std::array< index_t, NDimSpatial > & conv_filter_strides,
const std::array< index_t, NDimSpatial > & conv_filter_dilations,
const std::array< index_t, NDimSpatial > & input_left_pads,
const std::array< index_t, NDimSpatial > & input_right_pads,
const AElementwiseOp & a_element_op,
const BElementwiseOp & b_element_op,
const CDEElementwiseOp & cde_element_op,
const ck::index_t split_k = 1 )
inlinestatic

◆ MakeArgumentPointer()

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
std::unique_ptr< BaseArgument > ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::MakeArgumentPointer ( const void * p_a,
const void * p_b,
const std::array< const void *, NumDTensor > & p_ds,
void * p_e,
const std::array< index_t, NDimSpatial+3 > & a_g_n_k_wos_lengths,
const std::array< index_t, NDimSpatial+3 > & a_g_n_k_wos_strides,
const std::array< index_t, NDimSpatial+3 > & b_g_k_c_xs_lengths,
const std::array< index_t, NDimSpatial+3 > & b_g_k_c_xs_strides,
const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > & ds_g_n_c_wis_lengths,
const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > & ds_g_n_c_wis_strides,
const std::array< index_t, NDimSpatial+3 > & e_g_n_c_wis_lengths,
const std::array< index_t, NDimSpatial+3 > & e_g_n_c_wis_strides,
const std::array< index_t, NDimSpatial > & conv_filter_strides,
const std::array< index_t, NDimSpatial > & conv_filter_dilations,
const std::array< index_t, NDimSpatial > & input_left_pads,
const std::array< index_t, NDimSpatial > & input_right_pads,
const AElementwiseOp & a_element_op,
const BElementwiseOp & b_element_op,
const CDEElementwiseOp & cde_element_op,
const ck::index_t split_k = 1 )
inlineoverridevirtual

◆ MakeEGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock()

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
template<typename EGridDesc_M_N>
auto ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::MakeEGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock ( const EGridDesc_M_N e_grid_desc_m_n)
inlinestatic

◆ MakeInvoker()

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
auto ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::MakeInvoker ( )
inlinestatic

◆ MakeInvokerPointer()

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
std::unique_ptr< BaseInvoker > ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::MakeInvokerPointer ( )
inlineoverridevirtual

◆ SetWorkSpacePointer()

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
void ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::SetWorkSpacePointer ( BaseArgument * p_arg,
void * p_workspace,
const StreamConfig & = StreamConfig{} ) const
inlineoverridevirtual

◆ transform_k0_m_k1_to_m_k()

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
template<typename Desc_K0_M_K1>
auto ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::transform_k0_m_k1_to_m_k ( const Desc_K0_M_K1 & desc_k0_m_k1)
inlinestatic

Member Data Documentation

◆ ClusterLengthMPerBlock

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
index_t ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::ClusterLengthMPerBlock
staticconstexpr
Initial value:
=
CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock::At(1)

◆ ClusterLengthNPerBlock

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
index_t ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::ClusterLengthNPerBlock
staticconstexpr
Initial value:
=
CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock::At(3)

◆ conv_ngchw_to_nhwgc_transformer

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
auto ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::conv_ngchw_to_nhwgc_transformer
staticconstexpr
Initial value:
=
BLayout,
ALayout,
NDimSpatial,
MPerBlock / ClusterLengthMPerBlock>{}
Definition transform_conv_ngchw_to_nhwgc.hpp:31

◆ CTranspose

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
bool ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::CTranspose
staticconstexpr

◆ dummy_conv_to_gemm_transform

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
ConvToGemmBwdDataTransform ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::dummy_conv_to_gemm_transform
staticconstexpr

◆ ElementwiseBlocksize

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
index_t ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::ElementwiseBlocksize = ClusterLengthMPerBlock * ClusterLengthNPerBlock
staticconstexpr

◆ GemmSpec

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
GemmSpecialization ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::GemmSpec = GemmSpecialization::MNKPadding
staticconstexpr

◆ I0

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
auto ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::I0 = Number<0>{}
staticconstexpr

◆ I1

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
auto ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::I1 = Number<1>{}
staticconstexpr

◆ I2

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
auto ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::I2 = Number<2>{}
staticconstexpr

◆ I3

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
auto ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::I3 = Number<3>{}
staticconstexpr

◆ isATensorColMajor

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
bool ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::isATensorColMajor
staticconstexpr
Initial value:
=
(ConvBackwardDataSpecialization ==
(ABlockTransferSrcVectorDim == 1) &&
constexpr bool is_NGCDHW_NGKDHW()
Definition device_grouped_conv_utils.hpp:112
@ Filter1x1Stride1Pad0
Definition convolution_backward_data_specialization.hpp:13

◆ IsSplitKSupported

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
bool ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::IsSplitKSupported
staticconstexpr
Initial value:
=
(CDEBlockTransferScalarPerVector_NPerBlock % 2 == 0 || sizeof(EDataType) % 4 == 0) &&

◆ MaxGroupedGemmGroupsNum

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
index_t ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::MaxGroupedGemmGroupsNum
staticconstexpr
Initial value:
=
ConvBackwardDataSpecialization ==
? 1
: 32

◆ NeedTransposeKernel

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
bool ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::NeedTransposeKernel
staticconstexpr
Initial value:
=
static constexpr bool isATensorColMajor
Definition device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp:340

◆ NumDTensor

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
index_t ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::NumDTensor = DsDataType::Size()
staticconstexpr

◆ NXdlPerWave32

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
auto ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::NXdlPerWave32 = GetNXdlPerWave<false>()
staticconstexpr

◆ NXdlPerWave64

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
GET_NXDL_PER_WAVE_IMPL constexpr auto ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::NXdlPerWave64 = GetNXdlPerWave<true>()
staticconstexpr

◆ TransposeTransferInScalarPerVectorAligned

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
index_t ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::TransposeTransferInScalarPerVectorAligned
staticconstexpr
Initial value:
=
std::min(MPerBlock / ClusterLengthMPerBlock, MaxTransposeTransferInScalarPerVector)

◆ TransposeTransferOutScalarPerVectorAligned

template<index_t NDimSpatial, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOp, typename BElementwiseOp, typename CDEElementwiseOp, ConvolutionBackwardDataSpecialization ConvBackwardDataSpecialization, bool DoPadGemmM, bool DoPadGemmN, index_t NumGemmKPrefetchStage, index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerXDL, index_t NPerXDL, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = make_default_loop_scheduler(), typename AComputeType = ADataType, typename BComputeType = AComputeType, index_t MaxTransposeTransferInScalarPerVector = 1, index_t MaxTransposeTransferOutScalarPerVector = 1>
index_t ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1< NDimSpatial, ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOp, BElementwiseOp, CDEElementwiseOp, ConvBackwardDataSpecialization, DoPadGemmM, DoPadGemmN, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched, AComputeType, BComputeType, MaxTransposeTransferInScalarPerVector, MaxTransposeTransferOutScalarPerVector >::TransposeTransferOutScalarPerVectorAligned
staticconstexpr
Initial value:
=
std::min(MPerBlock / ClusterLengthMPerBlock, MaxTransposeTransferOutScalarPerVector)

The documentation for this struct was generated from the following file: