GridwiseBatchNormBackwardWithBlockwiseWelford< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, ScaleBiasGridDesc_M, MeanVarGridDesc_M, GetReduceCountPerThreadFunctor, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize > Struct Template Reference
ck::GridwiseBatchNormBackwardWithBlockwiseWelford< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, ScaleBiasGridDesc_M, MeanVarGridDesc_M, GetReduceCountPerThreadFunctor, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize > Struct Template Reference
#include <gridwise_batchnorm_backward_blockwise_welford.hpp>
Public Types | |
| using | ThreadClusterLengths_M_K = Sequence<MThreadClusterSize, KThreadClusterSize> |
| using | ThreadBufferDimAccessOrder |
| using | ThreadClusterArrangeOrder |
| using | ThreadReduceSrcDesc_M_K |
| using | ThreadReduceDstDesc_M |
| using | ThreadwiseWelford |
| using | BlockwiseWelford |
| using | BlockwiseReduce |
| using | ThreadwiseReduce |
| using | PassThroughOp = tensor_operation::element_wise::PassThrough |
Static Public Member Functions | |
| static __device__ void | Run (const XYGridDesc_M_K x_grid_desc_m_k, const XYGridDesc_M_K dy_grid_desc_m_k, const XYGridDesc_M_K dx_grid_desc_m_k, const ScaleBiasGridDesc_M scale_grid_desc_m, const ScaleBiasGridDesc_M dscale_dbias_grid_desc_m, const MeanVarGridDesc_M mean_var_grid_desc_m, const GetReduceCountPerThreadFunctor get_reduce_count_per_thread, long_index_t reduce_size, index_t num_k_block_tile_iteration, AccDataType epsilon, const XDataType *const __restrict__ p_x, const DyDataType *const __restrict__ p_dy, const ScaleDataType *const __restrict__ p_scale, bool haveSavedMeanInvVar, const MeanVarDataType *const __restrict__ p_savedMean, const MeanVarDataType *const __restrict__ p_savedInvVar, const DyElementwiseOp dy_elementwise_op, DxDataType *const __restrict__ p_dx, DscaleDbiasDataType *const __restrict__ p_dscale, DscaleDbiasDataType *const __restrict__ p_dbias) |
Static Public Attributes | |
| static constexpr bool | reorder_thread_cluster = (XDyDxVectorDim == 0) |
| static constexpr auto | thread_cluster_desc |
| static constexpr auto | I0 = Number<0>{} |
| static constexpr auto | I1 = Number<1>{} |
| static constexpr index_t | M_BlockTileSize = MThreadClusterSize * MThreadSliceSize |
| static constexpr index_t | K_BlockTileSize = KThreadClusterSize * KThreadSliceSize |
Member Typedef Documentation
◆ BlockwiseReduce
template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename ScaleBiasGridDesc_M, typename MeanVarGridDesc_M, typename GetReduceCountPerThreadFunctor, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
| using ck::GridwiseBatchNormBackwardWithBlockwiseWelford< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, ScaleBiasGridDesc_M, MeanVarGridDesc_M, GetReduceCountPerThreadFunctor, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::BlockwiseReduce |
Initial value:
PartitionedBlockwiseReduction<AccDataType,
BlockSize,
ThreadClusterLengths_M_K,
ThreadClusterArrangeOrder,
ck::reduce::Add,
false>
Sequence< MThreadClusterSize, KThreadClusterSize > ThreadClusterLengths_M_K
Definition gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp:110
typename conditional< reorder_thread_cluster, Sequence< 1, 0 >, Sequence< 0, 1 > >::type ThreadClusterArrangeOrder
Definition gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp:115
Definition reduction_functions_blockwise.hpp:28
Definition reduction_operator.hpp:37
◆ BlockwiseWelford
template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename ScaleBiasGridDesc_M, typename MeanVarGridDesc_M, typename GetReduceCountPerThreadFunctor, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
| using ck::GridwiseBatchNormBackwardWithBlockwiseWelford< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, ScaleBiasGridDesc_M, MeanVarGridDesc_M, GetReduceCountPerThreadFunctor, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::BlockwiseWelford |
Initial value:
BlockwiseWelford<AccDataType,
BlockSize,
ThreadClusterLengths_M_K,
ThreadClusterArrangeOrder,
false>
BlockwiseWelford< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, false > BlockwiseWelford
Definition gridwise_multiblock_welford_first_half.hpp:82
typename conditional< reorder_thread_cluster, Sequence< 1, 0 >, Sequence< 0, 1 > >::type ThreadClusterArrangeOrder
Definition gridwise_multiblock_welford_first_half.hpp:68
Sequence< MThreadClusterSize, KThreadClusterSize > ThreadClusterLengths_M_K
Definition gridwise_multiblock_welford_first_half.hpp:63
◆ PassThroughOp
template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename ScaleBiasGridDesc_M, typename MeanVarGridDesc_M, typename GetReduceCountPerThreadFunctor, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
| using ck::GridwiseBatchNormBackwardWithBlockwiseWelford< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, ScaleBiasGridDesc_M, MeanVarGridDesc_M, GetReduceCountPerThreadFunctor, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::PassThroughOp = tensor_operation::element_wise::PassThrough |
◆ ThreadBufferDimAccessOrder
template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename ScaleBiasGridDesc_M, typename MeanVarGridDesc_M, typename GetReduceCountPerThreadFunctor, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
| using ck::GridwiseBatchNormBackwardWithBlockwiseWelford< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, ScaleBiasGridDesc_M, MeanVarGridDesc_M, GetReduceCountPerThreadFunctor, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::ThreadBufferDimAccessOrder |
Initial value:
typename conditional<reorder_thread_cluster, Sequence<1, 0>, Sequence<0, 1>>::type
Definition utility/sequence.hpp:43
Definition utility/functional.hpp:100
◆ ThreadClusterArrangeOrder
template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename ScaleBiasGridDesc_M, typename MeanVarGridDesc_M, typename GetReduceCountPerThreadFunctor, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
| using ck::GridwiseBatchNormBackwardWithBlockwiseWelford< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, ScaleBiasGridDesc_M, MeanVarGridDesc_M, GetReduceCountPerThreadFunctor, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::ThreadClusterArrangeOrder |
Initial value:
typename conditional<reorder_thread_cluster, Sequence<1, 0>, Sequence<0, 1>>::type
◆ ThreadClusterLengths_M_K
template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename ScaleBiasGridDesc_M, typename MeanVarGridDesc_M, typename GetReduceCountPerThreadFunctor, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
| using ck::GridwiseBatchNormBackwardWithBlockwiseWelford< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, ScaleBiasGridDesc_M, MeanVarGridDesc_M, GetReduceCountPerThreadFunctor, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::ThreadClusterLengths_M_K = Sequence<MThreadClusterSize, KThreadClusterSize> |
◆ ThreadReduceDstDesc_M
template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename ScaleBiasGridDesc_M, typename MeanVarGridDesc_M, typename GetReduceCountPerThreadFunctor, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
| using ck::GridwiseBatchNormBackwardWithBlockwiseWelford< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, ScaleBiasGridDesc_M, MeanVarGridDesc_M, GetReduceCountPerThreadFunctor, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::ThreadReduceDstDesc_M |
Initial value:
decltype(make_naive_tensor_descriptor_packed(make_tuple(Number<MThreadSliceSize>{})))
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
◆ ThreadReduceSrcDesc_M_K
template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename ScaleBiasGridDesc_M, typename MeanVarGridDesc_M, typename GetReduceCountPerThreadFunctor, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
| using ck::GridwiseBatchNormBackwardWithBlockwiseWelford< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, ScaleBiasGridDesc_M, MeanVarGridDesc_M, GetReduceCountPerThreadFunctor, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::ThreadReduceSrcDesc_M_K |
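Initial value:
decltype(make_naive_tensor_descriptor_packed(make_tuple(Number<MThreadSliceSize>{}, Number<KThreadSliceSize>{})))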
◆ ThreadwiseReduce
template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename ScaleBiasGridDesc_M, typename MeanVarGridDesc_M, typename GetReduceCountPerThreadFunctor, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
| using ck::GridwiseBatchNormBackwardWithBlockwiseWelford< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, ScaleBiasGridDesc_M, MeanVarGridDesc_M, GetReduceCountPerThreadFunctor, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::ThreadwiseReduce |
Initial value:
ThreadwiseReduction<AccDataType,
ThreadReduceSrcDesc_M_K,
ThreadReduceDstDesc_M,
ck::reduce::Add,
false>
decltype(make_naive_tensor_descriptor_packed(make_tuple(Number< MThreadSliceSize >{}))) ThreadReduceDstDesc_M
Definition gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp:123
decltype(make_naive_tensor_descriptor_packed( make_tuple(Number< MThreadSliceSize >{}, Number< KThreadSliceSize >{}))) ThreadReduceSrcDesc_M_K
Definition gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp:116
Definition reduction_functions_threadwise.hpp:23
◆ ThreadwiseWelford
template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename ScaleBiasGridDesc_M, typename MeanVarGridDesc_M, typename GetReduceCountPerThreadFunctor, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
| using ck::GridwiseBatchNormBackwardWithBlockwiseWelford< XDataType, DyDataType, DxDataType, AccDataType, ScaleDataType, DscaleDbiasDataType, MeanVarDataType, DyElementwiseOp, XYGridDesc_M_K, ScaleBiasGridDesc_M, MeanVarGridDesc_M, GetReduceCountPerThreadFunctor, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XDyDxVectorDim, XSrcVectorSize, DySrcVectorSize, DxDstVectorSize, ScaleSrcVectorSize, DscaleDbiasDstVectorSize, MeanVarSrcVectorSize >::ThreadwiseWelford |
Initial value:
ThreadwiseWelford<AccDataType, ThreadReduceSrcDesc_M_K, ThreadReduceDstDesc_M>
ThreadwiseWelford< AccDataType, ThreadReduceSrcDesc_M_K, ThreadReduceDstDesc_M > ThreadwiseWelford
Definition gridwise_multiblock_welford_first_half.hpp:79
Member Function Documentation
◆ Run()
template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename ScaleBiasGridDesc_M, typename MeanVarGridDesc_M, typename GetReduceCountPerThreadFunctor, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
|
inline static |
Member Data Documentation
◆ I0
template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename ScaleBiasGridDesc_M, typename MeanVarGridDesc_M, typename GetReduceCountPerThreadFunctor, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
|
static constexpr |
◆ I1
template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename ScaleBiasGridDesc_M, typename MeanVarGridDesc_M, typename GetReduceCountPerThreadFunctor, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
|
static constexpr |
◆ K_BlockTileSize
template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename ScaleBiasGridDesc_M, typename MeanVarGridDesc_M, typename GetReduceCountPerThreadFunctor, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
|
static constexpr |
◆ M_BlockTileSize
template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename ScaleBiasGridDesc_M, typename MeanVarGridDesc_M, typename GetReduceCountPerThreadFunctor, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
|
static constexpr |
◆ reorder_thread_cluster
template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename ScaleBiasGridDesc_M, typename MeanVarGridDesc_M, typename GetReduceCountPerThreadFunctor, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
|
static constexpr |
◆ thread_cluster_desc
template<typename XDataType, typename DyDataType, typename DxDataType, typename AccDataType, typename ScaleDataType, typename DscaleDbiasDataType, typename MeanVarDataType, typename DyElementwiseOp, typename XYGridDesc_M_K, typename ScaleBiasGridDesc_M, typename MeanVarGridDesc_M, typename GetReduceCountPerThreadFunctor, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XDyDxVectorDim, index_t XSrcVectorSize, index_t DySrcVectorSize, index_t DxDstVectorSize, index_t ScaleSrcVectorSize, index_t DscaleDbiasDstVectorSize, index_t MeanVarSrcVectorSize>
|
static constexpr |
Initial value:
= make_cluster_descriptor(ThreadClusterLengths_M_K{}, ThreadClusterArrangeOrder{})
__host__ __device__ constexpr auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition tensor_description/cluster_descriptor.hpp:13
Sequence< MThreadClusterSize, KThreadClusterSize > ThreadClusterLengths_M_K
Definition gridwise_batchnorm_backward_blockwise_welford.hpp:111
typename conditional< reorder_thread_cluster, Sequence< 1, 0 >, Sequence< 0, 1 > >::type ThreadClusterArrangeOrder
Definition gridwise_batchnorm_backward_blockwise_welford.hpp:116
The documentation for this struct was generated from the following file: