amd_transpose_load_encoding.hpp Source File

amd_transpose_load_encoding.hpp Source File#

Composable Kernel: amd_transpose_load_encoding.hpp Source File
amd_transpose_load_encoding.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
3
4#pragma once
5
9
10namespace ck_tile {
11
12// This generates a wave-level tile distribution.
13template <typename T, index_t LaneGroupSize = 16, typename = void>
15
16template <typename T, index_t LaneGroupSize>
17struct LaneGroupTransposeTraits<T, LaneGroupSize, std::enable_if_t<sizeof(T) == 2>>
18{
19 static_assert(LaneGroupSize == 16 || LaneGroupSize == 32 || LaneGroupSize == 64,
20 "LaneGroupSize must be 16, 32, or 64");
21 // before transpose: ksecondDim x kleadDim = 4 x LaneGroupSize (4x16 when LaneGroupSize is 16)
22 static constexpr index_t ksecondDim = 4;
23 static constexpr index_t kleadDim = LaneGroupSize;
24 // after transpose: ksecondDimT x kleadDimT = LaneGroupSize x 4 (16x4 when LaneGroupSize is 16)
25 static constexpr index_t ksecondDimT = LaneGroupSize;
26 static constexpr index_t kleadDimT = 4;
27 template <index_t kOuterDistDim0,
28 index_t kOuterDistDim1,
29 index_t kInnerDistDim0,
30 index_t kInnerDistDim1>
34 sequence<kInnerDistDim0, kInnerDistDim1, LaneGroupSize / 16, 4, 4>>,
39};
40
41template <typename T, index_t LaneGroupSize>
42struct LaneGroupTransposeTraits<T, LaneGroupSize, std::enable_if_t<sizeof(T) == 1>>
43{
44 static constexpr index_t ksecondDim = 8;
45 static constexpr index_t kleadDim = LaneGroupSize;
46
47 static constexpr index_t ksecondDimT = LaneGroupSize;
48 static constexpr index_t kleadDimT = 8;
49
50 template <index_t kOuterDistDim0,
51 index_t kOuterDistDim1,
52 index_t kInnerDistDim0,
53 index_t kInnerDistDim1>
57 sequence<kInnerDistDim0, kInnerDistDim1, LaneGroupSize / 16, 2, 8>>,
62};
63
64/*
65 * @brief Generates the transposed distribution encoding
66 * for the given data type and distribution dimensions.
67 *
68 * @tparam T The data type of the elements in the tensor.
69 * @tparam kOuterDistDim0 The outer distribution dimension 0, which is the outer dimension for stride.
70 * @tparam kOuterDistDim1 The outer distribution dimension 1, which is the inner dimension for stride.
71 * @tparam kInnerDistDim0 The inner distribution dimension 0, which is the outer dimension for
72 * consecutive.
73 * @tparam kInnerDistDim1 The inner distribution dimension 1, which is the inner dimension for
74 * consecutive.
75 */
76template <typename T,
77 index_t LaneGroupSize,
78 index_t kOuterDistDim0,
79 index_t kOuterDistDim1,
80 index_t kInnerDistDim0,
81 index_t kInnerDistDim1>
83{
85 template TileDistribution<kOuterDistDim0, kOuterDistDim1, kInnerDistDim0, kInnerDistDim1>{};
86}
87
88} // namespace ck_tile
#define CK_TILE_DEVICE
Definition config.hpp:41
Definition tile/core/algorithm/cluster_descriptor.hpp:13
CK_TILE_DEVICE constexpr auto make_transposed_distr_encode()
Definition amd_transpose_load_encoding.hpp:82
int32_t index_t
Definition integer.hpp:9
STL namespace.
static constexpr index_t ksecondDimT
Definition amd_transpose_load_encoding.hpp:25
static constexpr index_t kleadDimT
Definition amd_transpose_load_encoding.hpp:26
tile_distribution_encoding< sequence<>, tuple< sequence< kOuterDistDim0, kOuterDistDim1, 4 >, sequence< kInnerDistDim0, kInnerDistDim1, LaneGroupSize/16, 4, 4 > >, tuple< sequence< 1, 2, 2, 1, 2 > >, tuple< sequence< 0, 0, 2, 2, 3 > >, sequence< 2, 1, 2 >, sequence< 1, 1, 4 > > TileDistribution
Definition amd_transpose_load_encoding.hpp:31
static constexpr index_t kleadDim
Definition amd_transpose_load_encoding.hpp:23
static constexpr index_t ksecondDim
Definition amd_transpose_load_encoding.hpp:22
tile_distribution_encoding< sequence<>, tuple< sequence< kOuterDistDim0, kOuterDistDim1, 8 >, sequence< kInnerDistDim0, kInnerDistDim1, LaneGroupSize/16, 2, 8 > >, tuple< sequence< 1, 2, 2, 1, 2 > >, tuple< sequence< 0, 0, 2, 2, 3 > >, sequence< 2, 1, 2 >, sequence< 1, 1, 4 > > TileDistribution
Definition amd_transpose_load_encoding.hpp:54
static constexpr index_t ksecondDimT
Definition amd_transpose_load_encoding.hpp:47
static constexpr index_t ksecondDim
Definition amd_transpose_load_encoding.hpp:44
static constexpr index_t kleadDimT
Definition amd_transpose_load_encoding.hpp:48
static constexpr index_t kleadDim
Definition amd_transpose_load_encoding.hpp:45
Definition amd_transpose_load_encoding.hpp:14
Definition tile_distribution_encoding.hpp:26
Definition tile/core/container/tuple.hpp:192