BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC > Struct Template Reference

BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC > Struct Template Reference

Composable Kernel: ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC > Struct Template Reference
ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC > Struct Template Reference

#include <blockwise_gemm_wmma.hpp>

Classes

struct  AThreadCopySelector
struct  AThreadCopySelector< true >
struct  AThreadCopySelector< false >
struct  BThreadCopySelector
struct  BThreadCopySelector< true >
struct  BThreadCopySelector< false >

Public Types

using ThisThreadBlock = ThisThreadBlock<BlockSize>
using Tuple6 = decltype(CalculateAThreadOriginDataIndex())

Public Member Functions

__host__ __device__ constexpr auto & GetCThreadBuffer ()
__host__ __device__ BlockwiseGemmWMMA (Tuple6 a_origin=CalculateAThreadOriginDataIndex(), Tuple6 b_origin=CalculateBThreadOriginDataIndex())
template<typename ABlockBuffer, typename BBlockBuffer, typename CThreadBuffer>
__device__ void Run (const ABlockBuffer &a_block_buf, const BBlockBuffer &b_block_buf, CThreadBuffer &c_thread_buf) const

Static Public Member Functions

static __device__ auto GetWaveIdx ()
static __device__ auto CalculateAThreadOriginDataIndex ()
static __device__ auto CalculateBThreadOriginDataIndex ()
template<index_t m0, index_t n0>
static __device__ auto CalculateCThreadOriginDataIndex (Number< m0 >, Number< n0 >)
template<index_t m0, index_t n0>
static __device__ auto CalculateCThreadOriginDataIndex7D (Number< m0 >, Number< n0 >)
__host__ static __device__ constexpr auto GetCThreadDescriptor_MRepeat_MWave_MThreadPerSubGroup_NRepeat_NWave_NSubGroup_NAccVgprs ()
__host__ static __device__ constexpr auto GetCThreadDescriptor_MRepeat_MWave_MSubGroup_NRepeat_NWave_NThreadPerSubGroup_MAccVgprs ()
template<typename CGridDesc_M_N>
__host__ static __device__ constexpr auto MakeCGridDescriptor_MBlockxRepeat_MWave_MSubGroup_NBlockxRepeat_NWave_NThreadPerSubGroup_MAccVgprs (const CGridDesc_M_N &c_grid_desc_m_n)
__host__ static __device__ constexpr auto GetCBlockDescriptor_MRepeat_MWave_MThreadPerSubGroup_NRepeat_NWave_NSubGroup_NAccVgprs ()
__host__ static __device__ constexpr auto GetCBlockDescriptor_MRepeat_MWave_MSubGroup_NRepeat_NWave_NThreadPerSubGroup_MAccVgprs ()

Public Attributes

StaticBufferTupleOfVector< AddressSpaceEnum::Vgpr, FloatAcc, MRepeat * NRepeat, wmma_gemm.GetRegSizePerWmma(), true > c_thread_buf_

Static Public Attributes

static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr auto I2 = Number<2>{}
static constexpr auto I3 = Number<3>{}
static constexpr auto I4 = Number<4>{}
static constexpr auto I5 = Number<5>{}
static constexpr auto WmmaK = Number<16>{}
static constexpr index_t WaveSize = 32
static constexpr index_t A_KRow = AEnableLds ? 1 : 2
static constexpr index_t B_KRow = BEnableLds ? 1 : 2
static constexpr index_t A_K1 = ABlockDesc{}.GetLength(I5)
static constexpr index_t B_K1 = BBlockDesc{}.GetLength(I5)
static constexpr auto wmma_gemm
static constexpr index_t MWaves = MPerBlock / (MRepeat * MPerWMMA)
static constexpr index_t NWaves = NPerBlock / (NRepeat * NPerWMMA)
static constexpr ABlockDesc a_block_desc_k0_m0_m1_m2_k1
static constexpr BBlockDesc b_block_desc_k0_n0_n1_n2_k1

Protected Attributes

AThreadCopySelector< AEnableLds >::type a_thread_copy_
BThreadCopySelector< BEnableLds >::type b_thread_copy_

Static Protected Attributes

static constexpr auto a_thread_desc_
static constexpr auto b_thread_desc_
static constexpr auto c_thread_desc_

Member Typedef Documentation

◆ ThisThreadBlock

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
using ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::ThisThreadBlock = ThisThreadBlock<BlockSize>

◆ Tuple6

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
using ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::Tuple6 = decltype(CalculateAThreadOriginDataIndex())

Constructor & Destructor Documentation

◆ BlockwiseGemmWMMA()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
__host__ __device__ ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::BlockwiseGemmWMMA ( Tuple6 a_origin = CalculateAThreadOriginDataIndex(),
Tuple6 b_origin = CalculateBThreadOriginDataIndex() )
inline

Member Function Documentation

◆ CalculateAThreadOriginDataIndex()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
__device__ auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::CalculateAThreadOriginDataIndex ( )
inline static

◆ CalculateBThreadOriginDataIndex()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
__device__ auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::CalculateBThreadOriginDataIndex ( )
inline static

◆ CalculateCThreadOriginDataIndex()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
template<index_t m0, index_t n0>
__device__ auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::CalculateCThreadOriginDataIndex ( Number< m0 > ,
Number< n0 >  )
inline static

◆ CalculateCThreadOriginDataIndex7D()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
template<index_t m0, index_t n0>
__device__ auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::CalculateCThreadOriginDataIndex7D ( Number< m0 > ,
Number< n0 >  )
inline static

◆ GetCBlockDescriptor_MRepeat_MWave_MSubGroup_NRepeat_NWave_NThreadPerSubGroup_MAccVgprs()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
__host__ static __device__ constexpr auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::GetCBlockDescriptor_MRepeat_MWave_MSubGroup_NRepeat_NWave_NThreadPerSubGroup_MAccVgprs ( )
inline static constexpr

◆ GetCBlockDescriptor_MRepeat_MWave_MThreadPerSubGroup_NRepeat_NWave_NSubGroup_NAccVgprs()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
__host__ static __device__ constexpr auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::GetCBlockDescriptor_MRepeat_MWave_MThreadPerSubGroup_NRepeat_NWave_NSubGroup_NAccVgprs ( )
inline static constexpr

◆ GetCThreadBuffer()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
__host__ __device__ constexpr auto & ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::GetCThreadBuffer ( )
inline constexpr

◆ GetCThreadDescriptor_MRepeat_MWave_MSubGroup_NRepeat_NWave_NThreadPerSubGroup_MAccVgprs()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
__host__ static __device__ constexpr auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::GetCThreadDescriptor_MRepeat_MWave_MSubGroup_NRepeat_NWave_NThreadPerSubGroup_MAccVgprs ( )
inline static constexpr

◆ GetCThreadDescriptor_MRepeat_MWave_MThreadPerSubGroup_NRepeat_NWave_NSubGroup_NAccVgprs()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
__host__ static __device__ constexpr auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::GetCThreadDescriptor_MRepeat_MWave_MThreadPerSubGroup_NRepeat_NWave_NSubGroup_NAccVgprs ( )
inline static constexpr

◆ GetWaveIdx()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
__device__ auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::GetWaveIdx ( )
inline static

◆ MakeCGridDescriptor_MBlockxRepeat_MWave_MSubGroup_NBlockxRepeat_NWave_NThreadPerSubGroup_MAccVgprs()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
template<typename CGridDesc_M_N>
__host__ static __device__ constexpr auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::MakeCGridDescriptor_MBlockxRepeat_MWave_MSubGroup_NBlockxRepeat_NWave_NThreadPerSubGroup_MAccVgprs ( const CGridDesc_M_N & c_grid_desc_m_n)
inline static constexpr

◆ Run()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
template<typename ABlockBuffer, typename BBlockBuffer, typename CThreadBuffer>
__device__ void ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::Run ( const ABlockBuffer & a_block_buf,
const BBlockBuffer & b_block_buf,
CThreadBuffer & c_thread_buf ) const
inline

Member Data Documentation

◆ a_block_desc_k0_m0_m1_m2_k1

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
ABlockDesc ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::a_block_desc_k0_m0_m1_m2_k1
static constexpr

◆ A_K1

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
index_t ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::A_K1 = ABlockDesc{}.GetLength(I5)
static constexpr

◆ A_KRow

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
index_t ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::A_KRow = AEnableLds ? 1 : 2
static constexpr

◆ a_thread_copy_

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
AThreadCopySelector<AEnableLds>::type ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::a_thread_copy_
protected

◆ a_thread_desc_

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::a_thread_desc_
static constexpr protected
Initial value (the initializer listing is incomplete in this extract; only the following fragments remain):
=
I1,
I1,
Cross-referenced symbols (presumably those used by the initializer):
__host__ __device__ constexpr auto make_naive_tensor_descriptor(const Tuple< Lengths... > &lengths, const Tuple< Strides... > &strides)
Definition tensor_descriptor_helper.hpp:49
integral_constant< index_t, N > Number
Definition number.hpp:12
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
Definition blockwise_gemm_wmma.hpp:550
static constexpr auto I1
Definition blockwise_gemm_wmma.hpp:552

◆ b_block_desc_k0_n0_n1_n2_k1

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
BBlockDesc ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::b_block_desc_k0_n0_n1_n2_k1
static constexpr

◆ B_K1

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
index_t ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::B_K1 = BBlockDesc{}.GetLength(I5)
static constexpr

◆ B_KRow

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
index_t ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::B_KRow = BEnableLds ? 1 : 2
static constexpr

◆ b_thread_copy_

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
BThreadCopySelector<BEnableLds>::type ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::b_thread_copy_
protected

◆ b_thread_desc_

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::b_thread_desc_
static constexpr protected

◆ c_thread_buf_

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
StaticBufferTupleOfVector<AddressSpaceEnum::Vgpr, FloatAcc, MRepeat * NRepeat, wmma_gemm.GetRegSizePerWmma(), true> ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::c_thread_buf_

◆ c_thread_desc_

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::c_thread_desc_
static constexpr protected
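Initial value (the initializer listing is missing from this extract):
Cross-referenced symbols (presumably those used by the initializer):
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
static constexpr auto wmma_gemm
Definition blockwise_gemm_wmma.hpp:572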

◆ I0

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::I0 = Number<0>{}
static constexpr

◆ I1

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::I1 = Number<1>{}
static constexpr

◆ I2

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::I2 = Number<2>{}
static constexpr

◆ I3

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::I3 = Number<3>{}
static constexpr

◆ I4

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::I4 = Number<4>{}
static constexpr

◆ I5

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::I5 = Number<5>{}
static constexpr

◆ MWaves

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
index_t ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::MWaves = MPerBlock / (MRepeat * MPerWMMA)
static constexpr

◆ NWaves

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
index_t ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::NWaves = NPerBlock / (NRepeat * NPerWMMA)
static constexpr

◆ WaveSize

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
index_t ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::WaveSize = 32
static constexpr

◆ wmma_gemm

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::wmma_gemm
static constexpr

◆ WmmaK

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename ABlockDesc, typename BBlockDesc, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWMMA, index_t NPerWMMA, index_t MRepeat, index_t NRepeat, index_t KPack, bool AEnableLds = true, bool BEnableLds = true, bool TransposeC = false>
auto ck::BlockwiseGemmWMMA< BlockSize, FloatA, FloatB, FloatAcc, ABlockDesc, BBlockDesc, MPerBlock, NPerBlock, KPerBlock, MPerWMMA, NPerWMMA, MRepeat, NRepeat, KPack, AEnableLds, BEnableLds, TransposeC >::WmmaK = Number<16>{}
static constexpr

The documentation for this struct was generated from the following file: