MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy > Struct Template Reference

MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy > Struct Template Reference

Composable Kernel: ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy > Struct Template Reference
ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy > Struct Template Reference

#include <mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp>

Inheritance diagram for ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >:
ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, MXF4FlatmmPipelineAgBgCrPolicy >

Public Types

using Underlying = FlatmmPipelineAGmemBGmemCRegV1<Problem, PipelinePolicy>
using ADataType = remove_cvref_t<typename Problem::ADataType>
using BDataType = remove_cvref_t<typename Problem::BDataType>
using CDataType = remove_cvref_t<typename Problem::CDataType>
using BlockGemmShape = remove_cvref_t<typename Problem::BlockGemmShape>
using ComputeType = ADataType
using ALayout = remove_cvref_t<typename Problem::ALayout>
using BLayout = remove_cvref_t<typename Problem::BLayout>
using CLayout = remove_cvref_t<typename Problem::CLayout>
using BlockFlatmm
using WG = remove_cvref_t<decltype(config.template at<0>())>
using BlockTile = remove_cvref_t<typename BlockGemmShape::BlockTile>
using BlockWarps = remove_cvref_t<typename BlockGemmShape::BlockWarps>
using WarpTile = remove_cvref_t<typename BlockGemmShape::WarpTile>
Public Types inherited from ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, MXF4FlatmmPipelineAgBgCrPolicy >
using ADataType
using BDataType
using CDataType
using BlockGemmShape
using ALayout
using BLayout
using CLayout
using BlockFlatmm
using WG
using BlockTile
using BlockWarps
using WarpTile

Public Member Functions

template<typename ADramBlockWindowTmp, typename AElementFunction, typename BFlatBlockWindowTmp, typename ScaleADramBlockWindowTmp, typename ScaleBDramBlockWindowTmp>
CK_TILE_HOST_DEVICE auto operator() (ADramBlockWindowTmp a_copy_dram_window, const AElementFunction &a_element_func, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, const ScaleADramBlockWindowTmp &scale_a_window, const ScaleBDramBlockWindowTmp &scale_b_window, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const
template<typename ADramBlockWindowTmp, typename BFlatBlockWindowTmp, typename ScaleADramBlockWindowTmp, typename ScaleBDramBlockWindowTmp>
CK_TILE_DEVICE auto operator() (const ADramBlockWindowTmp &a_dram_block_window_tmp, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, const ScaleADramBlockWindowTmp &scale_a_flat_window_tmp, const ScaleBDramBlockWindowTmp &scale_b_flat_window_tmp, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const
Public Member Functions inherited from ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, MXF4FlatmmPipelineAgBgCrPolicy >
CK_TILE_HOST_DEVICE auto operator() (const ADramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const

Static Public Member Functions

static constexpr index_t GetVectorSizeA ()
static constexpr index_t GetVectorSizeB ()
static constexpr index_t GetVectorSizeC ()
static CK_TILE_HOST_DEVICE constexpr auto SchedulerPerM (index_t dsread_perM, index_t dswrite_perM, index_t load_perM)
static CK_TILE_HOST_DEVICE constexpr auto HotLoopScheduler ()
static CK_TILE_HOST_DEVICE constexpr auto Last2ndHotLoopScheduler ()
static CK_TILE_HOST_DEVICE constexpr auto LastHotLoopScheduler ()
static CK_TILE_HOST_DEVICE constexpr auto GetADramTileDistribution ()
Static Public Member Functions inherited from ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, MXF4FlatmmPipelineAgBgCrPolicy >
static constexpr index_t GetVectorSizeA ()
static constexpr index_t GetVectorSizeB ()
static constexpr index_t GetVectorSizeC ()
static CK_TILE_HOST const std::string GetName ()
static CK_TILE_HOST_DEVICE constexpr auto TransposeC ()
static CK_TILE_HOST_DEVICE constexpr index_t GetSmemSize ()
static CK_TILE_HOST_DEVICE constexpr auto SchedulerPerM (index_t dsread_perM, index_t dswrite_perM, index_t load_perM)
static CK_TILE_HOST_DEVICE constexpr auto HotLoopScheduler ()
static CK_TILE_HOST_DEVICE constexpr auto Last2ndHotLoopScheduler ()
static CK_TILE_HOST_DEVICE constexpr auto LastHotLoopScheduler ()

Static Public Attributes

static constexpr auto config
static constexpr index_t DsWritePreIssue = 3
static constexpr index_t DsReadPreload = 4
static constexpr index_t BlockSize = Problem::kBlockSize
static constexpr index_t WaveSize = get_warp_size()
static constexpr index_t kMPerBlock = BlockGemmShape::kM
static constexpr index_t kNPerBlock = BlockGemmShape::kN
static constexpr index_t kKPerBlock = BlockGemmShape::kK
static constexpr index_t flatKPerWarp = Problem::flatKPerWarp
static constexpr index_t flatNPerWarp = Problem::flatNPerWarp
static constexpr bool kPadM = Problem::kPadM
static constexpr bool kPadN = Problem::kPadN
static constexpr bool kPadK = Problem::kPadK
static constexpr index_t NumWaveGroups = Problem::NumWaveGroups
static constexpr bool UsePersistentKernel = Problem::Traits::UsePersistentKernel
static constexpr auto I0 = number<0>()
static constexpr auto I1 = number<1>()
static constexpr auto I2 = number<2>()
static constexpr auto idxM = I0
static constexpr auto idxN = I1
static constexpr auto idxK = I2
static constexpr index_t MWarp = config.template at<1>()
static constexpr index_t NWarp = config.template at<2>()
static constexpr index_t MIterPerWarp = kMPerBlock / (MWarp * WG::kM)
static constexpr index_t NIterPerWarp = kNPerBlock / (NWarp * WG::kN)
static constexpr index_t KIterPerWarp = kKPerBlock / WG::kK
static constexpr index_t KFlatPerBlockPerIter = flatKPerWarp
static constexpr index_t NFlatPerBlockPerIter = flatNPerWarp
static constexpr index_t MPerBlockPerIter = kMPerBlock / MIterPerWarp
static constexpr index_t KPerBlockPerIter = kKPerBlock / KIterPerWarp
static constexpr index_t APackedSize = numeric_traits<ADataType>::PackedSize
static constexpr index_t BPackedSize = numeric_traits<BDataType>::PackedSize
static constexpr index_t MXdlPack = Problem::MXdlPack
static constexpr index_t NXdlPack = Problem::NXdlPack
static constexpr index_t KXdlPack = Problem::KXdlPack
static constexpr index_t AK1 = Problem::VectorLoadSize / sizeof(ADataType) * APackedSize
static constexpr index_t BK1 = Problem::VectorLoadSize / sizeof(BDataType) * BPackedSize
static constexpr index_t m_preload
static constexpr bool HasHotLoop = Problem::HasHotLoop
static constexpr auto TailNum = Problem::TailNum
static constexpr index_t mfma_per_wg = 1
static constexpr index_t dsread_per_wg
static constexpr index_t dsread_num_perK = dsread_per_wg * MIterPerWarp
static constexpr index_t dswrite_num_perK = dsread_num_perK / (MWarp * NWarp)
static constexpr index_t dswrite_rep = (dswrite_num_perK + MIterPerWarp - 1) / MIterPerWarp
static constexpr index_t Aload_num_perK = dswrite_num_perK
static constexpr index_t Aload_rep = dswrite_rep
static constexpr index_t Bload_num_perK = kNPerBlock * WG::kK / NWarp / BK1 / WaveSize
static constexpr index_t ScaleBload_K1 = NXdlPack * KXdlPack
static constexpr index_t ScaleBload_num
static constexpr index_t KPerScaleLoad = KIterPerWarp / ScaleBload_num
static constexpr index_t HalfMIter = (MIterPerWarp + 1) / 2
static constexpr index_t Bload_rep = (Bload_num_perK + HalfMIter - 1) / HalfMIter
static constexpr index_t mfma_perM_perK = NIterPerWarp * mfma_per_wg
static constexpr index_t dswrite_mIter = (DsWritePreIssue - 1) % MIterPerWarp
static constexpr index_t dswrite_kIter = (DsWritePreIssue - 1) / MIterPerWarp
static constexpr bool DoubleSmemBuffer = false
Static Public Attributes inherited from ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, MXF4FlatmmPipelineAgBgCrPolicy >
static constexpr auto config
static constexpr index_t DsWritePreIssue
static constexpr index_t DsReadPreload
static constexpr index_t BlockSize
static constexpr index_t WaveSize
static constexpr index_t kMPerBlock
static constexpr index_t kNPerBlock
static constexpr index_t kKPerBlock
static constexpr index_t flatKPerWarp
static constexpr index_t flatNPerWarp
static constexpr bool kPadM
static constexpr bool kPadN
static constexpr bool kPadK
static constexpr index_t kLdsAlignmentInBytes
static constexpr index_t NumWaveGroups
static constexpr bool UsePersistentKernel
static constexpr auto I0
static constexpr auto I1
static constexpr auto I2
static constexpr auto idxM
static constexpr auto idxN
static constexpr auto idxK
static constexpr index_t MWarp
static constexpr index_t NWarp
static constexpr index_t MIterPerWarp
static constexpr index_t NIterPerWarp
static constexpr index_t KIterPerWarp
static constexpr index_t KFlatPerBlockPerIter
static constexpr index_t NFlatPerBlockPerIter
static constexpr index_t MPerBlockPerIter
static constexpr index_t KPerBlockPerIter
static constexpr index_t K1
static constexpr index_t m_preload
static constexpr bool HasHotLoop
static constexpr auto TailNum
static constexpr index_t mfma_per_wg
static constexpr index_t dsread_per_wg
static constexpr index_t dsread_num_perK
static constexpr index_t dswrite_num_perK
static constexpr index_t dswrite_rep
static constexpr index_t Aload_num_perK
static constexpr index_t Aload_rep
static constexpr index_t Bload_num_perK
static constexpr index_t HalfMIter
static constexpr index_t Bload_rep
static constexpr index_t mfma_perM_perK
static constexpr index_t dswrite_mIter
static constexpr index_t dswrite_kIter
static constexpr bool DoubleSmemBuffer

Member Typedef Documentation

◆ ADataType

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
using ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::ADataType = remove_cvref_t<typename Problem::ADataType>

◆ ALayout

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
using ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::ALayout = remove_cvref_t<typename Problem::ALayout>

◆ BDataType

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
using ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BDataType = remove_cvref_t<typename Problem::BDataType>

◆ BLayout

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
using ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BLayout = remove_cvref_t<typename Problem::BLayout>

◆ BlockFlatmm

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
using ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BlockFlatmm
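Initial value: (the aliased type expression is missing from this extract; only the symbol it references survives)
- remove_cv_t< std::remove_reference_t< T > > remove_cvref_t (definition: type_traits.hpp:21)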

◆ BlockGemmShape

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
using ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BlockGemmShape = remove_cvref_t<typename Problem::BlockGemmShape>

◆ BlockTile

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
using ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BlockTile = remove_cvref_t<typename BlockGemmShape::BlockTile>

◆ BlockWarps

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
using ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BlockWarps = remove_cvref_t<typename BlockGemmShape::BlockWarps>

◆ CDataType

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
using ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::CDataType = remove_cvref_t<typename Problem::CDataType>

◆ CLayout

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
using ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::CLayout = remove_cvref_t<typename Problem::CLayout>

◆ ComputeType

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
using ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::ComputeType = ADataType

◆ Underlying

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
using ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::Underlying = FlatmmPipelineAGmemBGmemCRegV1<Problem, PipelinePolicy>

◆ WarpTile

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
using ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::WarpTile = remove_cvref_t<typename BlockGemmShape::WarpTile>

◆ WG

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
using ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::WG = remove_cvref_t<decltype(config.template at<0>())>

Member Function Documentation

◆ GetADramTileDistribution()

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::GetADramTileDistribution ( )
inline static constexpr

◆ GetVectorSizeA()

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
constexpr index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::GetVectorSizeA ( )
inline static constexpr

◆ GetVectorSizeB()

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
constexpr index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::GetVectorSizeB ( )
inline static constexpr

◆ GetVectorSizeC()

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
constexpr index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::GetVectorSizeC ( )
inline static constexpr

◆ HotLoopScheduler()

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::HotLoopScheduler ( )
inline static constexpr

◆ Last2ndHotLoopScheduler()

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::Last2ndHotLoopScheduler ( )
inline static constexpr

◆ LastHotLoopScheduler()

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::LastHotLoopScheduler ( )
inline static constexpr

◆ operator()() [1/2]

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
template<typename ADramBlockWindowTmp, typename AElementFunction, typename BFlatBlockWindowTmp, typename ScaleADramBlockWindowTmp, typename ScaleBDramBlockWindowTmp>
CK_TILE_HOST_DEVICE auto ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::operator() ( ADramBlockWindowTmp a_copy_dram_window,
const AElementFunction & a_element_func,
const BFlatBlockWindowTmp & b_flat_dram_block_window_tmp,
const ScaleADramBlockWindowTmp & scale_a_window,
const ScaleBDramBlockWindowTmp & scale_b_window,
index_t num_loop,
void * p_smem_ping,
void * p_smem_pong ) const
inline

◆ operator()() [2/2]

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
template<typename ADramBlockWindowTmp, typename BFlatBlockWindowTmp, typename ScaleADramBlockWindowTmp, typename ScaleBDramBlockWindowTmp>
CK_TILE_DEVICE auto ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::operator() ( const ADramBlockWindowTmp & a_dram_block_window_tmp,
const BFlatBlockWindowTmp & b_flat_dram_block_window_tmp,
const ScaleADramBlockWindowTmp & scale_a_flat_window_tmp,
const ScaleBDramBlockWindowTmp & scale_b_flat_window_tmp,
index_t num_loop,
void * p_smem_ping,
void * p_smem_pong ) const
inline

◆ SchedulerPerM()

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::SchedulerPerM ( index_t dsread_perM,
index_t dswrite_perM,
index_t load_perM )
inline static constexpr

Member Data Documentation

◆ AK1

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::AK1 = Problem::VectorLoadSize / sizeof(ADataType) * APackedSize
static constexpr

◆ Aload_num_perK

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::Aload_num_perK = dswrite_num_perK
static constexpr

◆ Aload_rep

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::Aload_rep = dswrite_rep
static constexpr

◆ APackedSize

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::APackedSize = numeric_traits<ADataType>::PackedSize
static constexpr

◆ BK1

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BK1 = Problem::VectorLoadSize / sizeof(BDataType) * BPackedSize
static constexpr

◆ Bload_num_perK

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::Bload_num_perK = kNPerBlock * WG::kK / NWarp / BK1 / WaveSize
static constexpr

◆ Bload_rep

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::Bload_rep = (Bload_num_perK + HalfMIter - 1) / HalfMIter
static constexpr

◆ BlockSize

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BlockSize = Problem::kBlockSize
static constexpr

◆ BPackedSize

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BPackedSize = numeric_traits<BDataType>::PackedSize
static constexpr

◆ config

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
auto ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::config
static constexpr
Initial value:
= BlockFlatmm::BlockPolicy::template GetWarpGemmMWarpNWarp<Problem>()

◆ DoubleSmemBuffer

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
bool ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::DoubleSmemBuffer = false
static constexpr

◆ dsread_num_perK

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::dsread_num_perK = dsread_per_wg * MIterPerWarp
static constexpr

◆ dsread_per_wg

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::dsread_per_wg
static constexpr
Initial value:
= WG::kM * WG::kK * sizeof(ADataType) / APackedSize / WaveSize / Problem::VectorLoadSize
Symbols referenced by the initializer:
- remove_cvref_t< typename Problem::ADataType > ADataType (definition: flatmm_pipeline_agmem_bgmem_creg_v1.hpp:48)
- static constexpr index_t WaveSize (definition: flatmm_pipeline_agmem_bgmem_creg_v1.hpp:69)
- static constexpr index_t APackedSize (definition: mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:122)

◆ DsReadPreload

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::DsReadPreload = 4
static constexpr

◆ dswrite_kIter

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::dswrite_kIter = (DsWritePreIssue - 1) / MIterPerWarp
static constexpr

◆ dswrite_mIter

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::dswrite_mIter = (DsWritePreIssue - 1) % MIterPerWarp
static constexpr

◆ dswrite_num_perK

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::dswrite_num_perK = dsread_num_perK / (MWarp * NWarp)
static constexpr

◆ dswrite_rep

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::dswrite_rep = (dswrite_num_perK + MIterPerWarp - 1) / MIterPerWarp
static constexpr

◆ DsWritePreIssue

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::DsWritePreIssue = 3
static constexpr

◆ flatKPerWarp

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::flatKPerWarp = Problem::flatKPerWarp
static constexpr

◆ flatNPerWarp

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::flatNPerWarp = Problem::flatNPerWarp
static constexpr

◆ HalfMIter

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::HalfMIter = (MIterPerWarp + 1) / 2
static constexpr

◆ HasHotLoop

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
bool ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::HasHotLoop = Problem::HasHotLoop
static constexpr

◆ I0

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
auto ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::I0 = number<0>()
static constexpr

◆ I1

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
auto ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::I1 = number<1>()
static constexpr

◆ I2

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
auto ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::I2 = number<2>()
static constexpr

◆ idxK

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
auto ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::idxK = I2
static constexpr

◆ idxM

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
auto ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::idxM = I0
static constexpr

◆ idxN

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
auto ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::idxN = I1
static constexpr

◆ KFlatPerBlockPerIter

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::KFlatPerBlockPerIter = flatKPerWarp
static constexpr

◆ KIterPerWarp

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::KIterPerWarp = kKPerBlock / WG::kK
static constexpr

◆ kKPerBlock

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::kKPerBlock = BlockGemmShape::kK
static constexpr

◆ kMPerBlock

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::kMPerBlock = BlockGemmShape::kM
static constexpr

◆ kNPerBlock

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::kNPerBlock = BlockGemmShape::kN
static constexpr

◆ kPadK

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
bool ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::kPadK = Problem::kPadK
static constexpr

◆ kPadM

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
bool ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::kPadM = Problem::kPadM
static constexpr

◆ kPadN

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
bool ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::kPadN = Problem::kPadN
static constexpr

◆ KPerBlockPerIter

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::KPerBlockPerIter = kKPerBlock / KIterPerWarp
static constexpr

◆ KPerScaleLoad

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::KPerScaleLoad = KIterPerWarp / ScaleBload_num
static constexpr

◆ KXdlPack

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::KXdlPack = Problem::KXdlPack
static constexpr

◆ m_preload

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::m_preload
static constexpr
Initial value: (the initializer expression is missing from this extract; only the symbols it references survive)
- static constexpr index_t MIterPerWarp (definition: flatmm_pipeline_agmem_bgmem_creg_v1.hpp:103)
- static constexpr index_t KIterPerWarp (definition: flatmm_pipeline_agmem_bgmem_creg_v1.hpp:105)
- static constexpr index_t DsReadPreload (definition: flatmm_pipeline_agmem_bgmem_creg_v1.hpp:66)

◆ mfma_per_wg

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::mfma_per_wg = 1
static constexpr

◆ mfma_perM_perK

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::mfma_perM_perK = NIterPerWarp * mfma_per_wg
static constexpr

◆ MIterPerWarp

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::MIterPerWarp = kMPerBlock / (MWarp * WG::kM)
static constexpr

◆ MPerBlockPerIter

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::MPerBlockPerIter = kMPerBlock / MIterPerWarp
static constexpr

◆ MWarp

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::MWarp = config.template at<1>()
static constexpr

◆ MXdlPack

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::MXdlPack = Problem::MXdlPack
static constexpr

◆ NFlatPerBlockPerIter

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::NFlatPerBlockPerIter = flatNPerWarp
static constexpr

◆ NIterPerWarp

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::NIterPerWarp = kNPerBlock / (NWarp * WG::kN)
static constexpr

◆ NumWaveGroups

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::NumWaveGroups = Problem::NumWaveGroups
static constexpr

◆ NWarp

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::NWarp = config.template at<2>()
static constexpr

◆ NXdlPack

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::NXdlPack = Problem::NXdlPack
static constexpr

◆ ScaleBload_K1

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::ScaleBload_K1 = NXdlPack * KXdlPack
static constexpr

◆ ScaleBload_num

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::ScaleBload_num
static constexpr
Initial value: (the initializer expression after `=` is missing from this extract; only the symbols it references survive)
- static constexpr index_t kNPerBlock (definition: mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:80)
- static constexpr index_t WaveSize (definition: mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:77)
- static constexpr index_t ScaleBload_K1 (definition: mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:169)
- static constexpr index_t NWarp (definition: mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:109)
- static constexpr index_t kKPerBlock (definition: mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:81)

◆ TailNum

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
auto ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::TailNum = Problem::TailNum
static constexpr

◆ UsePersistentKernel

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
bool ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::UsePersistentKernel = Problem::Traits::UsePersistentKernel
static constexpr

◆ WaveSize

template<typename Problem, typename PipelinePolicy = MXF4FlatmmPipelineAgBgCrPolicy>
index_t ck_tile::MXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::WaveSize = get_warp_size()
static constexpr

The documentation for this struct was generated from the following file: