DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB > Struct Template Reference

DeviceGemm_BScale_Wmma_CShuffleV3&lt; ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB &gt; Struct Template Reference#

Composable Kernel: ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB > Struct Template Reference
ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB > Struct Template Reference

#include <device_gemm_wmma_cshuffle_v3_b_scale.hpp>

Inheritance diagram for ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >:
ck::tensor_operation::device::DeviceGemmV2BScale< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, ScaleBlockN, ScaleBlockK, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation > ck::tensor_operation::device::BaseOperator

Public Types

using GridwiseGemm
using Argument = typename GridwiseGemm::Argument
using DeviceGemmCommon
using Invoker = typename DeviceGemmCommon::Invoker

Public Member Functions

bool IsSupportedArgument (const BaseArgument *p_arg) override
index_t GetKPerBlock () override
bool GetPermuteB () override
std::unique_ptr< BaseArgumentMakeArgumentPointer (const void *p_a, const void *p_b, void *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, index_t StrideScaleB, const void *p_b_scale, index_t KBatch, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op) override
std::unique_ptr< BaseInvokerMakeInvokerPointer () override
std::string GetTypeString () const override
Public Member Functions inherited from ck::tensor_operation::device::BaseOperator
 BaseOperator ()=default
 BaseOperator (const BaseOperator &)=default
BaseOperatoroperator= (const BaseOperator &)=default
virtual std::string GetInstanceString () const
virtual std::string GetTypeIdName () const
virtual std::optional< std::string > GetObjectName () const
virtual std::optional< std::string > GetTemplateInfo () const
virtual std::string GetTypeIdHashCode () const
virtual size_t GetWorkSpaceSize (const BaseArgument *) const
virtual void SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const
virtual ~BaseOperator ()

Static Public Member Functions

static bool IsSupportedArgument (const Argument &arg)
static auto MakeArgument (const ADataType *p_a, const BDataType *p_b, CDataType *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, index_t StrideScaleB, const BScaleDataType *p_b_scale, index_t KBatch, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation cde_element_op)
static auto MakeInvoker ()

Member Typedef Documentation

◆ Argument

template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
using ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Argument = typename GridwiseGemm::Argument

◆ DeviceGemmCommon

template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
using ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::DeviceGemmCommon
Initial value:
CDataType,
MPerBlock,
NPerBlock,
KPerBlock,
BlockSize,
AK1,
BK1,
GemmSpec,
BlkGemmPipeSched,
BlkGemmPipelineVer,
ComputeTypeA,
ComputeTypeB>
Definition utility/sequence.hpp:43
Definition utility/tuple.hpp:186
Definition utility/tuple.hpp:117
Definition device_gemm_wmma_cshuffle_v3_common.hpp:43
GridwiseGemm_wmma_cshuffle_v3< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, false, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, false, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB > GridwiseGemm
Definition device_gemm_multiple_abd_wmma_cshuffle_v3.hpp:195

◆ GridwiseGemm

template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
using ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::GridwiseGemm

◆ Invoker

template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
using ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Invoker = typename DeviceGemmCommon::Invoker

Member Function Documentation

◆ GetKPerBlock()

template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
index_t ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::GetKPerBlock ( )
inlineoverridevirtual

◆ GetPermuteB()

template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
bool ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::GetPermuteB ( )
inlineoverridevirtual

◆ GetTypeString()

template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
std::string ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::GetTypeString ( ) const
inlineoverridevirtual

◆ IsSupportedArgument() [1/2]

template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
bool ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::IsSupportedArgument ( const Argument & arg)
inlinestatic

◆ IsSupportedArgument() [2/2]

template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
bool ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::IsSupportedArgument ( const BaseArgument * p_arg)
inlineoverridevirtual

◆ MakeArgument()

template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
auto ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::MakeArgument ( const ADataType * p_a,
const BDataType * p_b,
CDataType * p_c,
index_t M,
index_t N,
index_t K,
index_t StrideA,
index_t StrideB,
index_t StrideC,
index_t StrideScaleB,
const BScaleDataType * p_b_scale,
index_t KBatch,
AElementwiseOperation a_element_op,
BElementwiseOperation b_element_op,
CElementwiseOperation cde_element_op )
inlinestatic

◆ MakeArgumentPointer()

template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
std::unique_ptr< BaseArgument > ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::MakeArgumentPointer ( const void * p_a,
const void * p_b,
void * p_c,
index_t M,
index_t N,
index_t K,
index_t StrideA,
index_t StrideB,
index_t StrideC,
index_t StrideScaleB,
const void * p_b_scale,
index_t KBatch,
AElementwiseOperation a_element_op,
BElementwiseOperation b_element_op,
CElementwiseOperation c_element_op )
inlineoverridevirtual

◆ MakeInvoker()

template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
auto ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::MakeInvoker ( )
inlinestatic

◆ MakeInvokerPointer()

template<typename ALayout, typename BLayout, typename CLayout, typename ADataType, typename BDataType, typename BScaleDataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1, index_t BK1, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1, typename ComputeTypeA = CDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
std::unique_ptr< BaseInvoker > ck::tensor_operation::device::DeviceGemm_BScale_Wmma_CShuffleV3< ALayout, BLayout, CLayout, ADataType, BDataType, BScaleDataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::MakeInvokerPointer ( )
inlineoverridevirtual

The documentation for this struct was generated from the following file: