ThreadGroupTensorSliceTransfer_v4r1_gather< ThreadGroup, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, DstScalarStrideInVector, ThreadTransferSrcResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch > Struct Template Reference

ThreadGroupTensorSliceTransfer_v4r1_gather< ThreadGroup, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, DstScalarStrideInVector, ThreadTransferSrcResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch > Struct Template Reference

Composable Kernel: ck::ThreadGroupTensorSliceTransfer_v4r1_gather< ThreadGroup, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, DstScalarStrideInVector, ThreadTransferSrcResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch > Struct Template Reference
ck::ThreadGroupTensorSliceTransfer_v4r1_gather< ThreadGroup, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, DstScalarStrideInVector, ThreadTransferSrcResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch > Struct Template Reference

Blockwise data transfer. More...

#include <thread_group_tensor_slice_transfer_v4r1_gather.hpp>

Public Types

using Index = MultiIndex<nDim>

Public Member Functions

__device__ constexpr ThreadGroupTensorSliceTransfer_v4r1_gather (const SrcDesc &src_desc, const Index &src_block_slice_origin, const SrcElementwiseOperation &src_element_op, const DstDesc &dst_desc, const Index &dst_block_slice_origin, const DstElementwiseOperation &dst_element_op, const StaticallyIndexedArray< IndexType, gather_num > &gather_offsets)
__device__ void SetSrcSliceOrigin (const SrcDesc &src_desc, const Index &src_block_slice_origin)
template<typename SeqIdx, index_t ThreadScratchId = 0>
__device__ constexpr auto GetSrcThreadScratchIdx ()
template<typename SrcBuffer, index_t ThreadScratchId = 0>
__device__ void RunRead (const SrcDesc &src_desc, const SrcBuffer &src_buf, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
template<typename DstBuffer, index_t ThreadScratchId = 0>
__device__ void RunWrite (const DstDesc &dst_desc, DstBuffer &dst_buf, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
template<typename SrcBuffer, typename DstBuffer, index_t ThreadScratchId>
__device__ void Run (const SrcDesc &src_desc, const SrcBuffer &src_buf, const DstDesc &dst_desc, DstBuffer &dst_buf, Number< ThreadScratchId > thread_scratch_id)
__device__ void MoveSrcSliceWindow (const SrcDesc &src_desc, const Index &step)
__device__ void MoveDstSliceWindow (const DstDesc &dst_desc, const Index &step)

Static Public Attributes

static constexpr auto I0 = Number<0>{}
static constexpr index_t nDim = remove_reference_t<SrcDesc>::GetNumOfDimension()
static constexpr auto thread_slice_lengths = BlockSliceLengths{} / ThreadClusterLengths{}
static constexpr index_t gather_num = thread_slice_lengths.At(Number<GatherDim>{})

Detailed Description

template<typename ThreadGroup, typename SrcElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t DstScalarStrideInVector, bool ThreadTransferSrcResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun, typename IndexType, index_t GatherDim = 1, index_t NumThreadScratch = 1>
struct ck::ThreadGroupTensorSliceTransfer_v4r1_gather< ThreadGroup, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, DstScalarStrideInVector, ThreadTransferSrcResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >

Blockwise data transfer.

This version does the following things to avoid scratch memory issues:

  1. Use StaticallyIndexedArray instead of C array for thread buffer
  2. ThreadwiseTensorSliceTransfer_v3 does not keep reference to tensor descriptor
  3. ThreadwiseTensorSliceTransfer_v3::Run() does not construct new tensor coordinate

Member Typedef Documentation

◆ Index

template<typename ThreadGroup, typename SrcElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t DstScalarStrideInVector, bool ThreadTransferSrcResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun, typename IndexType, index_t GatherDim = 1, index_t NumThreadScratch = 1>
using ck::ThreadGroupTensorSliceTransfer_v4r1_gather< ThreadGroup, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, DstScalarStrideInVector, ThreadTransferSrcResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::Index = MultiIndex<nDim>

Constructor & Destructor Documentation

◆ ThreadGroupTensorSliceTransfer_v4r1_gather()

template<typename ThreadGroup, typename SrcElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t DstScalarStrideInVector, bool ThreadTransferSrcResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun, typename IndexType, index_t GatherDim = 1, index_t NumThreadScratch = 1>
__device__ constexpr ck::ThreadGroupTensorSliceTransfer_v4r1_gather< ThreadGroup, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, DstScalarStrideInVector, ThreadTransferSrcResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::ThreadGroupTensorSliceTransfer_v4r1_gather ( const SrcDesc & src_desc,
const Index & src_block_slice_origin,
const SrcElementwiseOperation & src_element_op,
const DstDesc & dst_desc,
const Index & dst_block_slice_origin,
const DstElementwiseOperation & dst_element_op,
const StaticallyIndexedArray< IndexType, gather_num > & gather_offsets )
inline constexpr

Member Function Documentation

◆ GetSrcThreadScratchIdx()

template<typename ThreadGroup, typename SrcElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t DstScalarStrideInVector, bool ThreadTransferSrcResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun, typename IndexType, index_t GatherDim = 1, index_t NumThreadScratch = 1>
template<typename SeqIdx, index_t ThreadScratchId = 0>
__device__ constexpr auto ck::ThreadGroupTensorSliceTransfer_v4r1_gather< ThreadGroup, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, DstScalarStrideInVector, ThreadTransferSrcResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::GetSrcThreadScratchIdx ( )
inline constexpr

◆ MoveDstSliceWindow()

template<typename ThreadGroup, typename SrcElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t DstScalarStrideInVector, bool ThreadTransferSrcResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun, typename IndexType, index_t GatherDim = 1, index_t NumThreadScratch = 1>
__device__ void ck::ThreadGroupTensorSliceTransfer_v4r1_gather< ThreadGroup, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, DstScalarStrideInVector, ThreadTransferSrcResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::MoveDstSliceWindow ( const DstDesc & dst_desc,
const Index & step )
inline

◆ MoveSrcSliceWindow()

template<typename ThreadGroup, typename SrcElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t DstScalarStrideInVector, bool ThreadTransferSrcResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun, typename IndexType, index_t GatherDim = 1, index_t NumThreadScratch = 1>
__device__ void ck::ThreadGroupTensorSliceTransfer_v4r1_gather< ThreadGroup, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, DstScalarStrideInVector, ThreadTransferSrcResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::MoveSrcSliceWindow ( const SrcDesc & src_desc,
const Index & step )
inline

◆ Run()

template<typename ThreadGroup, typename SrcElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t DstScalarStrideInVector, bool ThreadTransferSrcResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun, typename IndexType, index_t GatherDim = 1, index_t NumThreadScratch = 1>
template<typename SrcBuffer, typename DstBuffer, index_t ThreadScratchId>
__device__ void ck::ThreadGroupTensorSliceTransfer_v4r1_gather< ThreadGroup, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, DstScalarStrideInVector, ThreadTransferSrcResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::Run ( const SrcDesc & src_desc,
const SrcBuffer & src_buf,
const DstDesc & dst_desc,
DstBuffer & dst_buf,
Number< ThreadScratchId > thread_scratch_id )
inline

◆ RunRead()

template<typename ThreadGroup, typename SrcElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t DstScalarStrideInVector, bool ThreadTransferSrcResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun, typename IndexType, index_t GatherDim = 1, index_t NumThreadScratch = 1>
template<typename SrcBuffer, index_t ThreadScratchId = 0>
__device__ void ck::ThreadGroupTensorSliceTransfer_v4r1_gather< ThreadGroup, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, DstScalarStrideInVector, ThreadTransferSrcResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::RunRead ( const SrcDesc & src_desc,
const SrcBuffer & src_buf,
Number< ThreadScratchId > thread_scratch_id = Number<ThreadScratchId>{} )
inline

◆ RunWrite()

template<typename ThreadGroup, typename SrcElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t DstScalarStrideInVector, bool ThreadTransferSrcResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun, typename IndexType, index_t GatherDim = 1, index_t NumThreadScratch = 1>
template<typename DstBuffer, index_t ThreadScratchId = 0>
__device__ void ck::ThreadGroupTensorSliceTransfer_v4r1_gather< ThreadGroup, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, DstScalarStrideInVector, ThreadTransferSrcResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::RunWrite ( const DstDesc & dst_desc,
DstBuffer & dst_buf,
Number< ThreadScratchId > thread_scratch_id = Number<ThreadScratchId>{} )
inline

◆ SetSrcSliceOrigin()

template<typename ThreadGroup, typename SrcElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t DstScalarStrideInVector, bool ThreadTransferSrcResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun, typename IndexType, index_t GatherDim = 1, index_t NumThreadScratch = 1>
__device__ void ck::ThreadGroupTensorSliceTransfer_v4r1_gather< ThreadGroup, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, DstScalarStrideInVector, ThreadTransferSrcResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::SetSrcSliceOrigin ( const SrcDesc & src_desc,
const Index & src_block_slice_origin )
inline

Member Data Documentation

◆ gather_num

template<typename ThreadGroup, typename SrcElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t DstScalarStrideInVector, bool ThreadTransferSrcResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun, typename IndexType, index_t GatherDim = 1, index_t NumThreadScratch = 1>
index_t ck::ThreadGroupTensorSliceTransfer_v4r1_gather< ThreadGroup, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, DstScalarStrideInVector, ThreadTransferSrcResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::gather_num = thread_slice_lengths.At(Number<GatherDim>{})
static constexpr

◆ I0

template<typename ThreadGroup, typename SrcElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t DstScalarStrideInVector, bool ThreadTransferSrcResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun, typename IndexType, index_t GatherDim = 1, index_t NumThreadScratch = 1>
auto ck::ThreadGroupTensorSliceTransfer_v4r1_gather< ThreadGroup, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, DstScalarStrideInVector, ThreadTransferSrcResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::I0 = Number<0>{}
static constexpr

◆ nDim

template<typename ThreadGroup, typename SrcElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t DstScalarStrideInVector, bool ThreadTransferSrcResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun, typename IndexType, index_t GatherDim = 1, index_t NumThreadScratch = 1>
index_t ck::ThreadGroupTensorSliceTransfer_v4r1_gather< ThreadGroup, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, DstScalarStrideInVector, ThreadTransferSrcResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::nDim = remove_reference_t<SrcDesc>::GetNumOfDimension()
static constexpr

◆ thread_slice_lengths

template<typename ThreadGroup, typename SrcElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcData, typename DstData, typename SrcDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t DstScalarStrideInVector, bool ThreadTransferSrcResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun, typename IndexType, index_t GatherDim = 1, index_t NumThreadScratch = 1>
auto ck::ThreadGroupTensorSliceTransfer_v4r1_gather< ThreadGroup, SrcElementwiseOperation, DstElementwiseOperation, DstInMemOp, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcData, DstData, SrcDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, DstScalarStrideInVector, ThreadTransferSrcResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun, IndexType, GatherDim, NumThreadScratch >::thread_slice_lengths = BlockSliceLengths{} / ThreadClusterLengths{}
static constexpr

The documentation for this struct was generated from the following file: