device_memory.hpp Source File

device_memory.hpp Source File#

Composable Kernel: device_memory.hpp Source File
tile/host/device_memory.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
3
4#pragma once
5
6#include <hip/hip_runtime.h>
7#include <stdint.h>
8#include <stdexcept>
11
12namespace ck_tile {
13template <typename T>
14__global__ void set_buffer_value(T* p, T x, uint64_t buffer_element_size)
15{
16 for(uint64_t i = threadIdx.x; i < buffer_element_size; i += blockDim.x)
17 {
18 p[i] = x;
19 }
20}
21
51
52{
53 DeviceMem() : mpDeviceBuf(nullptr), mMemSize(0) {}
54 DeviceMem(std::size_t mem_size) : mMemSize(mem_size)
55 {
56 if(mMemSize != 0)
57 {
58 HIP_CHECK_ERROR(hipMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
59 }
60 else
61 {
62 mpDeviceBuf = nullptr;
63 }
64 }
65 template <typename T>
66 DeviceMem(const HostTensor<T>& t) : mMemSize(t.get_element_space_size_in_bytes())
67 {
68 if(mMemSize != 0)
69 {
70 HIP_CHECK_ERROR(hipMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
71 }
72 else
73 {
74 mpDeviceBuf = nullptr;
75 }
76 ToDevice(t.data());
77 }
78 void Realloc(std::size_t mem_size)
79 {
80 if(mpDeviceBuf)
81 {
83 }
84 mMemSize = mem_size;
85 if(mMemSize != 0)
86 {
87 HIP_CHECK_ERROR(hipMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
88 }
89 else
90 {
91 mpDeviceBuf = nullptr;
92 }
93 }
94 void* GetDeviceBuffer() const { return mpDeviceBuf; }
95 std::size_t GetBufferSize() const { return mMemSize; }
96 void ToDevice(const void* p) const
97 {
98 if(mpDeviceBuf)
99 {
101 hipMemcpy(mpDeviceBuf, const_cast<void*>(p), mMemSize, hipMemcpyHostToDevice));
102 }
103 // else
104 // {
105 // throw std::runtime_error("ToDevice with an empty pointer");
106 // }
107 }
108 void ToDevice(const void* p, const std::size_t cpySize) const
109 {
110 if(mpDeviceBuf)
111 {
113 hipMemcpy(mpDeviceBuf, const_cast<void*>(p), cpySize, hipMemcpyHostToDevice));
114 }
115 }
116 void FromDevice(void* p) const
117 {
118 if(mpDeviceBuf)
119 {
120 HIP_CHECK_ERROR(hipMemcpy(p, mpDeviceBuf, mMemSize, hipMemcpyDeviceToHost));
121 }
122 // else
123 // {
124 // throw std::runtime_error("FromDevice with an empty pointer");
125 // }
126 }
127 void FromDevice(void* p, const std::size_t cpySize) const
128 {
129 if(mpDeviceBuf)
130 {
131 HIP_CHECK_ERROR(hipMemcpy(p, mpDeviceBuf, cpySize, hipMemcpyDeviceToHost));
132 }
133 }
134
135 // construct a host tensor with type T
136 template <typename T>
137 HostTensor<T> ToHost(std::size_t cpySize)
138 {
139 // TODO: host tensor could be slightly larger than the device tensor
140 // we just copy all data from GPU buffer
141 std::size_t host_elements = (cpySize + sizeof(T) - 1) / sizeof(T);
142 HostTensor<T> h_({host_elements});
143 if(mpDeviceBuf)
144 {
145 HIP_CHECK_ERROR(hipMemcpy(h_.data(), mpDeviceBuf, cpySize, hipMemcpyDeviceToHost));
146 }
147 return h_;
148 }
149 template <typename T>
151 {
152 return ToHost<T>(mMemSize);
153 }
154
155 void SetZero() const
156 {
157 if(mpDeviceBuf)
158 {
159 HIP_CHECK_ERROR(hipMemset(mpDeviceBuf, 0, mMemSize));
160 }
161 }
162 template <typename T>
163 void SetValue(T x) const
164 {
165 if(mpDeviceBuf)
166 {
167 if(mMemSize % sizeof(T) != 0)
168 {
169 throw std::runtime_error("wrong! not entire DeviceMem will be set");
170 }
171
172 // TODO: call a gpu kernel to set the value (?)
173 set_buffer_value<T><<<1, 1024>>>(static_cast<T*>(mpDeviceBuf), x, mMemSize / sizeof(T));
174 }
175 }
177 {
178 if(mpDeviceBuf)
179 {
180 try
181 {
183 }
184 catch(std::runtime_error& re)
185 {
186 std::cerr << re.what() << std::endl;
187 }
188 }
189 }
190
192 std::size_t mMemSize;
193};
194
195} // namespace ck_tile
#define HIP_CHECK_ERROR(retval_or_funcall)
Definition host_utility/hip_check_error.hpp:21
Definition tile/core/algorithm/cluster_descriptor.hpp:13
__global__ void set_buffer_value(T *p, T x, uint64_t buffer_element_size)
Definition tile/host/device_memory.hpp:14
unsigned __int64 uint64_t
Definition stdint.h:136
DeviceMem()
Definition tile/host/device_memory.hpp:53
HostTensor< T > ToHost(std::size_t cpySize)
Definition tile/host/device_memory.hpp:137
HostTensor< T > ToHost()
Definition tile/host/device_memory.hpp:150
DeviceMem(std::size_t mem_size)
Definition tile/host/device_memory.hpp:54
void SetValue(T x) const
Definition tile/host/device_memory.hpp:163
void ToDevice(const void *p, const std::size_t cpySize) const
Definition tile/host/device_memory.hpp:108
void Realloc(std::size_t mem_size)
Definition tile/host/device_memory.hpp:78
std::size_t mMemSize
size of device buffer in bytes
Definition tile/host/device_memory.hpp:192
DeviceMem(const HostTensor< T > &t)
Definition tile/host/device_memory.hpp:66
void FromDevice(void *p) const
Definition tile/host/device_memory.hpp:116
void * GetDeviceBuffer() const
Definition tile/host/device_memory.hpp:94
void SetZero() const
Definition tile/host/device_memory.hpp:155
void FromDevice(void *p, const std::size_t cpySize) const
Definition tile/host/device_memory.hpp:127
std::size_t GetBufferSize() const
Definition tile/host/device_memory.hpp:95
~DeviceMem()
Definition tile/host/device_memory.hpp:176
void * mpDeviceBuf
pointer to device buffer
Definition tile/host/device_memory.hpp:191
void ToDevice(const void *p) const
Definition tile/host/device_memory.hpp:96
Definition tile/host/host_tensor.hpp:336
Data::pointer data()
Definition tile/host/host_tensor.hpp:591