half.hpp Source File

half.hpp Source File

Composable Kernel: half.hpp Source File
half.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
3
7#include <hip/hip_fp16.h>
8
9#pragma once
10
11namespace ck_tile {
12
13using fp16_hip_t = _Float16; // most HIP internal functions use this type
15
// Forward declarations of the fp16_hip_t <-> float/double conversion helpers.
// They are defined further down in this file; declaring them here lets the
// half_t constructors and conversion operators call them before the definitions.
17constexpr float fp16_to_float_hip(const fp16_hip_t& x);
18
20constexpr double fp16_to_double_hip(const fp16_hip_t& x);
21
23constexpr fp16_hip_t float_to_fp16_hip(const float& x);
24
26constexpr fp16_hip_t double_to_fp16_hip(const double& x);
27
28#if CK_TILE_USE_CUSTOM_DATA_TYPE
29// HIP uses fp16_hip_t as the interchangeable data type for float16.
// half_t is a 2-byte-aligned wrapper that stores the raw bits of a half-precision
// value and converts to/from fp16_hip_t, float, double, int and unsigned int.
// All converting constructors and conversion operators are explicit.
30struct alignas(2) half_t
31{
32 using raw_type = fp16_raw_t;
 // raw bit pattern of the stored value
33 raw_type data;
34
 // Builds a half_t whose stored bits are exactly `x` (no numeric conversion).
36 static constexpr half_t bit_cast(raw_type x)
37 {
38 half_t y;
39 y.data = x;
40 return y;
41 }
42
 // Reinterprets the stored bits as fp16_hip_t.
44 constexpr fp16_hip_t to_fp16() const { return ck_tile::bit_cast<fp16_hip_t>(data); }
45
46 // default constructor: value-initializes the raw bits
47 constexpr half_t() : data{} {}
48
49 // construct from HIP half (bit-level copy of x)
51 explicit constexpr half_t(const fp16_hip_t& x) : data(ck_tile::bit_cast<raw_type>(x)) {}
52
53 // construct from float (converted through float_to_fp16_hip)
55 explicit constexpr half_t(const float& x) : half_t(float_to_fp16_hip(x)) {}
56
57 // construct from double (converted through double_to_fp16_hip)
59 explicit constexpr half_t(const double& x) : half_t(double_to_fp16_hip(x)) {}
60
61 // construct from int (converted through __int2half_rn)
63 explicit constexpr half_t(const int& x) : half_t(static_cast<fp16_hip_t>(__int2half_rn(x))) {}
64
65 // construct from unsigned int (converted through __uint2half_rn)
67 explicit constexpr half_t(const unsigned int& x)
68 : half_t(static_cast<fp16_hip_t>(__uint2half_rn(x)))
69 {
70 }
71
72 // cast to float
74 explicit constexpr operator float() const { return fp16_to_float_hip(to_fp16()); }
75
76 // cast to double
78 explicit constexpr operator double() const { return fp16_to_double_hip(to_fp16()); }
79
80 // cast to int: the value is converted to float first, then cast to int
82 explicit constexpr operator int() const
83 {
84 return static_cast<int>(fp16_to_float_hip(to_fp16()));
85 }
86
 // cast to HIP half (same bit-level reinterpretation as to_fp16())
88 explicit constexpr operator fp16_hip_t() const { return ck_tile::bit_cast<fp16_hip_t>(data); }
89
90 // internal access: mutable reference to the raw bits
92 constexpr raw_type& get() { return data; }
93
 // internal access: copy of the raw bits
95 constexpr raw_type get() const { return data; }
96};
97
// native_t<T>::type names the type associated with T; the primary template is
// only declared here, so it is usable solely through explicit specializations.
98template <typename>
99struct native_t;
100
// For the custom half_t wrapper the associated type is _Float16.
101template <>
102struct native_t<half_t>
103{
104 using type = _Float16;
105};
106
// Custom-type build: fp16_t is the half_t wrapper struct and fp16_raw_t is its raw_type.
107using fp16_t = half_t;
108using fp16_raw_t = typename half_t::raw_type;
109#else
// Default build: fp16_t and half_t are both plain _Float16.
110using fp16_t = _Float16;
111using half_t = _Float16;
// NOTE(review): `ushort` is not a standard C++ type name; presumably it is
// provided by a system or HIP header -- confirm.
112using fp16_raw_t = ushort;
113#endif
114
115// conversions between fp16_hip_t and float/double, implemented with static_cast
// Widens an fp16_hip_t value to float.
117constexpr float fp16_to_float_hip(const fp16_hip_t& x)
118{
119 // alternative kept for reference: return __half2float(x);
120 return static_cast<float>(x);
121}
122
// Widens an fp16_hip_t value to double by going through fp16_to_float_hip.
124constexpr double fp16_to_double_hip(const fp16_hip_t& x)
125{
126 return static_cast<double>(fp16_to_float_hip(x));
127}
128
// Converts a float to fp16_hip_t.
130constexpr fp16_hip_t float_to_fp16_hip(const float& x)
131{
132 // alternative kept for reference: return __float2half(x);
133 return static_cast<fp16_hip_t>(x);
134}
135
// Converts a double to fp16_hip_t with a single direct cast.
137constexpr fp16_hip_t double_to_fp16_hip(const double& x)
138{
139 // the input is cast directly from double; the old note here read: return __float2half(x);
140 return static_cast<fp16_hip_t>(x);
141}
142
// Converts a half_t to float via static_cast.
144constexpr float fp16_to_float(const half_t& x) { return static_cast<float>(x); }
145
// Converts a half_t to double via static_cast.
// The function previously returned float (and cast to float) even though it is the
// double-returning counterpart of fp16_to_float, like fp16_to_double_hip is for
// fp16_to_float_hip. Every half value is exactly representable in both float and
// double, so the numeric result is unchanged; only the result type is widened.
147constexpr double fp16_to_double(const half_t& x) { return static_cast<double>(x); }
148
// Converts a float to half_t via static_cast.
150constexpr half_t float_to_fp16(const float& x) { return static_cast<half_t>(x); }
151
// Converts a double to half_t via static_cast.
153constexpr half_t double_to_fp16(const double& x) { return static_cast<half_t>(x); }
154
155// limits
// Primary template is declared only; the per-type limit values come from
// explicit specializations.
156template <class T>
157struct numeric;
158
159template <>
161{
162 // minimum finite value, or minimum positive normalized value for float
163 CK_TILE_HOST_DEVICE static constexpr half_t min()
164 {
165 return bit_cast<half_t>(static_cast<fp16_raw_t>(0x0400));
166 }
167
168 // lowest (most negative) finite value
170 {
171 return bit_cast<half_t>(static_cast<fp16_raw_t>(0xFBFF));
172 }
173
174 // maximum finite value
175 CK_TILE_HOST_DEVICE static constexpr half_t max()
176 {
177 return bit_cast<half_t>(static_cast<fp16_raw_t>(0x7BFF));
178 }
179
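180 // difference between 1.0 and the next representable half value
 // NOTE(review): 0x1800 decodes to 2^-9, whereas the gap after 1.0 in IEEE
 // binary16 is 2^-10 (bit pattern 0x1400) -- confirm the intended constant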
182 {
183 return bit_cast<half_t>(static_cast<fp16_raw_t>(0x1800));
184 }
185
186 // maximum rounding error
187 // bin : f edcba 9876543210
188 // bits: s eeeee mmmmmmmmmm
189 // 0 01110 0000000000 (0.5)
190 //
192 {
193 return bit_cast<half_t>(static_cast<fp16_raw_t>(0x3800));
194 }
195
196 // positive infinity value
198 {
199 return bit_cast<half_t>(static_cast<fp16_raw_t>(0x7C00));
200 }
201
202 // quiet NaN
204 {
205 return bit_cast<half_t>(static_cast<fp16_raw_t>(0x7FFF));
206 }
207
208 // signaling NaN
 // NOTE(review): returns the same 0x7FFF bit pattern as quiet_NaN -- confirm intended
210 {
211 return bit_cast<half_t>(static_cast<fp16_raw_t>(0x7FFF));
212 }
213
214 // smallest positive subnormal value
216 {
217 return bit_cast<half_t>(static_cast<fp16_raw_t>(0x0001));
218 }
219
220 CK_TILE_HOST_DEVICE static constexpr half_t zero()
221 {
222 return bit_cast<half_t>(static_cast<fp16_raw_t>(0));
223 }
224};
225
226template <>
228{
229 static constexpr int exp = 5;
230 static constexpr int mant = 10;
231 static constexpr int bias = 15;
232 static constexpr uint16_t nan_mask = 0x7C00;
233 static constexpr uint16_t head_mask = 0xFC00;
234 static constexpr uint16_t mant_mask = 0x3FF;
235 static constexpr uint16_t exp_mask = 0x1F;
236 static constexpr uint16_t abs_mask = 0x7FFF;
237 static constexpr uint16_t Inf = 0x7C00;
238 static constexpr uint16_t NegInf = 0xFC00;
239 static constexpr uint16_t NaN = 0x7C01;
240 static constexpr uint16_t Neg0 = 0x8000;
241 static constexpr int PackedSize = 1;
243};
244
245#if CK_TILE_USE_CUSTOM_DATA_TYPE
246// comparison and arithmetic operators
// Comparisons for the custom half_t: each operator converts both operands with
// to_fp16() and forwards them to the matching __h* comparison
// (__heq, __hne, __hlt, __hle, __hgt, __hge).
247CK_TILE_DEVICE bool operator==(const half_t& x, const half_t& y)
248{
249 return __heq(x.to_fp16(), y.to_fp16());
250}
251
253bool operator!=(const half_t& x, const half_t& y) { return __hne(x.to_fp16(), y.to_fp16()); }
254
256bool operator<(const half_t& x, const half_t& y) { return __hlt(x.to_fp16(), y.to_fp16()); }
257
259bool operator<=(const half_t& x, const half_t& y) { return __hle(x.to_fp16(), y.to_fp16()); }
260
262bool operator>(const half_t& x, const half_t& y) { return __hgt(x.to_fp16(), y.to_fp16()); }
263
265bool operator>=(const half_t& x, const half_t& y) { return __hge(x.to_fp16(), y.to_fp16()); }
266
// Disabled by `#if 0`: arithmetic operators for half_t written in terms of
// __hadd / __hsub / __hmul / __hdiv / __hneg. Nothing in this region is compiled.
267#if 0
// binary +
269half_t operator+(const half_t& x, const half_t& y)
270{
271 return half_t(__hadd(x.to_fp16(), y.to_fp16()));
272}
273
// unary minus
275half_t operator-(const half_t& x) { return half_t(__hneg(x.to_fp16())); }
276
// binary -
278half_t operator-(const half_t& x, const half_t& y)
279{
280 return half_t(__hsub(x.to_fp16(), y.to_fp16()));
281}
282
// binary *
284half_t operator*(const half_t& x, const half_t& y)
285{
286 return half_t(__hmul(x.to_fp16(), y.to_fp16()));
287}
288
// binary /
290half_t operator/(const half_t& x, const half_t& y)
291{
292 return half_t(__hdiv(x.to_fp16(), y.to_fp16()));
293}
294
// compound assignments: compute the result, store it in x, return x by reference
296half_t& operator+=(half_t& x, const half_t& y)
297{
298 x = half_t(__hadd(x.to_fp16(), y.to_fp16()));
299 return x;
300}
301
303half_t& operator-=(half_t& x, const half_t& y)
304{
305 x = half_t(__hsub(x.to_fp16(), y.to_fp16()));
306 return x;
307}
308
310half_t& operator*=(half_t& x, const half_t& y)
311{
312 x = half_t(__hmul(x.to_fp16(), y.to_fp16()));
313 return x;
314}
315
317half_t& operator/=(half_t& x, const half_t& y)
318{
319 x = half_t(__hdiv(x.to_fp16(), y.to_fp16()));
320 return x;
321}
322
// pre-increment / pre-decrement: add or subtract half_t(1.0f), return the updated x
324half_t& operator++(half_t& x)
325{
326 x = half_t(__hadd(x.to_fp16(), half_t(1.0f).to_fp16()));
327 return x;
328}
329
331half_t& operator--(half_t& x)
332{
333 x = half_t(__hsub(x.to_fp16(), half_t(1.0f).to_fp16()));
334 return x;
335}
336
// post-increment / post-decrement: update x, return a copy of its previous value
338half_t operator++(half_t& x, int)
339{
340 half_t y(x);
341 x = half_t(__hadd(x.to_fp16(), half_t(1.0f).to_fp16()));
342 return y;
343}
344
346half_t operator--(half_t& x, int)
347{
348 half_t y(x);
349 x = half_t(__hsub(x.to_fp16(), half_t(1.0f).to_fp16()));
350 return y;
351}
352#endif
353
354#if CK_TILE_USE_CUSTOM_DATA_TYPE
356#endif
357
358// math
// Absolute value: clears the sign bit (bit 15) of the raw representation.
// `x.get() & 0x7fff` undergoes integer promotion and has type int, so it is
// narrowed back to fp16_raw_t before the bit_cast. This keeps the source the same
// width as half_t and matches the bit_cast<half_t>(static_cast<fp16_raw_t>(...))
// form used for the limit constants in this file.
360half_t abs(const half_t& x) { return bit_cast<half_t>(static_cast<fp16_raw_t>(x.get() & 0x7fff)); }
361
// Returns true when x holds a NaN bit pattern.
363bool isnan(const half_t& x)
364{
365 uint16_t xx = x.get();
 // Mask off the sign bit; 0x7C00 is the infinity pattern (all exponent bits set,
 // zero mantissa), so any larger magnitude has a non-zero mantissa, i.e. is a NaN.
366 return (xx & 0x7FFF) > 0x7C00;
367}
368
371{
372 return static_cast<half_t>(__builtin_amdgcn_sqrtf(static_cast<float>(x)));
373};
374
// exp / exp2 / log for half_t: each widens the argument to float, calls the
// float routine (__ocml_exp_f32, exp2f, __logf respectively) and converts the
// result back to half_t. The `;` after each closing brace is an empty declaration.
376half_t exp(half_t x) { return static_cast<half_t>(__ocml_exp_f32(static_cast<float>(x))); };
377
379half_t exp2(half_t x) { return static_cast<half_t>(exp2f(static_cast<float>(x))); };
380
382half_t log(half_t x) { return static_cast<half_t>(__logf(static_cast<float>(x))); };
383#endif
384
// Two-element _Float16 vector type, declared with the ext_vector_type attribute.
385using fp16x2_t = _Float16 __attribute__((ext_vector_type(2)));
386
388{
389 fp16x2_t vector_res;
390
391 vector_res.x = x.x + y.x;
392 vector_res.y = x.y + y.y;
393
394 return vector_res;
395}
396
398{
399 fp16x2_t c;
400 asm volatile("v_pk_add_f16 %0, %1, %2" : "=v"(c) : "v"(x), "v"(y));
401 return c;
402}
403
404} // namespace ck_tile
#define CK_TILE_DEVICE
Definition config.hpp:41
#define CK_TILE_HOST
Definition config.hpp:40
#define CK_TILE_HOST_DEVICE
Definition config.hpp:42
Definition tile/core/algorithm/cluster_descriptor.hpp:13
CK_TILE_HOST_DEVICE constexpr float fp16_to_float(const half_t &x)
Definition half.hpp:144
_Float16 fp16x2_t
Definition half.hpp:385
CK_TILE_DEVICE bfloat16_t log(bfloat16_t x)
Definition bfloat16.hpp:428
uint16_t fp16_raw_t
Definition half.hpp:14
CK_TILE_HOST_DEVICE constexpr auto operator-=(multi_index< NSize > &y, const X &x)
Definition tile/core/container/multi_index.hpp:47
CK_TILE_HOST_DEVICE constexpr auto operator*(const multi_index< NSize > &a, const T &b)
Definition tile/core/container/multi_index.hpp:75
_Float16 half_t
Definition half.hpp:111
CK_TILE_HOST_DEVICE constexpr fp16_hip_t double_to_fp16_hip(const double &x)
Definition half.hpp:137
CK_TILE_HOST_DEVICE constexpr half_t double_to_fp16(const double &x)
Definition half.hpp:153
_Float16 fp16_t
Definition half.hpp:110
_Float16 fp16_hip_t
Definition half.hpp:13
CK_TILE_HOST_DEVICE constexpr fp16_hip_t float_to_fp16_hip(const float &x)
Definition half.hpp:130
CK_TILE_HOST_DEVICE constexpr auto operator+(const multi_index< NSize > &a, const T &b)
Definition tile/core/container/multi_index.hpp:55
CK_TILE_HOST_DEVICE constexpr Y bit_cast(const X &x)
Definition bit_cast.hpp:11
CK_TILE_HOST_DEVICE constexpr auto operator-(const multi_index< NSize > &a, const T &b)
Definition tile/core/container/multi_index.hpp:65
CK_TILE_HOST_DEVICE constexpr float fp16_to_float_hip(const fp16_hip_t &x)
Definition half.hpp:117
CK_TILE_DEVICE bfloat16_t sqrt(bfloat16_t x)
Definition bfloat16.hpp:413
CK_TILE_HOST_DEVICE constexpr float fp16_to_double(const half_t &x)
Definition half.hpp:147
CK_TILE_DEVICE bfloat16_t exp(bfloat16_t x)
Definition bfloat16.hpp:419
CK_TILE_HOST_DEVICE bfloat16_t abs(const bfloat16_t &x)
Definition bfloat16.hpp:400
CK_TILE_HOST_DEVICE constexpr auto operator/(sequence< Xs... >, sequence< Ys... >)
Definition tile/core/container/sequence.hpp:737
CK_TILE_HOST_DEVICE constexpr half_t float_to_fp16(const float &x)
Definition half.hpp:150
CK_TILE_HOST fp16x2_t pk_add_f16(const fp16x2_t &x, const fp16x2_t &y)
Definition half.hpp:387
CK_TILE_HOST_DEVICE bool isnan(const bfloat16_t &x)
Definition bfloat16.hpp:406
CK_TILE_HOST_DEVICE constexpr double fp16_to_double_hip(const fp16_hip_t &x)
Definition half.hpp:124
CK_TILE_HOST_DEVICE constexpr bool operator==(const array< T, Size > &a, const array< T, Size > &b)
Definition tile/core/container/array.hpp:263
CK_TILE_DEVICE bfloat16_t exp2(bfloat16_t x)
Definition bfloat16.hpp:425
CK_TILE_HOST_DEVICE constexpr auto operator+=(multi_index< NSize > &y, const X &x)
Definition tile/core/container/multi_index.hpp:39
CK_TILE_HOST_DEVICE constexpr bool operator!=(const array< T, Size > &a, const array< T, Size > &b)
Definition tile/core/container/array.hpp:280
unsigned short uint16_t
Definition stdint.h:125
Definition vector_type.hpp:26
remove_cvref_t< T > type
Definition vector_type.hpp:27
static CK_TILE_HOST_DEVICE constexpr half_t epsilon()
Definition half.hpp:181
static CK_TILE_HOST_DEVICE constexpr half_t denorm_min()
Definition half.hpp:215
static CK_TILE_HOST_DEVICE constexpr half_t signaling_NaN()
Definition half.hpp:209
static CK_TILE_HOST_DEVICE constexpr half_t infinity()
Definition half.hpp:197
static CK_TILE_HOST_DEVICE constexpr half_t max()
Definition half.hpp:175
static CK_TILE_HOST_DEVICE constexpr half_t zero()
Definition half.hpp:220
static CK_TILE_HOST_DEVICE constexpr half_t quiet_NaN()
Definition half.hpp:203
static CK_TILE_HOST_DEVICE constexpr half_t round_error()
Definition half.hpp:191
static CK_TILE_HOST_DEVICE constexpr half_t lowest()
Definition half.hpp:169
static CK_TILE_HOST_DEVICE constexpr half_t min()
Definition half.hpp:163
static constexpr uint16_t Inf
Definition half.hpp:237
static constexpr int bias
Definition half.hpp:231
static constexpr int PackedSize
Definition half.hpp:241
uint16_t bitwise_type
Definition half.hpp:242
static constexpr uint16_t mant_mask
Definition half.hpp:234
static constexpr uint16_t Neg0
Definition half.hpp:240
static constexpr int mant
Definition half.hpp:230
static constexpr uint16_t NegInf
Definition half.hpp:238
static constexpr uint16_t head_mask
Definition half.hpp:233
static constexpr uint16_t exp_mask
Definition half.hpp:235
static constexpr uint16_t NaN
Definition half.hpp:239
static constexpr int exp
Definition half.hpp:229
static constexpr uint16_t nan_mask
Definition half.hpp:232
static constexpr uint16_t abs_mask
Definition half.hpp:236
Definition tile/core/numeric/numeric.hpp:81
Definition tile/core/numeric/numeric.hpp:18
#define CK_TILE_ARITHMETIC_USING_FLOAT(attr_, type_)
Definition tile/core/numeric/numeric.hpp:106