libstdc++
simd_sve.h
1// Simd SVE specific implementations -*- C++ -*-
2
3// Copyright The GNU Toolchain Authors.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25
26#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_SVE_H_
27#define _GLIBCXX_EXPERIMENTAL_SIMD_SVE_H_
28
29#if __cplusplus >= 201703L
30
31#if !_GLIBCXX_SIMD_HAVE_SVE
32#error "simd_sve.h may only be included when SVE on ARM is available"
33#endif
34
35_GLIBCXX_SIMD_BEGIN_NAMESPACE
36
// Helper function mapping to sve supported types.
// Maps _Tp to the same-sized, same-signedness fixed-width type that the SVE
// ACLE intrinsics are overloaded on (e.g. long -> int64_t, float ->
// float32_t).  Returns a value-initialized object of the mapped type; use
// __get_sve_value_type_t to obtain the type itself.  Integral/floating sizes
// without a canonical counterpart yield _Tp unchanged; a type that is neither
// integral nor floating-point reaches the end without a return and therefore
// yields void.
template <typename _Tp>
  constexpr auto
  __get_sve_value_type()
  {
    if constexpr (is_integral_v<_Tp>)
      {
	if constexpr (is_signed_v<_Tp>)
	  {
	    if constexpr (sizeof(_Tp) == 1)
	      return int8_t{};
	    else if constexpr (sizeof(_Tp) == 2)
	      return int16_t{};
	    else if constexpr (sizeof(_Tp) == 4)
	      return int32_t{};
	    else if constexpr (sizeof(_Tp) == 8)
	      return int64_t{};
	    else
	      return _Tp{};
	  }
	else
	  {
	    if constexpr (sizeof(_Tp) == 1)
	      return uint8_t{};
	    else if constexpr (sizeof(_Tp) == 2)
	      return uint16_t{};
	    else if constexpr (sizeof(_Tp) == 4)
	      return uint32_t{};
	    else if constexpr (sizeof(_Tp) == 8)
	      return uint64_t{};
	    else
	      return _Tp{};
	  }
      }
    else
      {
	if constexpr (is_floating_point_v<_Tp>)
	  {
	    if constexpr (sizeof(_Tp) == 4)
	      return float32_t{};
	    else if constexpr (sizeof(_Tp) == 8)
	      return float64_t{};
	    else
	      return _Tp{};
	  }
      }
  }
84
85template <typename _Tp>
86 using __get_sve_value_type_t = decltype(__get_sve_value_type<_Tp>());
87
88typedef svbool_t __sve_bool_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));
89
90template <typename _Tp, size_t _Np>
91 struct __sve_vector_type;
92
93template <typename _Tp, size_t _Np>
94 using __sve_vector_type_t = typename __sve_vector_type<_Tp, _Np>::type;
95
// Fixed-length SVE vector of int8_t with _Np used lanes.
template <size_t _Np>
  struct __sve_vector_type<int8_t, _Np>
  {
    // Sizeless svint8_t pinned to __ARM_FEATURE_SVE_BITS so it has a fixed
    // size and can be used as a data member / passed by value.
    typedef svint8_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    // Broadcast __dup into every lane.
    inline static __sve_vlst_type
    __sve_broadcast(int8_t __dup)
    { return svdup_s8(__dup); }

    // Predicate with exactly the first _Np lanes active.
    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b8(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };
111
// As __sve_vector_type<int8_t>, for uint8_t lanes.
template <size_t _Np>
  struct __sve_vector_type<uint8_t, _Np>
  {
    typedef svuint8_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    // Broadcast __dup into every lane.
    inline static __sve_vlst_type
    __sve_broadcast(uint8_t __dup)
    { return svdup_u8(__dup); }

    // Predicate with exactly the first _Np lanes active.
    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b8(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };
127
// As __sve_vector_type<int8_t>, for int16_t lanes.
template <size_t _Np>
  struct __sve_vector_type<int16_t, _Np>
  {
    typedef svint16_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    // Broadcast __dup into every lane.
    inline static __sve_vlst_type
    __sve_broadcast(int16_t __dup)
    { return svdup_s16(__dup); }

    // Predicate with exactly the first _Np lanes active.
    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b16(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };
143
// As __sve_vector_type<int8_t>, for uint16_t lanes.
template <size_t _Np>
  struct __sve_vector_type<uint16_t, _Np>
  {
    typedef svuint16_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    // Broadcast __dup into every lane.
    inline static __sve_vlst_type
    __sve_broadcast(uint16_t __dup)
    { return svdup_u16(__dup); }

    // Predicate with exactly the first _Np lanes active.
    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b16(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };
159
// As __sve_vector_type<int8_t>, for int32_t lanes.
template <size_t _Np>
  struct __sve_vector_type<int32_t, _Np>
  {
    typedef svint32_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    // Broadcast __dup into every lane.
    inline static __sve_vlst_type
    __sve_broadcast(int32_t __dup)
    { return svdup_s32(__dup); }

    // Predicate with exactly the first _Np lanes active.
    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b32(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };
175
// As __sve_vector_type<int8_t>, for uint32_t lanes.
template <size_t _Np>
  struct __sve_vector_type<uint32_t, _Np>
  {
    typedef svuint32_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    // Broadcast __dup into every lane.
    inline static __sve_vlst_type
    __sve_broadcast(uint32_t __dup)
    { return svdup_u32(__dup); }

    // Predicate with exactly the first _Np lanes active.
    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b32(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };
191
// As __sve_vector_type<int8_t>, for int64_t lanes.
template <size_t _Np>
  struct __sve_vector_type<int64_t, _Np>
  {
    typedef svint64_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    // Broadcast __dup into every lane.
    inline static __sve_vlst_type
    __sve_broadcast(int64_t __dup)
    { return svdup_s64(__dup); }

    // Predicate with exactly the first _Np lanes active.
    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b64(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };
207
// As __sve_vector_type<int8_t>, for uint64_t lanes.
template <size_t _Np>
  struct __sve_vector_type<uint64_t, _Np>
  {
    typedef svuint64_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    // Broadcast __dup into every lane.
    inline static __sve_vlst_type
    __sve_broadcast(uint64_t __dup)
    { return svdup_u64(__dup); }

    // Predicate with exactly the first _Np lanes active.
    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b64(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };
223
// As __sve_vector_type<int8_t>, for float (float32_t) lanes.
template <size_t _Np>
  struct __sve_vector_type<float, _Np>
  {
    typedef svfloat32_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    // Broadcast __dup into every lane.
    inline static __sve_vlst_type
    __sve_broadcast(float __dup)
    { return svdup_f32(__dup); }

    // Predicate with exactly the first _Np lanes active.
    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b32(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };
239
// As __sve_vector_type<int8_t>, for double (float64_t) lanes.
template <size_t _Np>
  struct __sve_vector_type<double, _Np>
  {
    typedef svfloat64_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    // Broadcast __dup into every lane.
    inline static __sve_vlst_type
    __sve_broadcast(double __dup)
    { return svdup_f64(__dup); }

    // Predicate with exactly the first _Np lanes active.
    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b64(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };
255
// Primary template: non-canonical element types (long, char, wchar_t, ...)
// delegate to the specialization for the same-sized canonical type.
template <typename _Tp, size_t _Np>
  struct __sve_vector_type
  : __sve_vector_type<__get_sve_value_type_t<_Tp>, _Np>
  {};
260
// Primary template: only instantiated for non-power-of-2 element sizes;
// rounds the size up to the next power of 2 and reuses that specialization.
template <size_t _Size>
  struct __sve_mask_type
  {
    static_assert((_Size & (_Size - 1)) != 0, "This trait may only be used for non-power-of-2 "
					      "sizes. Power-of-2 sizes must be specialized.");

    using type = typename __sve_mask_type<std::__bit_ceil(_Size)>::type;
  };
269
270template <size_t _Size>
271 using __sve_mask_type_t = typename __sve_mask_type<_Size>::type;
272
// Predicate helpers for 1-byte elements.
template <>
  struct __sve_mask_type<1>
  {
    using type = __sve_bool_type;

    using __sve_mask_uint_type = uint8_t;

    // Fixed-length integer vector used to materialize the predicate as 0/1
    // lanes (svbool_t itself cannot be subscripted).
    typedef svuint8_t __sve_mask_vector_type
      __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    // Count of lanes set in both __active_mask and __pred.
    inline static auto
    __sve_mask_active_count(type __active_mask, type __pred)
    { return svcntp_b8(__active_mask, __pred); }

    // Predicate with only the first lane set.
    inline static type
    __sve_mask_first_true()
    { return svptrue_pat_b8(SV_VL1); }

    // Predicate selecting the next active lane of __active_mask after the
    // lanes already covered by __pred (used to iterate set bits).
    inline static type
    __sve_mask_next_true(type __active_mask, type __pred)
    { return svpnext_b8(__active_mask, __pred); }

    // Read lane __i of the predicate as bool via a zeroing 0/1 broadcast.
    inline static bool
    __sve_mask_get(type __active_mask, size_t __i)
    { return __sve_mask_vector_type(svdup_u8_z(__active_mask, 1))[__i] != 0;}

    // Vector {0, 1, 2, ...}; compared against a scalar index to build
    // single-lane predicates (see _SveMaskWrapper::_M_set).
    inline static const __sve_mask_vector_type __index0123 = svindex_u8(0, 1);
  };
301
// Predicate helpers for 2-byte elements (see __sve_mask_type<1>).
template <>
  struct __sve_mask_type<2>
  {
    using type = __sve_bool_type;

    using __sve_mask_uint_type = uint16_t;

    typedef svuint16_t __sve_mask_vector_type
      __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    // Count of lanes set in both __active_mask and __pred.
    inline static auto
    __sve_mask_active_count(type __active_mask, type __pred)
    { return svcntp_b16(__active_mask, __pred); }

    // Predicate with only the first lane set.
    inline static type
    __sve_mask_first_true()
    { return svptrue_pat_b16(SV_VL1); }

    // Predicate selecting the next active lane after those in __pred.
    inline static type
    __sve_mask_next_true(type __active_mask, type __pred)
    { return svpnext_b16(__active_mask, __pred); }

    // Read lane __i of the predicate as bool.
    inline static bool
    __sve_mask_get(type __active_mask, size_t __i)
    { return __sve_mask_vector_type(svdup_u16_z(__active_mask, 1))[__i] != 0;}

    // Vector {0, 1, 2, ...} for building single-lane predicates.
    inline static const __sve_mask_vector_type __index0123 = svindex_u16(0, 1);
  };
330
// Predicate helpers for 4-byte elements (see __sve_mask_type<1>).
template <>
  struct __sve_mask_type<4>
  {
    using type = __sve_bool_type;

    using __sve_mask_uint_type = uint32_t;

    typedef svuint32_t __sve_mask_vector_type
      __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    // Count of lanes set in both __active_mask and __pred.
    inline static auto
    __sve_mask_active_count(type __active_mask, type __pred)
    { return svcntp_b32(__active_mask, __pred); }

    // Predicate with only the first lane set.
    inline static type
    __sve_mask_first_true()
    { return svptrue_pat_b32(SV_VL1); }

    // Predicate selecting the next active lane after those in __pred.
    inline static type
    __sve_mask_next_true(type __active_mask, type __pred)
    { return svpnext_b32(__active_mask, __pred); }

    // Read lane __i of the predicate as bool.
    inline static bool
    __sve_mask_get(type __active_mask, size_t __i)
    { return __sve_mask_vector_type(svdup_u32_z(__active_mask, 1))[__i] != 0;}

    // Vector {0, 1, 2, ...} for building single-lane predicates.
    inline static const __sve_mask_vector_type __index0123 = svindex_u32(0, 1);
  };
359
// Predicate helpers for 8-byte elements (see __sve_mask_type<1>).
template <>
  struct __sve_mask_type<8>
  {
    using type = __sve_bool_type;

    using __sve_mask_uint_type = uint64_t;

    typedef svuint64_t __sve_mask_vector_type
      __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    // Count of lanes set in both __active_mask and __pred.
    inline static auto
    __sve_mask_active_count(type __active_mask, type __pred)
    { return svcntp_b64(__active_mask, __pred); }

    // Predicate with only the first lane set.
    inline static type
    __sve_mask_first_true()
    { return svptrue_pat_b64(SV_VL1); }

    // Predicate selecting the next active lane after those in __pred.
    inline static type
    __sve_mask_next_true(type __active_mask, type __pred)
    { return svpnext_b64(__active_mask, __pred); }

    // Read lane __i of the predicate as bool.
    inline static bool
    __sve_mask_get(type __active_mask, size_t __i)
    { return __sve_mask_vector_type(svdup_u64_z(__active_mask, 1))[__i] != 0;}

    // Vector {0, 1, 2, ...} for building single-lane predicates.
    inline static const __sve_mask_vector_type __index0123 = svindex_u64(0, 1);
  };
388
389template <typename _To, typename _From>
390 _GLIBCXX_SIMD_INTRINSIC constexpr auto
391 __sve_reinterpret_cast(_From __v)
392 {
393 if constexpr (std::is_same_v<_To, int32_t>)
394 return svreinterpret_s32(__v);
395 else if constexpr (std::is_same_v<_To, int64_t>)
396 return svreinterpret_s64(__v);
397 else if constexpr (std::is_same_v<_To, float32_t>)
398 return svreinterpret_f32(__v);
399 else if constexpr (std::is_same_v<_To, float64_t>)
400 return svreinterpret_f64(__v);
401 else
402 __assert_unreachable<_To>(); // add more cases if needed.
403 }
404
// Storage wrapper for a fixed-length SVE vector holding _Width elements of
// type _Tp.  This is the _SimdMember type of the SVE ABI: it owns the raw
// VLST vector and provides lane access, conversions, and constant-propagation
// queries expected by the generic simd implementation.
template <typename _Tp, size_t _Width>
  struct _SveSimdWrapper
  {
    static_assert(__is_vectorizable_v<_Tp>);

    static_assert(_Width >= 2); // 1 doesn't make sense, use _Tp directly then

    using _BuiltinType = __sve_vector_type_t<_Tp, _Width>;

    using value_type = _Tp;

    // Number of lanes the full hardware register holds for this element type.
    static inline constexpr size_t _S_full_size = sizeof(_BuiltinType) / sizeof(value_type);

    static inline constexpr int _S_size = _Width;

    // True when fewer lanes are in use than the register provides.
    static inline constexpr bool _S_is_partial = _S_full_size != _S_size;

    _BuiltinType _M_data;

    // Widen to the full register width; extra lanes carry whatever _M_data
    // holds there.
    _GLIBCXX_SIMD_INTRINSIC constexpr _SveSimdWrapper<_Tp, _S_full_size>
    __as_full_vector() const
    { return _M_data; }

    // Construct from a braced list of _Tp values, one per lane.
    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SveSimdWrapper(initializer_list<_Tp> __init)
    : _M_data(__generate_from_n_evaluations<_Width, _BuiltinType>(
		[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		  return __init.begin()[__i.value];
		}))
    {}

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SveSimdWrapper() = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SveSimdWrapper(const _SveSimdWrapper&) = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SveSimdWrapper(_SveSimdWrapper&&) = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr _SveSimdWrapper&
    operator=(const _SveSimdWrapper&) = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr _SveSimdWrapper&
    operator=(_SveSimdWrapper&&) = default;

    // Implicit wrap of the raw SVE vector type.
    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SveSimdWrapper(__sve_vector_type_t<_Tp, _Width> __x)
    : _M_data(__x)
    {}

    // Conversion to a tuple of scalar-ABI simds, one entry per lane.
    template <typename... _As, typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
							 && sizeof...(_As) <= _Width)>>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      operator _SimdTuple<_Tp, _As...>() const
      {
	return __generate_from_n_evaluations<sizeof...(_As), _SimdTuple<_Tp, _As...>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		   return _M_data[int(__i)];
		 });
      }

    _GLIBCXX_SIMD_INTRINSIC constexpr
    operator const _BuiltinType&() const
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC constexpr
    operator _BuiltinType&()
    { return _M_data; }

    // Read lane __i.
    _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
    operator[](size_t __i) const
    { return _M_data[__i]; }

    template <size_t __i>
      _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
      operator[](_SizeConstant<__i>) const
      { return _M_data[__i]; }

    // Write lane __i.
    _GLIBCXX_SIMD_INTRINSIC constexpr void
    _M_set(size_t __i, _Tp __x)
    {
      _M_data[__i] = __x;
    }

    // Constant-propagation queries: SVE vectors are never tracked as
    // compile-time constants here, so all three report false.
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop() const
    { return false; }

    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop_none_of() const
    { return false; }

    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop_all_of() const
    { return false; }
  };
502
// Storage wrapper for an SVE predicate (svbool_t) acting as the mask member
// for elements of _Bits bytes and _Width used lanes.
template <size_t _Bits, size_t _Width>
  struct _SveMaskWrapper
  {
    using _BuiltinSveMaskType = __sve_mask_type<_Bits>;

    using _BuiltinSveVectorType = __sve_vector_type<__int_with_sizeof_t<_Bits>, _Width>;

    using _BuiltinType = typename _BuiltinSveMaskType::type;

    using value_type = bool;

    static constexpr size_t _S_full_size = sizeof(_BuiltinType);

    _GLIBCXX_SIMD_INTRINSIC constexpr _SveMaskWrapper<_Bits, _S_full_size>
    __as_full_vector() const
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SveMaskWrapper() = default;

    // Implicit wrap of the raw predicate.
    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SveMaskWrapper(_BuiltinType __k)
    : _M_data(__k)
    {};

    _GLIBCXX_SIMD_INTRINSIC
    operator const _BuiltinType&() const
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC
    operator _BuiltinType&()
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC _BuiltinType
    __intrin() const
    { return _M_data; }

    // Read lane __i of the predicate as bool.
    _GLIBCXX_SIMD_INTRINSIC constexpr value_type
    operator[](size_t __i) const
    {
      return _BuiltinSveMaskType::__sve_mask_get(_M_data, __i);
    }

    template <size_t __i>
      _GLIBCXX_SIMD_INTRINSIC constexpr value_type
      operator[](_SizeConstant<__i>) const
      {
	return _BuiltinSveMaskType::__sve_mask_get(_M_data, __i);
      }

    // Set lane __i to __x: build a single-lane predicate by comparing the
    // index vector {0,1,2,...} against __i, then OR it in (set) or clear it
    // with BIC (unset).
    _GLIBCXX_SIMD_INTRINSIC constexpr void
    _M_set(size_t __i, value_type __x)
    {
      _BuiltinType __index
	= svcmpeq(_BuiltinSveVectorType::__sve_active_mask(), _BuiltinSveMaskType::__index0123,
		  typename _BuiltinSveMaskType::__sve_mask_uint_type(__i));

      if (__x)
	_M_data = svorr_z(_BuiltinSveVectorType::__sve_active_mask(), _M_data, __index);
      else
	_M_data = svbic_z(_BuiltinSveVectorType::__sve_active_mask(), _M_data, __index);
    }

    // Predicates are never tracked as compile-time constants here.
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop() const
    { return false; }

    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop_none_of() const
    { return false; }

    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop_all_of() const
    { return false; }

    _BuiltinType _M_data;
  };
580
581struct _CommonImplSve;
582
583template <typename _Abi, typename = __detail::__odr_helper>
584 struct _SimdImplSve;
585
586template <typename _Abi, typename = __detail::__odr_helper>
587 struct _MaskImplSve;
588
// ABI tag for fixed-size SVE simd types; _UsedBytes is the number of vector
// bytes the simd occupies (may be less than the full register width).
template <int _UsedBytes, int>
  struct simd_abi::_SveAbi
  {
    // Number of _Tp elements in a simd with this ABI.
    template <typename _Tp>
      static constexpr size_t _S_size = _UsedBytes / sizeof(_Tp);

    struct _IsValidAbiTag
    : __bool_constant<(_UsedBytes > 1)>
    {};

    // Valid iff more than one element fits, _UsedBytes is an exact multiple
    // of the element size, and it does not exceed the SVE register width.
    template <typename _Tp>
      struct _IsValidSizeFor
      : __bool_constant<(_UsedBytes / sizeof(_Tp) > 1 && _UsedBytes % sizeof(_Tp) == 0
			   && _UsedBytes <= __sve_vectorized_size_bytes)>
      {};

    template <typename _Tp>
      struct _IsValid
      : conjunction<_IsValidAbiTag, __bool_constant<__have_sve>,
		    __bool_constant<(__vectorized_sizeof<_Tp>() > sizeof(_Tp))>,
		    _IsValidSizeFor<_Tp>>
      {};

    template <typename _Tp>
      static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value;

    using _CommonImpl = _CommonImplSve;

    using _SimdImpl = _SimdImplSve<_SveAbi<_UsedBytes>>;

    using _MaskImpl = _MaskImplSve<_SveAbi<_UsedBytes>>;

    template <typename _Tp>
      using _MaskMember = _SveMaskWrapper<sizeof(_Tp), _S_size<_Tp>>;

    // Traits consumed by the generic simd/simd_mask front end; invalid
    // (_Tp, ABI) combinations get _InvalidTraits.
    template <typename _Tp, bool = _S_is_valid_v<_Tp>>
      struct __traits : _InvalidTraits
      {};

    template <typename _Tp>
      struct __traits<_Tp, true>
      {
	using _IsValid = true_type;
	using _SimdImpl = _SimdImplSve<_SveAbi<_UsedBytes>>;
	using _MaskImpl = _MaskImplSve<_SveAbi<_UsedBytes>>;

	using _SimdMember = _SveSimdWrapper<_Tp, _S_size<_Tp>>; // sve vector type
	using _MaskMember = _SveMaskWrapper<sizeof(_Tp), _S_size<_Tp>>; // sve mask type

	static constexpr size_t _S_simd_align = alignof(_SimdMember);
	static constexpr size_t _S_mask_align = alignof(_MaskMember);

	static constexpr size_t _S_full_size = _SimdMember::_S_full_size;
	static constexpr bool _S_is_partial = _SimdMember::_S_is_partial;

	// Base class of simd: explicit conversion to the raw SVE vector.
	struct _SimdBase
	{
	  _GLIBCXX_SIMD_ALWAYS_INLINE explicit
	  operator __sve_vector_type_t<_Tp, _S_size<_Tp>>() const
	  { return __data(*static_cast<const simd<_Tp, _SveAbi<_UsedBytes>>*>(this)); }
	};

	// Enables implicit construction of simd from the raw SVE vector.
	class _SimdCastType
	{
	  using _Ap = __sve_vector_type_t<_Tp, _S_size<_Tp>>;

	  _SimdMember _M_data;

	public:
	  _GLIBCXX_SIMD_ALWAYS_INLINE constexpr
	  _SimdCastType(_Ap __a)
	  : _M_data(__a)
	  {}

	  _GLIBCXX_SIMD_ALWAYS_INLINE constexpr
	  operator _SimdMember() const
	  { return _M_data; }
	};

	// Base class of simd_mask: explicit conversion to the raw predicate.
	struct _MaskBase
	{
	  _GLIBCXX_SIMD_ALWAYS_INLINE explicit
	  operator __sve_mask_type_t<sizeof(_Tp)>() const
	  {
	    return __data(*static_cast<const simd_mask<_Tp, _SveAbi<_UsedBytes>>*>(this));
	  }
	};

	// Enables implicit construction of simd_mask from the raw predicate.
	class _MaskCastType
	{
	  using _Ap = __sve_mask_type_t<sizeof(_Tp)>;

	  _Ap _M_data;

	public:
	  _GLIBCXX_SIMD_ALWAYS_INLINE constexpr
	  _MaskCastType(_Ap __a)
	  : _M_data(__a)
	  {}

	  _GLIBCXX_SIMD_ALWAYS_INLINE constexpr
	  operator _MaskMember() const
	  { return _M_data; }
	};
      };

    template <typename _Tp>
      static constexpr size_t _S_full_size = __traits<_Tp>::_S_full_size;

    template <typename _Tp>
      static constexpr bool _S_is_partial = __traits<_Tp>::_S_is_partial;
  };
701
702template <typename _Tp, size_t _Np>
703 using __sve_mask = __sve_mask_type<sizeof(_Tp)>;
704
705struct _CommonImplSve
706{
707 // _S_converts_via_decomposition
708 // This lists all cases where a __vector_convert needs to fall back to
709 // conversion of individual scalars (i.e. decompose the input vector into
710 // scalars, convert, compose output vector). In those cases, _S_masked_load &
711 // _S_masked_store prefer to use the _S_bit_iteration implementation.
712 template <typename _From, typename _To, size_t _ToSize>
713 static inline constexpr bool __converts_via_decomposition_v = sizeof(_From) != sizeof(_To);
714
715 template <typename _Tp, typename _Up, size_t _Np>
716 _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
717 _S_load(const _Up* __p, _SveMaskWrapper<sizeof(_Tp), _Np> __k)
718 {
719 using _STp = __get_sve_value_type_t<_Tp>;
720 using _SUp = __get_sve_value_type_t<_Up>;
721 using _V = __sve_vector_type_t<_Tp, _Np>;
722 const _SUp* __up = reinterpret_cast<const _SUp*>(__p);
723
724 if constexpr (std::is_same_v<_Tp, _Up>)
725 return _V(svld1(__k._M_data, __up));
727 && (sizeof(_Tp) > sizeof(_Up)))
728 {
729 if constexpr (std::is_same_v<_SUp, int8_t>)
730 {
731 if constexpr (std::is_same_v<_STp, int16_t>)
732 return _V(svld1sb_s16(__k._M_data, __up));
733 if constexpr (std::is_same_v<_STp, uint16_t>)
734 return _V(svld1sb_u16(__k._M_data, __up));
735 if constexpr (std::is_same_v<_STp, int32_t>)
736 return _V(svld1sb_s32(__k._M_data, __up));
737 if constexpr (std::is_same_v<_STp, uint32_t>)
738 return _V(svld1sb_u32(__k._M_data, __up));
739 if constexpr (std::is_same_v<_STp, int64_t>)
740 return _V(svld1sb_s64(__k._M_data, __up));
741 if constexpr (std::is_same_v<_STp, uint64_t>)
742 return _V(svld1sb_u64(__k._M_data, __up));
743 }
744 if constexpr (std::is_same_v<_SUp, uint8_t>)
745 {
746 if constexpr (std::is_same_v<_STp, int16_t>)
747 return _V(svld1ub_s16(__k._M_data, __up));
748 if constexpr (std::is_same_v<_STp, uint16_t>)
749 return _V(svld1ub_u16(__k._M_data, __up));
750 if constexpr (std::is_same_v<_STp, int32_t>)
751 return _V(svld1ub_s32(__k._M_data, __up));
752 if constexpr (std::is_same_v<_STp, uint32_t>)
753 return _V(svld1ub_u32(__k._M_data, __up));
754 if constexpr (std::is_same_v<_STp, int64_t>)
755 return _V(svld1ub_s64(__k._M_data, __up));
756 if constexpr (std::is_same_v<_STp, uint64_t>)
757 return _V(svld1ub_u64(__k._M_data, __up));
758 }
759 if constexpr (std::is_same_v<_SUp, int16_t>)
760 {
761 if constexpr (std::is_same_v<_STp, int32_t>)
762 return _V(svld1sh_s32(__k._M_data, __up));
763 if constexpr (std::is_same_v<_STp, uint32_t>)
764 return _V(svld1sh_u32(__k._M_data, __up));
765 if constexpr (std::is_same_v<_STp, int64_t>)
766 return _V(svld1sh_s64(__k._M_data, __up));
767 if constexpr (std::is_same_v<_STp, uint64_t>)
768 return _V(svld1sh_u64(__k._M_data, __up));
769 }
770 if constexpr (std::is_same_v<_SUp, uint16_t>)
771 {
772 if constexpr (std::is_same_v<_STp, int32_t>)
773 return _V(svld1uh_s32(__k._M_data, __up));
774 if constexpr (std::is_same_v<_STp, uint32_t>)
775 return _V(svld1uh_u32(__k._M_data, __up));
776 if constexpr (std::is_same_v<_STp, int64_t>)
777 return _V(svld1uh_s64(__k._M_data, __up));
778 if constexpr (std::is_same_v<_STp, uint64_t>)
779 return _V(svld1uh_u64(__k._M_data, __up));
780 }
781 if constexpr (std::is_same_v<_SUp, int32_t>)
782 {
783 if constexpr (std::is_same_v<_STp, int64_t>)
784 return _V(svld1sw_s64(__k._M_data, __up));
785 if constexpr (std::is_same_v<_STp, uint64_t>)
786 return _V(svld1sw_u64(__k._M_data, __up));
787 }
788 if constexpr (std::is_same_v<_SUp, uint32_t>)
789 {
790 if constexpr (std::is_same_v<_STp, int64_t>)
791 return _V(svld1uw_s64(__k._M_data, __up));
792 if constexpr (std::is_same_v<_STp, uint64_t>)
793 return _V(svld1uw_u64(__k._M_data, __up));
794 }
795 }
796 return __generate_from_n_evaluations<_Np, __sve_vector_type_t<_Tp, _Np>>(
797 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
798 return __k[__i] ? static_cast<_Tp>(__p[__i]) : _Tp{};
799 });
800 }
801
802 template <typename _Tp, typename _Up, size_t _Np>
803 _GLIBCXX_SIMD_INTRINSIC static constexpr void
804 _S_store(_Up* __p, _SveSimdWrapper<_Tp, _Np> __x, _SveMaskWrapper<sizeof(_Tp), _Np> __k)
805 {
806 using _SUp = __get_sve_value_type_t<_Up>;
807 using _STp = __get_sve_value_type_t<_Tp>;
808
809 _SUp* __up = reinterpret_cast<_SUp*>(__p);
810
811 if constexpr (std::is_same_v<_Tp, _Up>)
812 return svst1(__k._M_data, __up, __x);
814 && (sizeof(_Tp) > sizeof(_Up)))
815 {
817 return svst1b(__k._M_data, __up, __x);
819 return svst1b(__k._M_data, __up, __x);
821 return svst1h(__k._M_data, __up, __x);
823 return svst1h(__k._M_data, __up, __x);
825 return svst1w(__k._M_data, __up, __x);
827 return svst1w(__k._M_data, __up, __x);
828 }
829
830 __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
831 if (__k[__i])
832 __p[__i] = static_cast<_Up>(__x[__i]);
833 });
834 }
835
836 template <typename _Tp, size_t _Np>
837 _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
838 _S_blend(_SveMaskWrapper<sizeof(_Tp), _Np> __k, _SveSimdWrapper<_Tp, _Np> __at0,
839 _SveSimdWrapper<_Tp, _Np> __at1)
840 { return svsel(__k._M_data, __at1._M_data, __at0._M_data); }
841
842 template <size_t _Np, bool _Sanitized>
843 _GLIBCXX_SIMD_INTRINSIC static constexpr void
844 _S_store_bool_array(_BitMask<_Np, _Sanitized> __x, bool* __mem)
845 {
846 __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
847 __mem[__i] = __x[__i];
848 });
849 }
850};
851
852template <typename _Abi, typename>
853 struct _SimdImplSve
854 {
855 template <typename _Tp>
856 using _MaskMember = typename _Abi::template _MaskMember<_Tp>;
857
858 template <typename _Tp>
859 using _SimdMember = typename _Abi::template __traits<_Tp>::_SimdMember;
860
861 using _CommonImpl = typename _Abi::_CommonImpl;
862 using _SuperImpl = typename _Abi::_SimdImpl;
863 using _MaskImpl = typename _Abi::_MaskImpl;
864
865 template <typename _Tp>
866 static constexpr size_t _S_full_size = _Abi::template _S_full_size<_Tp>;
867
868 template <typename _Tp>
869 static constexpr size_t _S_size = _Abi::template _S_size<_Tp>;
870
871 template <typename _Tp>
872 using _TypeTag = _Tp*;
873
874 using abi_type = _Abi;
875
    // Broadcast the scalar __x into every lane of a full-width SVE vector.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr auto
      _S_broadcast(_Tp __x) noexcept
      {
	return __sve_vector_type<_Tp, __sve_vectorized_size_bytes / sizeof(_Tp)>
		 ::__sve_broadcast(__x);
      }
883
    // Build a _SimdMember by evaluating __gen(__i) for each lane index __i.
    template <typename _Fp, typename _Tp>
      inline static constexpr _SimdMember<_Tp>
      _S_generator(_Fp&& __gen, _TypeTag<_Tp>)
      {
	constexpr size_t _Np = _S_size<_Tp>;
	_SveSimdWrapper<_Tp, _Np> __ret;
	__execute_n_times<_S_size<_Tp>>(
	  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __ret._M_set(__i, __gen(__i)); });
	return __ret;
      }
894
    // Load _S_size<_Tp> elements of type _Up from __mem, converted to _Tp,
    // using the all-active (for this size) predicate.
    template <typename _Tp, typename _Up>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp>
      _S_load(const _Up* __mem, _TypeTag<_Tp>) noexcept
      {
	constexpr size_t _Np = _S_size<_Tp>;
	_SimdMember<_Tp> __ret = _CommonImpl::template _S_load<_Tp, _Up, _Np>(
				   __mem, _SveMaskWrapper<sizeof(_Tp), _Np>{
					    __sve_vector_type<_Tp, _Np>::__sve_active_mask()});
	return __ret;
      }
905
    // Masked load: lanes set in __k are loaded from __mem, the rest keep the
    // corresponding value from __merge.
    template <typename _Tp, size_t _Np, typename _Up>
      static constexpr inline _SveSimdWrapper<_Tp, _Np>
      _S_masked_load(_SveSimdWrapper<_Tp, _Np> __merge, _MaskMember<_Tp> __k, const _Up* __mem)
      noexcept
      {
	__sve_vector_type_t<_Tp, _Np> __v
	  = _CommonImpl::template _S_load<_Tp, _Up, _Np>(__mem, __k);
	__sve_vector_type_t<_Tp, _Np> __ret = svsel(__k._M_data, __v, __merge._M_data);
	return __ret;
      }
916
    // Store all _S_size<_Tp> lanes of __v to __mem as type _Up.
    template <typename _Tp, typename _Up>
      _GLIBCXX_SIMD_INTRINSIC static constexpr void
      _S_store(_SimdMember<_Tp> __v, _Up* __mem, _TypeTag<_Tp>) noexcept
      {
	constexpr size_t _Np = _S_size<_Tp>;
	_CommonImpl::template _S_store<_Tp, _Up, _Np>(
	  __mem, __v, __sve_vector_type<_Tp, _Np>::__sve_active_mask());
      }
925
    // Store only the lanes of __v selected by __k.
    template <typename _Tp, typename _Up, size_t _Np>
      static constexpr inline void
      _S_masked_store(const _SveSimdWrapper<_Tp, _Np> __v, _Up* __mem,
		      const _SveMaskWrapper<sizeof(_Tp), _Np> __k) noexcept
      { _CommonImpl::template _S_store<_Tp, _Up, _Np>(__mem, __v, __k); }
931
    // Logical negation per lane: mask of lanes where __x compares equal to 0.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
      _S_negate(_SveSimdWrapper<_Tp, _Np> __x) noexcept
      {
	return svcmpeq(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data,
		       __sve_vector_type<_Tp, _Np>::__sve_broadcast(_Tp{}));
      }
939
    // Generic reduction with an arbitrary binary operation.  While the lane
    // count is even, it is halved by combining the even-indexed lanes
    // (svuzp1) with the odd-indexed lanes (svuzp2); once the remaining count
    // __i is odd (eventually 1 for power-of-2 sizes) the leftover prefix is
    // folded serially.
    template <typename _Tp, typename _BinaryOperation>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, _BinaryOperation&& __binary_op)
      {
	auto __x_data = __x._M_data;
	constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
	using __sve_vec_t = __sve_vector_type_t<_Tp, _Np>;
	std::size_t __i = __x.size();
	for (; (__i % 2) != 1; __i /= 2)
	  {
	    __x_data = __binary_op(simd<_Tp, _Abi>(
				     __private_init, _SveSimdWrapper<_Tp, _Np>(
						       __sve_vec_t(svuzp1(__x_data, __x_data)))),
				   simd<_Tp, _Abi>(
				     __private_init, _SveSimdWrapper<_Tp, _Np>(
						       __sve_vec_t(svuzp2(__x_data, __x_data))))
				  )._M_data;
	  }
	// Fold the remaining __i lanes serially.
	_Tp __res = __x_data[0];
	for (size_t __ri = 1; __ri != __i; __ri++)
	  __res = __binary_op(__x_data[__ri], __res);
	return __res;
      }
963
    // Sum reduction via the dedicated horizontal-add intrinsic.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, plus<>)
      {
	return svaddv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
      }
970
    // Bitwise-AND reduction via the horizontal AND intrinsic.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, bit_and<>)
      {
	return svandv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
      }
977
    // Bitwise-OR reduction via the horizontal OR intrinsic.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, bit_or<>)
      {
	return svorv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
      }
984
    // Bitwise-XOR reduction via the horizontal EOR intrinsic.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, bit_xor<>)
      {
	return sveorv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
      }
991
    // Maximum reduction via the horizontal max intrinsic.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, __detail::_Maximum())
      {
	return svmaxv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
      }
998
    // Minimum reduction via the horizontal min intrinsic.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, __detail::_Minimum())
      {
	return svminv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
      }
1005
    // Lane-wise minimum (zeroing predication on inactive lanes).
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
      __sve_vector_type_t<_Tp, _Np>
      _S_min(_SveSimdWrapper<_Tp, _Np> __a, _SveSimdWrapper<_Tp, _Np> __b)
      {
	return svmin_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __a._M_data, __b._M_data);
      }
1013
    // Lane-wise maximum (zeroing predication on inactive lanes).
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
      __sve_vector_type_t<_Tp, _Np>
      _S_max(_SveSimdWrapper<_Tp, _Np> __a, _SveSimdWrapper<_Tp, _Np> __b)
      {
	return svmax_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __a._M_data, __b._M_data);
      }
1021
    // Lane-wise {min, max} pair in one call.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
      pair<_SveSimdWrapper<_Tp, _Np>, _SveSimdWrapper<_Tp, _Np>>
      _S_minmax(_SveSimdWrapper<_Tp, _Np> __a, _SveSimdWrapper<_Tp, _Np> __b)
      {
	return {
	  svmin_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __a._M_data, __b._M_data),
	  svmax_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __a._M_data, __b._M_data)
	};
      }
1032
    // Bitwise NOT (operator~).  SVE has no float svnot, so floating-point
    // lanes are reinterpreted as same-sized integers, inverted, and
    // reinterpreted back.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_complement(_SveSimdWrapper<_Tp, _Np> __x) noexcept
      {
        if constexpr (is_floating_point_v<_Tp>)
          {
            using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>;
            return __sve_reinterpret_cast<_Tp>(
                     svnot_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
                             __sve_reinterpret_cast<_Ip>(__x)));
          }
        else
          return svnot_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data);
      }

    // Unary minus implemented as multiplication by -1.  For unsigned _Tp,
    // static_cast<_Tp>(-1) is the all-ones value, so the product wraps to
    // the correct two's-complement negation.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SveSimdWrapper<_Tp, _Np>
      _S_unary_minus(_SveSimdWrapper<_Tp, _Np> __x) noexcept
      {
        return svmul_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data,
                       static_cast<_Tp>(-1));
      }
1055
    // Arithmetic operators.  +, -, * use the GNU vector-extension
    // operators directly on the sizeless SVE vector type.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_plus(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      { return __x._M_data + __y._M_data; }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_minus(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      { return __x._M_data - __y._M_data; }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_multiplies(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      { return __x._M_data * __y._M_data; }

    // Division: lanes beyond the active mask may hold arbitrary data
    // (e.g. zero), so the divisor's inactive lanes are replaced with 1
    // before dividing to avoid division-by-zero in the padding.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_divides(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      {
        __sve_vector_type_t<_Tp, _Np> __y_padded
          = svsel(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
                  __y._M_data, __sve_vector_type<_Tp, _Np>::__sve_broadcast(1));
        return __x._M_data / __y_padded;
      }

    // Modulus: same divisor padding as _S_divides, for the same reason.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_modulus(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      {
        __sve_vector_type_t<_Tp, _Np> __y_padded
          = svsel(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
                  __y._M_data, __sve_vector_type<_Tp, _Np>::__sve_broadcast(1));
        return __x._M_data % __y_padded;
      }
1090
    // Bitwise AND.  Floating-point lanes are reinterpreted as same-sized
    // integers for the integer-only SVE logical ops, then cast back.
    // '_x' ("don't care") predication leaves inactive lanes unspecified.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_bit_and(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      {
        if constexpr (is_floating_point_v<_Tp>)
          {
            using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>;
            return __sve_reinterpret_cast<_Tp>(
                     svand_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
                             __sve_reinterpret_cast<_Ip>(__x), __sve_reinterpret_cast<_Ip>(__y)));
          }
        else
          return svand_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
                         __x._M_data, __y._M_data);
      }

    // Bitwise OR; same float-as-integer strategy as _S_bit_and.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_bit_or(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      {
        if constexpr (is_floating_point_v<_Tp>)
          {
            using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>;
            return __sve_reinterpret_cast<_Tp>(
                     svorr_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
                             __sve_reinterpret_cast<_Ip>(__x), __sve_reinterpret_cast<_Ip>(__y)));
          }
        else
          return svorr_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
                         __x._M_data, __y._M_data);
      }

    // Bitwise XOR; same float-as-integer strategy as _S_bit_and.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_bit_xor(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      {
        if constexpr (is_floating_point_v<_Tp>)
          {
            using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>;
            return __sve_reinterpret_cast<_Tp>(
                     sveor_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
                             __sve_reinterpret_cast<_Ip>(__x), __sve_reinterpret_cast<_Ip>(__y)));
          }
        else
          return sveor_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
                         __x._M_data, __y._M_data);
      }
1138
    // Lane-wise shifts by a vector of counts (vector-extension operators).
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static __sve_vector_type_t<_Tp, _Np>
      _S_bit_shift_left(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      { return __x._M_data << __y._M_data; }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static __sve_vector_type_t<_Tp, _Np>
      _S_bit_shift_right(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      { return __x._M_data >> __y._M_data; }

    // Shifts by a single scalar count applied to every lane.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_bit_shift_left(_SveSimdWrapper<_Tp, _Np> __x, int __y)
      { return __x._M_data << __y; }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_bit_shift_right(_SveSimdWrapper<_Tp, _Np> __x, int __y)
      { return __x._M_data >> __y; }

    // In-place ++ / -- on every lane (broadcast 1 via vector operators).
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr void
      _S_increment(_SveSimdWrapper<_Tp, _Np>& __x)
      { __x = __x._M_data + 1; }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr void
      _S_decrement(_SveSimdWrapper<_Tp, _Np>& __x)
      { __x = __x._M_data - 1; }
1168
    // Lane-wise comparisons, each returning a predicate (mask) that is
    // set only within the active lanes.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
      _S_equal_to(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      {
        return svcmpeq(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data);
      }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
      _S_not_equal_to(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      {
        return svcmpne(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data);
      }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
      _S_less(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      {
        return svcmplt(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data);
      }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
      _S_less_equal(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      {
        return svcmple(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data);
      }
1196
    // simd.math
    // Scalar fallback: defines _S_<name> by invoking the scalar <name>
    // once per lane via __execute_n_times.  Used for math functions with
    // no direct SVE implementation in this backend.
    // (No comments inside the macro bodies: a // comment before a
    // line-continuation backslash would splice the next line into it.)
#define _GLIBCXX_SIMD_MATH_FALLBACK(__name)                                              \
    template <typename _Tp, size_t _Np, typename... _More>                               \
      static _SveSimdWrapper<_Tp, _Np> _S_##__name(const _SveSimdWrapper<_Tp, _Np>& __x, \
                                                   const _More&... __more)               \
      {                                                                                  \
        _SveSimdWrapper<_Tp, _Np> __r;                                                   \
        __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {        \
          __r._M_set(__i, __name(__x[__i], __more[__i]...));                             \
        });                                                                              \
        return __r;                                                                      \
      }

    // Scalar fallback for functions whose return element type is fixed
    // (_RetTp, e.g. int for ilogb, long for lround): builds a
    // __fixed_size_storage_t<_RetTp, N> result lane by lane.
#define _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(_RetTp, __name)                             \
    template <typename _Tp, typename... _More>                                           \
      static auto _S_##__name(const _Tp& __x, const _More&... __more)                    \
      {                                                                                  \
        return __fixed_size_storage_t<_RetTp, _Tp::_S_size>::_S_generate(                \
                 [&](auto __meta) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {                   \
                   return __meta._S_generator(                                           \
                            [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {          \
                              return __name(__x[__meta._S_offset + __i],                 \
                                            __more[__meta._S_offset + __i]...);         \
                            }, static_cast<_RetTp*>(nullptr));                           \
                 });                                                                     \
      }

    // Trigonometric, hyperbolic, exponential and logarithmic functions:
    // all lane-by-lane scalar fallbacks.
    _GLIBCXX_SIMD_MATH_FALLBACK(acos)
    _GLIBCXX_SIMD_MATH_FALLBACK(asin)
    _GLIBCXX_SIMD_MATH_FALLBACK(atan)
    _GLIBCXX_SIMD_MATH_FALLBACK(atan2)
    _GLIBCXX_SIMD_MATH_FALLBACK(cos)
    _GLIBCXX_SIMD_MATH_FALLBACK(sin)
    _GLIBCXX_SIMD_MATH_FALLBACK(tan)
    _GLIBCXX_SIMD_MATH_FALLBACK(acosh)
    _GLIBCXX_SIMD_MATH_FALLBACK(asinh)
    _GLIBCXX_SIMD_MATH_FALLBACK(atanh)
    _GLIBCXX_SIMD_MATH_FALLBACK(cosh)
    _GLIBCXX_SIMD_MATH_FALLBACK(sinh)
    _GLIBCXX_SIMD_MATH_FALLBACK(tanh)
    _GLIBCXX_SIMD_MATH_FALLBACK(exp)
    _GLIBCXX_SIMD_MATH_FALLBACK(exp2)
    _GLIBCXX_SIMD_MATH_FALLBACK(expm1)
    _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(int, ilogb)
    _GLIBCXX_SIMD_MATH_FALLBACK(log)
    _GLIBCXX_SIMD_MATH_FALLBACK(log10)
    _GLIBCXX_SIMD_MATH_FALLBACK(log1p)
    _GLIBCXX_SIMD_MATH_FALLBACK(log2)
    _GLIBCXX_SIMD_MATH_FALLBACK(logb)

    // modf implemented in simd_math.h
    _GLIBCXX_SIMD_MATH_FALLBACK(scalbn)
    _GLIBCXX_SIMD_MATH_FALLBACK(scalbln)
    _GLIBCXX_SIMD_MATH_FALLBACK(cbrt)
    _GLIBCXX_SIMD_MATH_FALLBACK(pow)
    _GLIBCXX_SIMD_MATH_FALLBACK(erf)
    _GLIBCXX_SIMD_MATH_FALLBACK(erfc)
    _GLIBCXX_SIMD_MATH_FALLBACK(lgamma)
    _GLIBCXX_SIMD_MATH_FALLBACK(tgamma)

    // Rounding-to-integer-type functions: fixed return element type.
    _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long, lrint)
    _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long long, llrint)

    _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long, lround)
    _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long long, llround)

    _GLIBCXX_SIMD_MATH_FALLBACK(fmod)
    _GLIBCXX_SIMD_MATH_FALLBACK(remainder)
    // remquo cannot use the plain fallback macro because of its int*
    // out-parameter: each lane's quotient bits are written into the
    // caller-provided fixed-size storage __z.
    template <typename _Tp, size_t _Np>
      static _SveSimdWrapper<_Tp, _Np>
      _S_remquo(const _SveSimdWrapper<_Tp, _Np> __x, const _SveSimdWrapper<_Tp, _Np> __y,
                __fixed_size_storage_t<int, _Np>* __z)
      {
        _SveSimdWrapper<_Tp, _Np> __r{};
        __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          int __tmp;
          __r._M_set(__i, remquo(__x[__i], __y[__i], &__tmp));
          __z->_M_set(__i, __tmp);
        });
        return __r;
      }

    // Lane-by-lane std::fpclassify, returning one int category per lane.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static __fixed_size_storage_t<int, _Np>
      _S_fpclassify(_SveSimdWrapper<_Tp, _Np> __x)
      {
        __fixed_size_storage_t<int, _Np> __r{};
        __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          __r._M_set(__i, std::fpclassify(__x[__i]));
        });
        return __r;
      }

    // copysign in simd_math.h
    _GLIBCXX_SIMD_MATH_FALLBACK(nextafter)
    _GLIBCXX_SIMD_MATH_FALLBACK(fdim)

#undef _GLIBCXX_SIMD_MATH_FALLBACK
#undef _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET
1297
    // Helper for the non-signaling comparisons (isgreater & friends),
    // which must not raise FE_INVALID on NaN operands.  The floats are
    // reinterpreted as same-sized integers and remapped to a totally
    // ordered integer encoding: for negative values the sign bit is
    // cleared (AND with __finite_max_v, which has all bits but the sign
    // bit set) and the result integer-negated, so that the integer
    // comparison __op agrees with the floating-point order.  Any lane
    // where __x or __y is NaN (unordered) is finally cleared from the
    // result with svbic_z.
    template <typename _Tp, size_t _Np, typename _Op>
      static constexpr _MaskMember<_Tp>
      __fp_cmp(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y, _Op __op)
      {
        using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>;
        using _VI = __sve_vector_type_t<_Ip, _Np>;
        using _WI = _SveSimdWrapper<_Ip, _Np>;
        const _WI __fmv = __sve_vector_type<_Ip, _Np>::__sve_broadcast(__finite_max_v<_Ip>);
        const _WI __zerov = __sve_vector_type<_Ip, _Np>::__sve_broadcast(0);
        const _WI __xn = _VI(__sve_reinterpret_cast<_Ip>(__x));
        const _WI __yn = _VI(__sve_reinterpret_cast<_Ip>(__y));

        const _WI __xp
          = svsel(_S_less(__xn, __zerov), _S_unary_minus(_WI(_S_bit_and(__xn, __fmv))), __xn);
        const _WI __yp
          = svsel(_S_less(__yn, __zerov), _S_unary_minus(_WI(_S_bit_and(__yn, __fmv))), __yn);
        return svbic_z(__sve_vector_type<_Ip, _Np>::__sve_active_mask(), __op(__xp, __yp)._M_data,
                       _SuperImpl::_S_isunordered(__x, __y)._M_data);
      }

    // isgreater(x, y) == (y < x) on the ordered lanes only.
    template <typename _Tp, size_t _Np>
      static constexpr _MaskMember<_Tp>
      _S_isgreater(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) noexcept
      { return __fp_cmp(__x, __y, [](auto __xp, auto __yp) { return _S_less(__yp, __xp); }); }

    template <typename _Tp, size_t _Np>
      static constexpr _MaskMember<_Tp>
      _S_isgreaterequal(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) noexcept
      { return __fp_cmp(__x, __y, [](auto __xp, auto __yp) { return _S_less_equal(__yp, __xp); }); }

    template <typename _Tp, size_t _Np>
      static constexpr _MaskMember<_Tp>
      _S_isless(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) noexcept
      { return __fp_cmp(__x, __y, [](auto __xp, auto __yp) { return _S_less(__xp, __yp); }); }

    template <typename _Tp, size_t _Np>
      static constexpr _MaskMember<_Tp>
      _S_islessequal(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) noexcept
      { return __fp_cmp(__x, __y, [](auto __xp, auto __yp) { return _S_less_equal(__xp, __yp); }); }

    // islessgreater(x, y): (x != y) with unordered lanes removed;
    // no integer remapping needed since only (in)equality is tested.
    template <typename _Tp, size_t _Np>
      static constexpr _MaskMember<_Tp>
      _S_islessgreater(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) noexcept
      {
        return svbic_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
                       _SuperImpl::_S_not_equal_to(__x, __y)._M_data,
                       _SuperImpl::_S_isunordered(__x, __y)._M_data);
      }
1346
    // abs / fabs share the same svabs intrinsic (defined for signed
    // integer and floating-point element types).
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
      _S_abs(_SveSimdWrapper<_Tp, _Np> __x) noexcept
      { return svabs_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data); }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
      _S_fabs(_SveSimdWrapper<_Tp, _Np> __x) noexcept
      { return svabs_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data); }

    // Lane-wise square root via svsqrt (floating-point only).
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
      _S_sqrt(_SveSimdWrapper<_Tp, _Np> __x) noexcept
      { return svsqrt_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data); }
1361
    // ldexp(x, e) = x * 2^e via svscale.  The int exponents arrive as a
    // fixed-size storage; for float the int32 vector matches the lane
    // width directly, otherwise (presumably double) the exponents are
    // widened to int64 with svunpklo first.
    // NOTE(review): the non-float branch passes __sve_register (the
    // wrapper) to svunpklo while the float branch uses ._M_data — this
    // relies on the wrapper's implicit conversion to the vector type;
    // confirm.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
      _S_ldexp(_SveSimdWrapper<_Tp, _Np> __x, __fixed_size_storage_t<int, _Np> __y) noexcept
      {
        auto __sve_register = __y.first;
        if constexpr (std::is_same_v<_Tp, float>)
          return svscale_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data,
                           __sve_register._M_data);
        else
          {
            __sve_vector_type_t<int64_t, _Np> __sve_d_register = svunpklo(__sve_register);
            return svscale_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data,
                             __sve_d_register);
          }
      }
1377
    // Fused multiply-add: svmad computes __x * __y + __z per active lane.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
      _S_fma(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y,
             _SveSimdWrapper<_Tp, _Np> __z)
      {
        return svmad_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data,
                       __z._M_data);
      }

    // fmax/fmin use the "NM" (number-preferring) variants: when exactly
    // one operand is NaN the other is returned, as std::fmax/fmin require.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
      _S_fmax(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      {
        return svmaxnm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data);
      }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
      _S_fmin(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      {
        return svminnm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data);
      }
1400
    // isfinite: under -ffinite-math-only everything is finite by
    // assumption; otherwise test |x| <= finite_max using integer
    // comparison of the bit patterns (|x| has the sign bit clear, so the
    // integer order matches; inf/NaN have all exponent bits set and
    // compare greater).  __x is [[maybe_unused]] for the first branch.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
      _S_isfinite([[maybe_unused]] _SveSimdWrapper<_Tp, _Np> __x)
      {
#if __FINITE_MATH_ONLY__
        return __sve_vector_type_t<_Tp, _Np>::__sve_all_true_mask();
#else
        // if all exponent bits are set, __x is either inf or NaN
        using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>;
        const __sve_vector_type_t<_Ip, _Np> __absn = __sve_reinterpret_cast<_Ip>(_S_abs(__x));
        const __sve_vector_type_t<_Ip, _Np> __maxn
          = __sve_reinterpret_cast<_Ip>(
              __sve_vector_type<_Tp, _Np>::__sve_broadcast(__finite_max_v<_Tp>));

        return _S_less_equal(_SveSimdWrapper<_Ip, _Np>{__absn}, _SveSimdWrapper<_Ip, _Np>{__maxn});
#endif
      }

    // isinf: |x| == +infinity; trivially false with finite-only math.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
      _S_isinf([[maybe_unused]] _SveSimdWrapper<_Tp, _Np> __x)
      {
#if __FINITE_MATH_ONLY__
        return {}; // false
#else
        return _S_equal_to<_Tp, _Np>(_S_abs(__x), _S_broadcast(__infinity_v<_Tp>));
#endif
      }

    // isnan: svcmpuo(x, x) is true exactly where x is unordered with
    // itself, i.e. NaN; trivially false with finite-only math.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
      _S_isnan([[maybe_unused]] _SveSimdWrapper<_Tp, _Np> __x)
      {
#if __FINITE_MATH_ONLY__
        return {}; // false
#else
        return svcmpuo(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __x._M_data);
#endif
      }
1441
    // isnormal: norm_min <= |x| <= finite_max, tested on the integer bit
    // patterns (valid because |x| has the sign bit clear).  Subnormals,
    // zero, inf and NaN all fall outside this range.
    // NOTE(review): the __FINITE_MATH_ONLY__ branch calls
    // _S_greater_equal, which is not defined anywhere in this file —
    // confirm it resolves (the branch is only compiled with
    // -ffinite-math-only).
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
      _S_isnormal(_SveSimdWrapper<_Tp, _Np> __x)
      {
        using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>;
        using _V = __sve_vector_type_t<_Ip, _Np>;
        using _VW = _SveSimdWrapper<_Ip, _Np>;

        const _V __absn = __sve_reinterpret_cast<_Ip>(_S_abs(__x));
        const _V __minn = __sve_reinterpret_cast<_Ip>(
                            __sve_vector_type<_Tp, _Np>::__sve_broadcast(__norm_min_v<_Tp>));
#if __FINITE_MATH_ONLY__
        return _S_greater_equal(_VW{__absn}, _VW{__minn});
#else
        const _V __maxn = __sve_reinterpret_cast<_Ip>(
                            __sve_vector_type<_Tp, _Np>::__sve_broadcast(__finite_max_v<_Tp>));
        return _MaskImpl::_S_bit_and(_S_less_equal(_VW{__minn}, _VW{__absn}),
                                     _S_less_equal(_VW{__absn}, _VW{__maxn}));
#endif
      }

    // signbit: reinterpret as signed integer and test < 0 (covers -0.0
    // and negative NaNs, whose sign bit is set).
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
      _S_signbit(_SveSimdWrapper<_Tp, _Np> __x)
      {
        using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>;
        using _V = __sve_vector_type_t<_Ip, _Np>;
        using _VW = _SveSimdWrapper<_Ip, _Np>;

        const _V __xn = __sve_reinterpret_cast<_Ip>(__x);
        const _V __zeron = __sve_vector_type<_Ip, _Np>::__sve_broadcast(0);
        return _S_less(_VW{__xn}, _VW{__zeron});
      }

    // isunordered: true where either operand is NaN (IEEE unordered
    // compare, maps directly to svcmpuo).
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
      _S_isunordered(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      {
        return svcmpuo(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data);
      }
1482
    // Rounding, each mapped to the corresponding SVE round intrinsic:
    //   svrinti - round using the current FP rounding mode (nearbyint)
    //   svrintz - toward zero (trunc)
    //   svrinta - to nearest, ties away from zero (round)
    //   svrintm - toward -inf (floor)
    //   svrintp - toward +inf (ceil)
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
      _S_nearbyint(_SveSimdWrapper<_Tp, _Np> __x) noexcept
      { return svrinti_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data); }

    // rint delegates to nearbyint (difference is only the FE_INEXACT
    // exception, not the result value).
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
      _S_rint(_SveSimdWrapper<_Tp, _Np> __x) noexcept
      { return _SuperImpl::_S_nearbyint(__x); }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
      _S_trunc(_SveSimdWrapper<_Tp, _Np> __x) noexcept
      { return svrintz_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data); }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
      _S_round(_SveSimdWrapper<_Tp, _Np> __x) noexcept
      { return svrinta_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data); }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
      _S_floor(_SveSimdWrapper<_Tp, _Np> __x) noexcept
      { return svrintm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data); }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
      _S_ceil(_SveSimdWrapper<_Tp, _Np> __x) noexcept
      { return svrintp_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data); }
1512
    // where(__k, __lhs) = __rhs : blend __rhs into __lhs under mask __k.
    template <typename _Tp, size_t _Bits, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr void
      _S_masked_assign(_SveMaskWrapper<_Bits, _Np> __k, _SveSimdWrapper<_Tp, _Np>& __lhs,
                       __type_identity_t<_SveSimdWrapper<_Tp, _Np>> __rhs)
      { __lhs = _CommonImpl::_S_blend(__k, __lhs, __rhs) }

    // Scalar-rhs overload: broadcast __rhs through a simd first.
    template <typename _Tp, size_t _Bits, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr void
      _S_masked_assign(_SveMaskWrapper<_Bits, _Np> __k, _SveSimdWrapper<_Tp, _Np>& __lhs,
                       __type_identity_t<_Tp> __rhs)
      { __lhs = _CommonImpl::_S_blend(__k, __lhs, __data(simd<_Tp, _Abi>(__rhs))); }

    // where(__k, __lhs) op= __rhs : apply __op (called with _SuperImpl{}
    // so __op can dispatch to the impl's static operators) to the whole
    // vector, then keep the result only in the lanes selected by __k.
    template <typename _Op, typename _Tp, size_t _Bits, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr void
      _S_masked_cassign(const _SveMaskWrapper<_Bits, _Np> __k, _SveSimdWrapper<_Tp, _Np>& __lhs,
                        const __type_identity_t<_SveSimdWrapper<_Tp, _Np>> __rhs, _Op __op)
      {
        __lhs = _CommonImpl::_S_blend(__k, __lhs,
                                      _SveSimdWrapper<_Tp, _Np>(__op(_SuperImpl{}, __lhs, __rhs)));
      }

    // Scalar-rhs overload: broadcast then delegate to the vector form.
    template <typename _Op, typename _Tp, size_t _Bits, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr void
      _S_masked_cassign(const _SveMaskWrapper<_Bits, _Np> __k, _SveSimdWrapper<_Tp, _Np>& __lhs,
                        const __type_identity_t<_Tp> __rhs, _Op __op)
      { _S_masked_cassign(__k, __lhs, _S_broadcast(__rhs), __op); }

    // Write a single lane __i of __v.
    template <typename _Tp, size_t _Np, typename _Up>
      _GLIBCXX_SIMD_INTRINSIC static constexpr void
      _S_set(_SveSimdWrapper<_Tp, _Np>& __v, int __i, _Up&& __x) noexcept
      { __v._M_set(__i, static_cast<_Up&&>(__x)); }

    // Masked unary operator (e.g. where(__k, v) = -v): apply _Op to the
    // full vector via the simd wrapper type, then blend under __k.
    template <template <typename> class _Op, typename _Tp, size_t _Bits, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SveSimdWrapper<_Tp, _Np>
      _S_masked_unary(const _SveMaskWrapper<_Bits, _Np> __k, const _SveSimdWrapper<_Tp, _Np> __v)
      {
        auto __vv = simd<_Tp, _Abi>{__private_init, __v};
        _Op<decltype(__vv)> __op;
        return _CommonImpl::_S_blend(__k, __v, __data(__op(__vv)));
      }
1553 };
1554
// Mask (svbool_t predicate) operations for the SVE ABI: broadcast,
// load/store to arrays of bool, conversions, logical/bitwise predicate
// ops, and the reductions (popcount, all_of, find_first_set, ...).
template <typename _Abi, typename>
  struct _MaskImplSve
  {
    // Mask storage type for element type _Tp under this ABI.
    template <typename _Tp>
      using _MaskMember = typename _Abi::template _MaskMember<_Tp>;

    // Tag type used to select overloads by element type.
    template <typename _Tp>
      using _TypeTag = _Tp*;

    template <typename _Tp>
      static constexpr size_t _S_size = simd_size_v<_Tp, _Abi>;

    // Broadcast a bool: the active-lanes-true predicate or all-false.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
      _S_broadcast(bool __x)
      {
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
        __sve_bool_type __tr = __sve_vector_type<_Tp, _Np>::__sve_active_mask();
        __sve_bool_type __fl = svpfalse_b();
        return __x ? __tr : __fl;
      }

    // Load _Np bools: load the bytes as uint8, derive a byte-granular
    // predicate with != 0, then widen the predicate granularity with
    // repeated svunpklo (each doubles the lane size) until it matches
    // sizeof(_Tp).
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
      _S_load(const bool* __mem)
      {
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
        const uint8_t* __p = reinterpret_cast<const uint8_t*>(__mem);
        __sve_bool_type __u8_active_mask = __sve_vector_type<uint8_t, _Np>::__sve_active_mask();
        __sve_vector_type_t<uint8_t, _Np> __u8_vec_mask_load = svld1(__u8_active_mask, __p);
        __sve_bool_type __u8_mask = svcmpne(__u8_active_mask, __u8_vec_mask_load, 0);

        __sve_bool_type __tp_mask = __u8_mask;
        for (size_t __up_size = 1; __up_size != sizeof(_Tp); __up_size *= 2)
          {
            __tp_mask = svunpklo(__tp_mask);
          }

        _SveMaskWrapper<sizeof(_Tp), simd_size_v<_Tp, _Abi>> __r{__tp_mask};
        return __r;
      }

    // Masked load: lane-by-lane scalar merge of __mem (where __mask is
    // set) with __merge (elsewhere).
    template <size_t _Bits, size_t _Np>
      static inline _SveMaskWrapper<_Bits, _Np>
      _S_masked_load(_SveMaskWrapper<_Bits, _Np> __merge, _SveMaskWrapper<_Bits, _Np> __mask,
                     const bool* __mem) noexcept
      {
        _SveMaskWrapper<_Bits, _Np> __r;

        __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          if (__mask[__i])
            __r._M_set(__i, __mem[__i]);
          else
            __r._M_set(__i, __merge[__i]);
        });

        return __r;
      }

    // Store the mask as _Np bools, one lane at a time.
    template <size_t _Bits, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr void
      _S_store(_SveMaskWrapper<_Bits, _Np> __v, bool* __mem) noexcept
      {
        __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          __mem[__i] = __v[__i];
        });
      }

    // Masked store: write only the lanes selected by __k.
    template <size_t _Bits, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr void
      _S_masked_store(const _SveMaskWrapper<_Bits, _Np> __v, bool* __mem,
                      const _SveMaskWrapper<_Bits, _Np> __k) noexcept
      {
        __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          if (__k[__i])
            __mem[__i] = __v[__i];
        });
      }

    // Convert the predicate to a bitmask, lane __i -> bit __i.
    template <size_t _Bits, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np>
      _S_to_bits(_SveMaskWrapper<_Bits, _Np> __x)
      {
        _ULLong __r = 0;
        __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          __r |= _ULLong(__x[__i]) << __i;
        });
        return __r;
      }

    // Convert a bitmask back to a predicate, bit __i -> lane __i.
    template <size_t _Np, typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
      _S_from_bitmask(_SanitizedBitMask<_Np> __bits, _TypeTag<_Tp>)
      {
        _SveMaskWrapper<sizeof(_Tp), _Np> __r;
        __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          __r._M_set(__i, __bits[__i]);
        });
        return __r;
      }

    // Convert a mask from another ABI/element type.  The if-constexpr
    // chain returns from whichever case applies:
    //  - scalar source: single lane written into an all-false predicate;
    //  - SVE source, same lane size: reuse the predicate as-is;
    //  - SVE source, narrower lanes: widen with repeated svunpklo;
    //  - SVE source, wider lanes / NEON source: copy lane by lane;
    //  - fixed-size source: go through the bitmask overload below.
    template <typename _Tp, typename _Up, typename _UAbi>
      _GLIBCXX_SIMD_INTRINSIC static constexpr auto
      _S_convert(simd_mask<_Up, _UAbi> __x)
      {
        using _R = _SveMaskWrapper<sizeof(_Tp), simd_size_v<_Tp, _Abi>>;
        if constexpr (__is_scalar_abi<_UAbi>())
          {
            _R __r{__sve_bool_type(svpfalse())};
            __r._M_set(0, __data(__x));
            return __r;
          }
        if constexpr (__is_sve_abi<_UAbi>())
          {
            if constexpr (sizeof(_Up) == sizeof(_Tp))
              return __data(__x);
            if constexpr (sizeof(_Up) < sizeof(_Tp))
              {
                __sve_bool_type __xmdata = __data(__x)._M_data;
                __sve_bool_type __r = __xmdata;
                for (size_t __up_size = sizeof(_Up); __up_size != sizeof(_Tp); __up_size *= 2)
                  {
                    __r = svunpklo(__r);
                  }
                return _R{__r};
              }
            else
              {
                _R __r{__sve_bool_type(svpfalse())};
                constexpr size_t __min_size
                  = std::min(simd_size_v<_Tp, _Abi>, simd_mask<_Up, _UAbi>::size());
                __execute_n_times<__min_size>(
                  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __r._M_set(__i, __x[__i]); });
                return __r;
              }
          }
        if constexpr (__is_neon_abi<_UAbi>())
          {
            _R __r{__sve_bool_type(svpfalse())};
            constexpr size_t __min_size
              = std::min(simd_size_v<_Tp, _Abi>, simd_mask<_Up, _UAbi>::size());
            __execute_n_times<__min_size>(
              [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __r._M_set(__i, __x[__i]); });
            return __r;
          }
        if constexpr (__is_fixed_size_abi<_UAbi>())
          {
            return _S_convert<_Tp>(__data(__x));
          }
        return _R{};
      }

    // Convert a _BitMask to a predicate, lane by lane.
    template <typename _Tp, size_t _Np, bool _Sanitized>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
      _S_convert(_BitMask<_Np, _Sanitized> __x)
      {
        _MaskMember<_Tp> __r{};
        __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          __r._M_set(__i, __x[__i]);
        });
        return __r;
      }

    // Logical/bitwise predicate operators.  For predicates && and &
    // coincide (as do || and |); all use '_z' zeroing predication so
    // lanes outside the active mask stay false.
    template <size_t _Bits, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SveMaskWrapper<_Bits, _Np>
      _S_logical_and(const _SveMaskWrapper<_Bits, _Np>& __x, const _SveMaskWrapper<_Bits, _Np>& __y)
      {
        return svand_z(_SveMaskWrapper<_Bits, _Np>::_BuiltinSveVectorType::__sve_active_mask(),
                       __x._M_data, __y._M_data);
      }

    template <size_t _Bits, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SveMaskWrapper<_Bits, _Np>
      _S_logical_or(const _SveMaskWrapper<_Bits, _Np>& __x, const _SveMaskWrapper<_Bits, _Np>& __y)
      {
        return svorr_z(_SveMaskWrapper<_Bits, _Np>::_BuiltinSveVectorType::__sve_active_mask(),
                       __x._M_data, __y._M_data);
      }

    template <size_t _Bits, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SveMaskWrapper<_Bits, _Np>
      _S_bit_not(const _SveMaskWrapper<_Bits, _Np>& __x)
      {
        return svnot_z(_SveMaskWrapper<_Bits, _Np>::_BuiltinSveVectorType::__sve_active_mask(),
                       __x._M_data);
      }

    template <size_t _Bits, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SveMaskWrapper<_Bits, _Np>
      _S_bit_and(const _SveMaskWrapper<_Bits, _Np>& __x, const _SveMaskWrapper<_Bits, _Np>& __y)
      {
        return svand_z(_SveMaskWrapper<_Bits, _Np>::_BuiltinSveVectorType::__sve_active_mask(),
                       __x._M_data, __y._M_data);
      }

    template <size_t _Bits, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SveMaskWrapper<_Bits, _Np>
      _S_bit_or(const _SveMaskWrapper<_Bits, _Np>& __x, const _SveMaskWrapper<_Bits, _Np>& __y)
      {
        return svorr_z(_SveMaskWrapper<_Bits, _Np>::_BuiltinSveVectorType::__sve_active_mask(),
                       __x._M_data, __y._M_data);
      }

    template <size_t _Bits, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SveMaskWrapper<_Bits, _Np>
      _S_bit_xor(const _SveMaskWrapper<_Bits, _Np>& __x, const _SveMaskWrapper<_Bits, _Np>& __y)
      {
        return sveor_z(_SveMaskWrapper<_Bits, _Np>::_BuiltinSveVectorType::__sve_active_mask(),
                       __x._M_data, __y._M_data);
      }

    // Set lane __i of the predicate __k to __x: build a one-lane
    // predicate by comparing the lane-index vector __index0123 against
    // __i, then OR it in (true) or BIC it out (false).
    template <size_t _Bits, size_t _Np>
      static constexpr void
      _S_set(_SveMaskWrapper<_Bits, _Np>& __k, int __i, bool __x) noexcept
      {
        auto __index = svcmpeq(_SveMaskWrapper<_Bits, _Np>::_BuiltinSveVectorType::__sve_active_mask(),
                               __sve_mask_type<_Bits>::__index0123,
                               typename __sve_mask_type<_Bits>::__sve_mask_uint_type(__i));
        if (__x)
          __k._M_data = svorr_z(_SveMaskWrapper<_Bits, _Np>::_BuiltinSveVectorType::__sve_active_mask(),
                                __k._M_data, __index);
        else
          __k._M_data = svbic_z(_SveMaskWrapper<_Bits, _Np>::_BuiltinSveVectorType::__sve_active_mask(),
                                __k._M_data, __index);
      }

    // where(__k, __lhs) = __rhs for masks: predicate select.
    template <size_t _Bits, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static void
      _S_masked_assign(_SveMaskWrapper<_Bits, _Np> __k, _SveMaskWrapper<_Bits, _Np>& __lhs,
                       _SveMaskWrapper<_Bits, _Np> __rhs)
      { __lhs._M_data = svsel(__k._M_data, __rhs._M_data, __lhs._M_data); }

    // Scalar-bool rhs: broadcast to a predicate first, then select.
    template <size_t _Bits, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static void
      _S_masked_assign(_SveMaskWrapper<_Bits, _Np> __k, _SveMaskWrapper<_Bits, _Np>& __lhs,
                       bool __rhs)
      {
        __lhs._M_data
          = svsel(__k._M_data, _S_broadcast<__int_with_sizeof_t<_Bits>>(__rhs), __lhs._M_data);
      }

    // Number of true lanes within the active mask.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static int
      _S_popcount(simd_mask<_Tp, _Abi> __k)
      {
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;

        return __sve_mask_type<sizeof(_Tp)>::__sve_mask_active_count(
                 __sve_vector_type<_Tp, _Np>::__sve_active_mask(), __k._M_data);
      }

    // all_of: every active lane is set.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static bool
      _S_all_of(simd_mask<_Tp, _Abi> __k)
      { return _S_popcount(__k) == simd_size_v<_Tp, _Abi>; }

    // any_of: at least one active lane is set (svptest_any).
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static bool
      _S_any_of(simd_mask<_Tp, _Abi> __k)
      {
        return svptest_any(__sve_vector_type<_Tp, simd_size_v<_Tp, _Abi>>::__sve_active_mask(),
                           __k._M_data);
      }

    // none_of: no active lane is set.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static bool
      _S_none_of(simd_mask<_Tp, _Abi> __k)
      {
        return !svptest_any(__sve_vector_type<_Tp, simd_size_v<_Tp, _Abi>>::__sve_active_mask(),
                            __k._M_data);
      }

    // some_of: at least one but not all lanes set.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static bool
      _S_some_of(simd_mask<_Tp, _Abi> __k)
      {
        int __msk_count = _S_popcount(__k);
        return (__msk_count > 0) && (__msk_count < (int) simd_size_v<_Tp, _Abi>);
      }

    // Index of the first set lane: svpfirst isolates the first active
    // element of __k as a predicate, and svclastb extracts the
    // corresponding entry of the lane-index vector (-1 if none set).
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static int
      _S_find_first_set(simd_mask<_Tp, _Abi> __k)
      {
        return svclastb(svpfirst(__k._M_data, svpfalse()),
                        -1, __sve_mask_type<sizeof(_Tp)>::__index0123);
      }

    // Index of the last set lane: svclastb over the whole mask picks the
    // last active entry of the lane-index vector (-1 if none set).
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static int
      _S_find_last_set(simd_mask<_Tp, _Abi> __k)
      { return svclastb(__k._M_data, -1, __sve_mask_type<sizeof(_Tp)>::__index0123); }
  };
1848
1849_GLIBCXX_SIMD_END_NAMESPACE
1850#endif // __cplusplus >= 201703L
1851#endif // _GLIBCXX_EXPERIMENTAL_SIMD_SVE_H_
1852// vim: sw=2 noet ts=8 sts=2 tw=100
__bool_constant< true > true_type
The type used as a compile-time boolean with true value.
Definition type_traits:111
typename enable_if< _Cond, _Tp >::type enable_if_t
Alias template for enable_if.
Definition type_traits:2711
_Tp * begin(valarray< _Tp > &__va) noexcept
Return an iterator pointing to the first element of the valarray.
Definition valarray:1227
constexpr const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.