use core::hint::unreachable_unchecked;

use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;

#[cfg(test)]
use stdarch_test::assert_instr;

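// NOTE: the lowercase helpers called below (`phaddw`, `pmaddwd`, `pmulhrsw`,
// `mpsadbw`, the `maskload*`/`maskstore*` functions, and the various
// `*gather*` functions) are LLVM intrinsic bindings declared elsewhere in
// this module; their declarations are not part of this excerpt.
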
/// Computes the absolute values of packed 32-bit integers in `a`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi32(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let r = simd_select::<m32x8, _>(simd_lt(a, i32x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute values of packed 16-bit integers in `a`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi16(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let r = simd_select::<m16x16, _>(simd_lt(a, i16x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute values of packed 8-bit integers in `a`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi8(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let r = simd_select::<m8x32, _>(simd_lt(a, i8x32::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Adds packed 64-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i64x4(), b.as_i64x4())) }
}

/// Adds packed 32-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i32x8(), b.as_i32x8())) }
}

/// Adds packed 16-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i16x16(), b.as_i16x16())) }
}

/// Adds packed 8-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i8x32(), b.as_i8x32())) }
}

/// Adds packed 8-bit integers in `a` and `b` using saturation.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32())) }
}

/// Adds packed 16-bit integers in `a` and `b` using saturation.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16())) }
}

/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32())) }
}

/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16())) }
}

/// Concatenates pairs of 16-byte blocks in `a` and `b` into a 32-byte
/// temporary result, shifts the result right by `IMM8` bytes, and returns the
/// low 16 bytes of each block.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        // If palignr is shifting the pair of vectors more than the size of two
        // lanes, emit zero.
        if IMM8 >= 32 {
            return _mm256_setzero_si256();
        }
        // If palignr is shifting the pair of input vectors more than one lane,
        // but less than two lanes, convert to shifting in zeroes.
        let (a, b) = if IMM8 > 16 {
            (_mm256_setzero_si256(), a)
        } else {
            (a, b)
        };

        let a = a.as_i8x32();
        let b = b.as_i8x32();

        if IMM8 == 16 {
            return transmute(a);
        }

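        // In `simd_shuffle!(b, a, ...)` the two inputs are concatenated, so
        // indices 0..=31 select bytes of `b` and indices 32..=63 select bytes
        // of `a`. Each arm below therefore shifts within the two 128-bit lanes
        // independently, matching the per-lane behavior of `vpalignr`.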
        let r: i8x32 = match IMM8 % 16 {
            0 => simd_shuffle!(
                b,
                a,
                [
                    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
                    22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                ],
            ),
            1 => simd_shuffle!(
                b,
                a,
                [
                    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22,
                    23, 24, 25, 26, 27, 28, 29, 30, 31, 48,
                ],
            ),
            2 => simd_shuffle!(
                b,
                a,
                [
                    2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 18, 19, 20, 21, 22, 23,
                    24, 25, 26, 27, 28, 29, 30, 31, 48, 49,
                ],
            ),
            3 => simd_shuffle!(
                b,
                a,
                [
                    3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 19, 20, 21, 22, 23,
                    24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50,
                ],
            ),
            4 => simd_shuffle!(
                b,
                a,
                [
                    4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 20, 21, 22, 23, 24,
                    25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51,
                ],
            ),
            5 => simd_shuffle!(
                b,
                a,
                [
                    5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 21, 22, 23, 24, 25,
                    26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52,
                ],
            ),
            6 => simd_shuffle!(
                b,
                a,
                [
                    6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 22, 23, 24, 25, 26,
                    27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53,
                ],
            ),
            7 => simd_shuffle!(
                b,
                a,
                [
                    7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 23, 24, 25, 26,
                    27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54,
                ],
            ),
            8 => simd_shuffle!(
                b,
                a,
                [
                    8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 24, 25, 26, 27,
                    28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55,
                ],
            ),
            9 => simd_shuffle!(
                b,
                a,
                [
                    9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 25, 26, 27, 28,
                    29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56,
                ],
            ),
            10 => simd_shuffle!(
                b,
                a,
                [
                    10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 26, 27, 28, 29,
                    30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
                ],
            ),
            11 => simd_shuffle!(
                b,
                a,
                [
                    11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 27, 28, 29, 30,
                    31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
                ],
            ),
            12 => simd_shuffle!(
                b,
                a,
                [
                    12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 28, 29, 30, 31,
                    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
                ],
            ),
            13 => simd_shuffle!(
                b,
                a,
                [
                    13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 29, 30, 31, 48,
                    49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
                ],
            ),
            14 => simd_shuffle!(
                b,
                a,
                [
                    14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 30, 31, 48, 49,
                    50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
                ],
            ),
            15 => simd_shuffle!(
                b,
                a,
                [
                    15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 31, 48, 49, 50,
                    51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
                ],
            ),
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}

/// Computes the bitwise AND of 256 bits (representing integer data) in `a`
/// and `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_and(a.as_i64x4(), b.as_i64x4())) }
}

/// Computes the bitwise NOT of 256 bits (representing integer data) in `a`
/// and then AND with `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let all_ones = _mm256_set1_epi8(-1);
        transmute(simd_and(
            simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
            b.as_i64x4(),
        ))
    }
}

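// The rounding averages below are computed in a widened element type so that
// the `+ 1` rounding term cannot overflow before the final shift right by one.
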
/// Averages packed unsigned 16-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u32x16>(a.as_u16x16());
        let b = simd_cast::<_, u32x16>(b.as_u16x16());
        let r = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1));
        transmute(simd_cast::<_, u16x16>(r))
    }
}

/// Averages packed unsigned 8-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u16x32>(a.as_u8x32());
        let b = simd_cast::<_, u16x32>(b.as_u8x32());
        let r = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1));
        transmute(simd_cast::<_, u8x32>(r))
    }
}

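// A note on the blend implementations below: for each output lane, the inner
// `[...][bits]` lookup selects either the lane's index in `a` or the index of
// the corresponding lane in `b` (offset by the length of `a`), based on the
// relevant bits of the immediate, so the whole blend becomes a compile-time
// `simd_shuffle!`.
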
/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM4`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM4, 4);
    unsafe {
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        let r: i32x4 = simd_shuffle!(
            a,
            b,
            [
                [0, 4, 0, 4][IMM4 as usize & 0b11],
                [1, 1, 5, 5][IMM4 as usize & 0b11],
                [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11],
                [3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11],
            ],
        );
        transmute(r)
    }
}

/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM8`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        let r: i32x8 = simd_shuffle!(
            a,
            b,
            [
                [0, 8, 0, 8][IMM8 as usize & 0b11],
                [1, 1, 9, 9][IMM8 as usize & 0b11],
                [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11],
                [4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11],
                [6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}

/// Blends packed 16-bit integers from `a` and `b` using control mask `IMM8`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();

        let r: i16x16 = simd_shuffle!(
            a,
            b,
            [
                [0, 16, 0, 16][IMM8 as usize & 0b11],
                [1, 1, 17, 17][IMM8 as usize & 0b11],
                [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11],
                [4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11],
                [6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11],
                [8, 24, 8, 24][IMM8 as usize & 0b11],
                [9, 9, 25, 25][IMM8 as usize & 0b11],
                [10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11],
                [11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11],
                [12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11],
                [13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11],
                [14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11],
                [15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}

/// Blends packed 8-bit integers from `a` and `b` using `mask`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
    unsafe {
        let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::ZERO);
        transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32()))
    }
}

/// Broadcasts the low packed 8-bit integer from `a` to all elements of the
/// 128-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 16]);
        transmute::<i8x16, _>(ret)
    }
}

/// Broadcasts the low packed 8-bit integer from `a` to all elements of the
/// 256-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 32]);
        transmute::<i8x32, _>(ret)
    }
}

/// Broadcasts the low packed 32-bit integer from `a` to all elements of the
/// 128-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 4]);
        transmute::<i32x4, _>(ret)
    }
}

/// Broadcasts the low packed 32-bit integer from `a` to all elements of the
/// 256-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 8]);
        transmute::<i32x8, _>(ret)
    }
}

/// Broadcasts the low packed 64-bit integer from `a` to all elements of the
/// 128-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
// Emits `vmovddup` instead of `vpbroadcastq`.
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
        transmute::<i64x2, _>(ret)
    }
}

/// Broadcasts the low packed 64-bit integer from `a` to all elements of the
/// 256-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
        transmute::<i64x4, _>(ret)
    }
}

/// Broadcasts the low double-precision (64-bit) floating-point element from
/// `a` to all elements of the 128-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2]) }
}

/// Broadcasts the low double-precision (64-bit) floating-point element from
/// `a` to all elements of the 256-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4]) }
}

/// Broadcasts 128 bits of integer data from `a` to all 128-bit lanes in the
/// 256-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}

/// Broadcasts 128 bits of integer data from `a` to all 128-bit lanes in the
/// 256-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}

/// Broadcasts the low single-precision (32-bit) floating-point element from
/// `a` to all elements of the 128-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastss_ps(a: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4]) }
}

/// Broadcasts the low single-precision (32-bit) floating-point element from
/// `a` to all elements of the 256-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8]) }
}

/// Broadcasts the low packed 16-bit integer from `a` to all elements of the
/// 128-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 8]);
        transmute::<i16x8, _>(ret)
    }
}

/// Broadcasts the low packed 16-bit integer from `a` to all elements of the
/// 256-bit returned value.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 16]);
        transmute::<i16x16, _>(ret)
    }
}

/// Compares packed 64-bit integers in `a` and `b` for equality.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
}

/// Compares packed 32-bit integers in `a` and `b` for equality.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
}

/// Compares packed 16-bit integers in `a` and `b` for equality.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
}

/// Compares packed 8-bit integers in `a` and `b` for equality.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
}

/// Compares packed 64-bit integers in `a` and `b` for greater-than.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
}

/// Compares packed 32-bit integers in `a` and `b` for greater-than.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
}

/// Compares packed 16-bit integers in `a` and `b` for greater-than.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
}

/// Compares packed 8-bit integers in `a` and `b` for greater-than.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
}

/// Sign-extends 16-bit integers to 32-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_i16x8())) }
}

/// Sign-extends the lower four 16-bit integers in `a` to 64-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i16x8();
        let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}

/// Sign-extends 32-bit integers to 64-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_i32x4())) }
}

/// Sign-extends 8-bit integers to 16-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_i8x16())) }
}

/// Sign-extends the lower eight 8-bit integers in `a` to 32-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}

/// Sign-extends the lower four 8-bit integers in `a` to 64-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}

/// Zero-extends unsigned 16-bit integers in `a` to 32-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_u16x8())) }
}

/// Zero-extends the lower four unsigned 16-bit integers in `a` to 64-bit
/// integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u16x8();
        let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}

/// Zero-extends unsigned 32-bit integers in `a` to 64-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_u32x4())) }
}

/// Zero-extends unsigned 8-bit integers in `a` to 16-bit integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_u8x16())) }
}

/// Zero-extends the lower eight unsigned 8-bit integers in `a` to 32-bit
/// integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}

/// Zero-extends the lower four unsigned 8-bit integers in `a` to 64-bit
/// integers.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}

/// Extracts 128 bits (of integer data) from `a` selected with `IMM1`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        let b = i64x4::ZERO;
        let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]);
        transmute(dst)
    }
}

/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phaddw(a.as_i16x16(), b.as_i16x16())) }
}

/// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phaddd(a.as_i32x8(), b.as_i32x8())) }
}

/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b` using
/// saturation.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phaddsw(a.as_i16x16(), b.as_i16x16())) }
}

/// Horizontally subtracts adjacent pairs of 16-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phsubw(a.as_i16x16(), b.as_i16x16())) }
}

/// Horizontally subtracts adjacent pairs of 32-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phsubd(a.as_i32x8(), b.as_i32x8())) }
}

/// Horizontally subtracts adjacent pairs of 16-bit integers in `a` and `b`
/// using saturation.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phsubsw(a.as_i16x16(), b.as_i16x16())) }
}

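// For all of the gather intrinsics below, `SCALE` must be 1, 2, 4, or 8. In
// the `mask` variants, an element is gathered from memory only when the sign
// bit of the corresponding mask element is set; otherwise it is copied from
// `src`.
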
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi32(-1).as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. Lanes whose `mask` element does not
/// have its highest bit set are taken from `src` instead.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x8::ZERO;
    let neg_one = _mm256_set1_epi32(-1).as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. Lanes whose `mask` element does not
/// have its highest bit set are taken from `src` instead.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi32<const SCALE: i32>(
    src: __m256i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x8();
    let mask = mask.as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    let r = vpgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. Lanes whose `mask` element does not
/// have its highest bit set are taken from `src` instead.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdps(src, slice, offsets, mask, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_ps();
    let neg_one = _mm256_set1_ps(-1.0);
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    vpgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. Lanes whose `mask` element does not
/// have its highest bit set are taken from `src` instead.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_ps<const SCALE: i32>(
    src: __m256,
    slice: *const f32,
    offsets: __m256i,
    mask: __m256,
) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    vpgatherdps(src, slice, offsets, mask, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. Lanes whose `mask` element does not
/// have its highest bit set are taken from `src` instead.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. Lanes whose `mask` element does not
/// have its highest bit set are taken from `src` instead.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = vpgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. Lanes whose `mask` element does not
/// have its highest bit set are taken from `src` instead.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(src, slice, offsets, mask, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m128i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. Lanes whose `mask` element does not
/// have its highest bit set are taken from `src` instead.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(src, slice, offsets, mask, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. Lanes whose `mask` element does not
/// have its highest bit set are taken from `src` instead.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. Lanes whose `mask` element does not
/// have its highest bit set are taken from `src` instead.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. Lanes whose `mask` element does not
/// have its highest bit set are taken from `src` instead.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(src, slice, offsets, mask, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. Lanes whose `mask` element does not
/// have its highest bit set are taken from `src` instead.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m256i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(src, slice, offsets, mask, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    let r = pgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. Lanes whose `mask` element does not
/// have its highest bit set are taken from `src` instead.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. Lanes whose `mask` element does not
/// have its highest bit set are taken from `src` instead.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. Lanes whose `mask` element does not
/// have its highest bit set are taken from `src` instead.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(src, slice, offsets, mask, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m256i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. Lanes whose `mask` element does not
/// have its highest bit set are taken from `src` instead.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m256i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(src, slice, offsets, mask, SCALE as i8)
}

/// Copies `a` to the result, then inserts 128 bits (of integer data) from `b`
/// at the location specified by `IMM1`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        let b = _mm256_castsi128_si256(b).as_i64x4();
        let dst: i64x4 = simd_shuffle!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]);
        transmute(dst)
    }
}

/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers, then horizontally adds adjacent pairs
/// of the intermediate results.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) }
}

/// Vertically multiplies each unsigned 8-bit integer from `a` with the
/// corresponding signed 8-bit integer from `b`, producing intermediate signed
/// 16-bit integers, then horizontally adds adjacent pairs with signed
/// saturation.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32())) }
}

/// Loads packed 32-bit integers from memory pointed to by `mem_addr` using
/// `mask` (elements are zeroed out when the highest bit is not set in the
/// corresponding element).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
    transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4()))
}

/// Loads packed 32-bit integers from memory pointed to by `mem_addr` using
/// `mask` (elements are zeroed out when the highest bit is not set in the
/// corresponding element).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
    transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8()))
}

/// Loads packed 64-bit integers from memory pointed to by `mem_addr` using
/// `mask` (elements are zeroed out when the highest bit is not set in the
/// corresponding element).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
    transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2()))
}

/// Loads packed 64-bit integers from memory pointed to by `mem_addr` using
/// `mask` (elements are zeroed out when the highest bit is not set in the
/// corresponding element).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
    transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4()))
}

/// Stores packed 32-bit integers from `a` into memory pointed to by `mem_addr`
/// using `mask` (elements are not stored when the highest bit is not set in
/// the corresponding element).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
    maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4())
}

/// Stores packed 32-bit integers from `a` into memory pointed to by `mem_addr`
/// using `mask` (elements are not stored when the highest bit is not set in
/// the corresponding element).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
    maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8())
}

/// Stores packed 64-bit integers from `a` into memory pointed to by `mem_addr`
/// using `mask` (elements are not stored when the highest bit is not set in
/// the corresponding element).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
    maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2())
}

/// Stores packed 64-bit integers from `a` into memory pointed to by `mem_addr`
/// using `mask` (elements are not stored when the highest bit is not set in
/// the corresponding element).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
    maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4())
}

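// The max/min intrinsics below are written as a comparison plus `simd_select`;
// this pattern is expected to lower to the corresponding `vpmax*`/`vpmin*`
// instruction (see the `assert_instr` annotations).
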
/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// maximum values.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
/// maximum values.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
/// maximum values.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns the
/// packed maximum values.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns the
/// packed maximum values.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u32x8();
        let b = b.as_u32x8();
        transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed maximum values.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
    }
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// minimum values.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
    }
}

/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
/// minimum values.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
    }
}

/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
/// minimum values.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
    }
}

/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns the
/// packed minimum values.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
    }
}

/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns the
/// packed minimum values.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u32x8();
        let b = b.as_u32x8();
        transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
    }
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed minimum values.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movemask_epi8(a: __m256i) -> i32 {
    unsafe {
        let z = i8x32::ZERO;
        let m: i8x32 = simd_lt(a.as_i8x32(), z);
        simd_bitmask::<_, u32>(m) as i32
    }
}
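
/// Computes sums of absolute differences of groups of four unsigned 8-bit
/// integers in `a` and `b`; `IMM8` selects the starting offsets of the
/// compared groups within each 128-bit lane.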
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmpsadbw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8)) }
}
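
/// Multiplies the low 32-bit signed integer of each packed 64-bit element in
/// `a` and `b`, and returns the signed 64-bit products.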
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
        let b = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
        transmute(simd_mul(a, b))
    }
}
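
/// Multiplies the low 32-bit unsigned integer of each packed 64-bit element
/// in `a` and `b`, and returns the unsigned 64-bit products.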
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u64x4();
        let b = b.as_u64x4();
        let mask = u64x4::splat(u32::MAX.into());
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, i32x16>(a.as_i16x16());
        let b = simd_cast::<_, i32x16>(b.as_i16x16());
        let r = simd_shr(simd_mul(a, b), i32x16::splat(16));
        transmute(simd_cast::<i32x16, i16x16>(r))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u32x16>(a.as_u16x16());
        let b = simd_cast::<_, u32x16>(b.as_u16x16());
        let r = simd_shr(simd_mul(a, b), u32x16::splat(16));
        transmute(simd_cast::<u32x16, u16x16>(r))
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) }
}
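
/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit products; each product is shifted right by 14,
/// incremented by 1, shifted right by 1, and truncated to 16 bits.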
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
}
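
/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation, handling each 128-bit lane independently.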
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpacksswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packsswb(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackssdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packssdw(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackuswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packuswb(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackusdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packusdw(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(permd(a.as_u32x8(), b.as_u32x8())) }
}
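
/// Permutes the 64-bit integers of `a` across lanes using the four 2-bit
/// selectors packed in `IMM8`.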
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let zero = i64x4::ZERO;
        let r: i64x4 = simd_shuffle!(
            a.as_i64x4(),
            zero,
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(vperm2i128(a.as_i64x4(), b.as_i64x4(), IMM8 as i8)) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        simd_shuffle!(
            a,
            _mm256_undefined_pd(),
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
            ],
        )
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 {
    unsafe { permps(a, idx.as_i32x8()) }
}
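
/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sums each consecutive group of 8 differences to
/// produce four unsigned 16-bit results in the low 16 bits of each 64-bit
/// element.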
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psadbw(a.as_u8x32(), b.as_u8x32())) }
}
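
/// Shuffles bytes from `a` according to the contents of `b`. Within each
/// 128-bit lane, the low 4 bits of each byte of `b` select a byte from the
/// corresponding lane of `a`; if the highest bit of a control byte is set,
/// the destination byte is zeroed.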
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pshufb(a.as_u8x32(), b.as_u8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(MASK, 8);
    unsafe {
        let r: i32x8 = simd_shuffle!(
            a.as_i32x8(),
            a.as_i32x8(),
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        let r: i16x16 = simd_shuffle!(
            a,
            a,
            [
                0,
                1,
                2,
                3,
                4 + (IMM8 as u32 & 0b11),
                4 + ((IMM8 as u32 >> 2) & 0b11),
                4 + ((IMM8 as u32 >> 4) & 0b11),
                4 + ((IMM8 as u32 >> 6) & 0b11),
                8,
                9,
                10,
                11,
                12 + (IMM8 as u32 & 0b11),
                12 + ((IMM8 as u32 >> 2) & 0b11),
                12 + ((IMM8 as u32 >> 4) & 0b11),
                12 + ((IMM8 as u32 >> 6) & 0b11),
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        let r: i16x16 = simd_shuffle!(
            a,
            a,
            [
                0 + (IMM8 as u32 & 0b11),
                0 + ((IMM8 as u32 >> 2) & 0b11),
                0 + ((IMM8 as u32 >> 4) & 0b11),
                0 + ((IMM8 as u32 >> 6) & 0b11),
                4,
                5,
                6,
                7,
                8 + (IMM8 as u32 & 0b11),
                8 + ((IMM8 as u32 >> 2) & 0b11),
                8 + ((IMM8 as u32 >> 4) & 0b11),
                8 + ((IMM8 as u32 >> 6) & 0b11),
                12,
                13,
                14,
                15,
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignw(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignd(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignb(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psllw(a.as_i16x16(), count.as_i16x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(pslld(a.as_i32x8(), count.as_i32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psllq(a.as_i64x4(), count.as_i64x2())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            _mm256_setzero_si256()
        } else {
            transmute(simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
        }
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            _mm256_setzero_si256()
        } else {
            transmute(simd_shl(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
        }
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 64 {
            _mm256_setzero_si256()
        } else {
            transmute(simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
        }
    }
}
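
/// Shifts 128-bit lanes in `a` left by `IMM8` bytes while shifting in zeros.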
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_bslli_epi128::<IMM8>(a)
}
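
/// Shifts 128-bit lanes in `a` left by `IMM8` bytes while shifting in zeros.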
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 || i % 16 < shift {
            0
        } else {
            32 + (i - shift)
        }
    }
    unsafe {
        let a = a.as_i8x32();
        let r: i8x32 = simd_shuffle!(
            i8x32::ZERO,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
            ],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psllvd(a.as_i32x4(), count.as_i32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
    unsafe { transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psllvq(a.as_i64x2(), count.as_i64x2())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe { transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psraw(a.as_i16x16(), count.as_i16x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrad(a.as_i32x8(), count.as_i32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16))) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31)))) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psravd(a.as_i32x4(), count.as_i32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
    unsafe { transmute(psravd256(a.as_i32x8(), count.as_i32x8())) }
}
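
/// Shifts 128-bit lanes in `a` right by `IMM8` bytes while shifting in zeros.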
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_bsrli_epi128::<IMM8>(a)
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i8x32();
        let zero = i8x32::ZERO;
        let r: i8x32 = match IMM8 % 16 {
            0 => simd_shuffle!(
                a,
                zero,
                [
                    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
                    22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                ],
            ),
            1 => simd_shuffle!(
                a,
                zero,
                [
                    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22,
                    23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                ],
            ),
            2 => simd_shuffle!(
                a,
                zero,
                [
                    2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 18, 19, 20, 21, 22, 23,
                    24, 25, 26, 27, 28, 29, 30, 31, 32, 32,
                ],
            ),
            3 => simd_shuffle!(
                a,
                zero,
                [
                    3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 19, 20, 21, 22, 23,
                    24, 25, 26, 27, 28, 29, 30, 31, 32, 32, 32,
                ],
            ),
            4 => simd_shuffle!(
                a,
                zero,
                [
                    4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 20, 21, 22, 23, 24,
                    25, 26, 27, 28, 29, 30, 31, 32, 32, 32, 32,
                ],
            ),
            5 => simd_shuffle!(
                a,
                zero,
                [
                    5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 21, 22, 23, 24, 25,
                    26, 27, 28, 29, 30, 31, 32, 32, 32, 32, 32,
                ],
            ),
            6 => simd_shuffle!(
                a,
                zero,
                [
                    6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 22, 23, 24, 25, 26,
                    27, 28, 29, 30, 31, 32, 32, 32, 32, 32, 32,
                ],
            ),
            7 => simd_shuffle!(
                a,
                zero,
                [
                    7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 23, 24, 25, 26,
                    27, 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32,
                ],
            ),
            8 => simd_shuffle!(
                a,
                zero,
                [
                    8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 24, 25, 26, 27,
                    28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32,
                ],
            ),
            9 => simd_shuffle!(
                a,
                zero,
                [
                    9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 25, 26, 27, 28,
                    29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
                ],
            ),
            10 => simd_shuffle!(
                a,
                zero,
                [
                    10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 26, 27, 28, 29,
                    30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
                ],
            ),
            11 => simd_shuffle!(
                a,
                zero,
                [
                    11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 27, 28, 29, 30,
                    31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
                ],
            ),
            12 => simd_shuffle!(
                a,
                zero,
                [
                    12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 28, 29, 30, 31,
                    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
                ],
            ),
            13 => simd_shuffle!(
                a,
                zero,
                [
                    13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 29, 30, 31, 32,
                    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
                ],
            ),
            14 => simd_shuffle!(
                a,
                zero,
                [
                    14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 30, 31, 32, 32,
                    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
                ],
            ),
            15 => simd_shuffle!(
                a,
                zero,
                [
                    15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32,
                    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
                ],
            ),
            _ => zero,
        };
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrlw(a.as_i16x16(), count.as_i16x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrld(a.as_i32x8(), count.as_i32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrlq(a.as_i64x4(), count.as_i64x2())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            _mm256_setzero_si256()
        } else {
            transmute(simd_shr(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
        }
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            _mm256_setzero_si256()
        } else {
            transmute(simd_shr(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
        }
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 64 {
            _mm256_setzero_si256()
        } else {
            transmute(simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
        }
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
    unsafe { transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe { transmute(psrlvq256(a.as_i64x4(), count.as_i64x4())) }
}
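
/// Loads 256 bits of integer data from memory using a non-temporal hint to
/// minimize cache pollution.
///
/// # Safety
///
/// `mem_addr` must be aligned on a 32-byte boundary.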
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovntdqa))]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm256_stream_load_si256(mem_addr: *const __m256i) -> __m256i {
    let dst: __m256i;
    crate::arch::asm!(
        vpl!("vmovntdqa {a}"),
        a = out(ymm_reg) dst,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        #[rustfmt::skip]
        let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
                8, 40, 9, 41, 10, 42, 11, 43,
                12, 44, 13, 45, 14, 46, 15, 47,
                24, 56, 25, 57, 26, 58, 27, 59,
                28, 60, 29, 61, 30, 62, 31, 63,
        ]);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        #[rustfmt::skip]
        let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
            0, 32, 1, 33, 2, 34, 3, 35,
            4, 36, 5, 37, 6, 38, 7, 39,
            16, 48, 17, 49, 18, 50, 19, 51,
            20, 52, 21, 53, 22, 54, 23, 55,
        ]);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i16x16 = simd_shuffle!(
            a.as_i16x16(),
            b.as_i16x16(),
            [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i16x16 = simd_shuffle!(
            a.as_i16x16(),
            b.as_i16x16(),
            [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
        );
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
        transmute(r)
    }
}

#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
}

#[inline]
#[target_feature(enable = "avx2")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
    static_assert_uimm_bits!(INDEX, 5);
    unsafe { simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 }
}

#[inline]
#[target_feature(enable = "avx2")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
    static_assert_uimm_bits!(INDEX, 4);
    unsafe { simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 }
}
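
// LLVM intrinsic declarations backing the AVX2 functions above.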
#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.avx2.phadd.w"]
    fn phaddw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.phadd.d"]
    fn phaddd(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.phadd.sw"]
    fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.phsub.w"]
    fn phsubw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.phsub.d"]
    fn phsubd(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.phsub.sw"]
    fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.pmadd.wd"]
    fn pmaddwd(a: i16x16, b: i16x16) -> i32x8;
    #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
    fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
    #[link_name = "llvm.x86.avx2.maskload.d"]
    fn maskloadd(mem_addr: *const i8, mask: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.maskload.d.256"]
    fn maskloadd256(mem_addr: *const i8, mask: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.maskload.q"]
    fn maskloadq(mem_addr: *const i8, mask: i64x2) -> i64x2;
    #[link_name = "llvm.x86.avx2.maskload.q.256"]
    fn maskloadq256(mem_addr: *const i8, mask: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx2.maskstore.d"]
    fn maskstored(mem_addr: *mut i8, mask: i32x4, a: i32x4);
    #[link_name = "llvm.x86.avx2.maskstore.d.256"]
    fn maskstored256(mem_addr: *mut i8, mask: i32x8, a: i32x8);
    #[link_name = "llvm.x86.avx2.maskstore.q"]
    fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2);
    #[link_name = "llvm.x86.avx2.maskstore.q.256"]
    fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
    #[link_name = "llvm.x86.avx2.mpsadbw"]
    fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
    #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
    fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.packsswb"]
    fn packsswb(a: i16x16, b: i16x16) -> i8x32;
    #[link_name = "llvm.x86.avx2.packssdw"]
    fn packssdw(a: i32x8, b: i32x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.packuswb"]
    fn packuswb(a: i16x16, b: i16x16) -> u8x32;
    #[link_name = "llvm.x86.avx2.packusdw"]
    fn packusdw(a: i32x8, b: i32x8) -> u16x16;
    #[link_name = "llvm.x86.avx2.psad.bw"]
    fn psadbw(a: u8x32, b: u8x32) -> u64x4;
    #[link_name = "llvm.x86.avx2.psign.b"]
    fn psignb(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx2.psign.w"]
    fn psignw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.psign.d"]
    fn psignd(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.psll.w"]
    fn psllw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psll.d"]
    fn pslld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psll.q"]
    fn psllq(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx2.psllv.d"]
    fn psllvd(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.psllv.d.256"]
    fn psllvd256(a: i32x8, count: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.psllv.q"]
    fn psllvq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.avx2.psllv.q.256"]
    fn psllvq256(a: i64x4, count: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx2.psra.w"]
    fn psraw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psra.d"]
    fn psrad(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrav.d"]
    fn psravd(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.psrav.d.256"]
    fn psravd256(a: i32x8, count: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.w"]
    fn psrlw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psrl.d"]
    fn psrld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.q"]
    fn psrlq(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx2.psrlv.d"]
    fn psrlvd(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.psrlv.d.256"]
    fn psrlvd256(a: i32x8, count: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrlv.q"]
    fn psrlvq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.avx2.psrlv.q.256"]
    fn psrlvq256(a: i64x4, count: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx2.pshuf.b"]
    fn pshufb(a: u8x32, b: u8x32) -> u8x32;
    #[link_name = "llvm.x86.avx2.permd"]
    fn permd(a: u32x8, b: u32x8) -> u32x8;
    #[link_name = "llvm.x86.avx2.permps"]
    fn permps(a: __m256, b: i32x8) -> __m256;
    #[link_name = "llvm.x86.avx2.vperm2i128"]
    fn vperm2i128(a: i64x4, b: i64x4, imm8: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.d.d"]
    fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.d.d.256"]
    fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8;
    #[link_name = "llvm.x86.avx2.gather.d.q"]
    fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.d.q.256"]
    fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.q.d"]
    fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.d.256"]
    fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.q"]
    fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.q.q.256"]
    fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.d.pd"]
    fn pgatherdpd(
        src: __m128d,
        slice: *const i8,
        offsets: i32x4,
        mask: __m128d,
        scale: i8,
    ) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.d.pd.256"]
    fn vpgatherdpd(
        src: __m256d,
        slice: *const i8,
        offsets: i32x4,
        mask: __m256d,
        scale: i8,
    ) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.q.pd"]
    fn pgatherqpd(
        src: __m128d,
        slice: *const i8,
        offsets: i64x2,
        mask: __m128d,
        scale: i8,
    ) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.q.pd.256"]
    fn vpgatherqpd(
        src: __m256d,
        slice: *const i8,
        offsets: i64x4,
        mask: __m256d,
        scale: i8,
    ) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.d.ps"]
    fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8)
    -> __m128;
    #[link_name = "llvm.x86.avx2.gather.d.ps.256"]
    fn vpgatherdps(
        src: __m256,
        slice: *const i8,
        offsets: i32x8,
        mask: __m256,
        scale: i8,
    ) -> __m256;
    #[link_name = "llvm.x86.avx2.gather.q.ps"]
    fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8)
    -> __m128;
    #[link_name = "llvm.x86.avx2.gather.q.ps.256"]
    fn vpgatherqps(
        src: __m128,
        slice: *const i8,
        offsets: i64x4,
        mask: __m128,
        scale: i8,
    ) -> __m128;
}

#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_abs_epi32() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi32(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_abs_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0,  1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i16::MAX, i16::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi16(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i16::MAX, i16::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_abs_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi8(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_add_epi64() {
        let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
        let b = _mm256_setr_epi64x(-1, 0, 1, 2);
        let r = _mm256_add_epi64(a, b);
        let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_add_epi32() {
        let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_add_epi32(a, b);
        let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_add_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_add_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 2, 4, 6, 8, 10, 12, 14,
            16, 18, 20, 22, 24, 26, 28, 30,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_add_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm256_add_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            0, 2, 4, 6, 8, 10, 12, 14,
            16, 18, 20, 22, 24, 26, 28, 30,
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55,
            56, 57, 58, 59, 60, 61, 62, 63,
        );
        let r = _mm256_adds_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
            64, 66, 68, 70, 72, 74, 76, 78,
            80, 82, 84, 86, 88, 90, 92, 94,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi8_saturate_positive() {
        let a = _mm256_set1_epi8(0x7F);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_adds_epi8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi8_saturate_negative() {
        let a = _mm256_set1_epi8(-0x80);
        let b = _mm256_set1_epi8(-1);
        let r = _mm256_adds_epi8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
        );
        let r = _mm256_adds_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi16_saturate_positive() {
        let a = _mm256_set1_epi16(0x7FFF);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_adds_epi16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi16_saturate_negative() {
        let a = _mm256_set1_epi16(-0x8000);
        let b = _mm256_set1_epi16(-1);
        let r = _mm256_adds_epi16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epu8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55,
            56, 57, 58, 59, 60, 61, 62, 63,
        );
        let r = _mm256_adds_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
            64, 66, 68, 70, 72, 74, 76, 78,
            80, 82, 84, 86, 88, 90, 92, 94,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epu8_saturate() {
        let a = _mm256_set1_epi8(!0);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_adds_epu8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epu16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
        );
        let r = _mm256_adds_epu16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epu16_saturate() {
        let a = _mm256_set1_epi16(!0);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_adds_epu16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_and_si256() {
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let got = _mm256_and_si256(a, b);
        assert_eq_m256i(got, _mm256_set1_epi8(1));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_andnot_si256() {
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let got = _mm256_andnot_si256(a, b);
        assert_eq_m256i(got, _mm256_set1_epi8(2));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_avg_epu8() {
        let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
        let r = _mm256_avg_epu8(a, b);
        assert_eq_m256i(r, _mm256_set1_epi8(6));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_avg_epu16() {
        let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
        let r = _mm256_avg_epu16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(6));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_blend_epi32() {
        let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
        let e = _mm_setr_epi32(9, 3, 3, 3);
        let r = _mm_blend_epi32::<0x01>(a, b);
        assert_eq_m128i(r, e);

        let r = _mm_blend_epi32::<0x0E>(b, a);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_blend_epi32() {
        let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
        let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
        let r = _mm256_blend_epi32::<0x01>(a, b);
        assert_eq_m256i(r, e);

        let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9);
        let r = _mm256_blend_epi32::<0x82>(a, b);
        assert_eq_m256i(r, e);

        let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3);
        let r = _mm256_blend_epi32::<0x7C>(a, b);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_blend_epi16() {
        let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
        let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
        let r = _mm256_blend_epi16::<0x01>(a, b);
        assert_eq_m256i(r, e);

        let r = _mm256_blend_epi16::<0xFE>(b, a);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_blendv_epi8() {
        let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
        let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1);
        let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2);
        let r = _mm256_blendv_epi8(a, b, mask);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastb_epi8() {
        let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
        let res = _mm_broadcastb_epi8(a);
        assert_eq_m128i(res, _mm_set1_epi8(0x2a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastb_epi8() {
        let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
        let res = _mm256_broadcastb_epi8(a);
        assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastd_epi32() {
        let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
        let res = _mm_broadcastd_epi32(a);
        assert_eq_m128i(res, _mm_set1_epi32(0x2a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastd_epi32() {
        let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
        let res = _mm256_broadcastd_epi32(a);
        assert_eq_m256i(res, _mm256_set1_epi32(0x2a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastq_epi64() {
        let a = _mm_setr_epi64x(0x1ffffffff, 0);
        let res = _mm_broadcastq_epi64(a);
        assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastq_epi64() {
        let a = _mm_setr_epi64x(0x1ffffffff, 0);
        let res = _mm256_broadcastq_epi64(a);
        assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastsd_pd() {
        let a = _mm_setr_pd(6.88, 3.44);
        let res = _mm_broadcastsd_pd(a);
        assert_eq_m128d(res, _mm_set1_pd(6.88));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastsd_pd() {
        let a = _mm_setr_pd(6.88, 3.44);
        let res = _mm256_broadcastsd_pd(a);
        assert_eq_m256d(res, _mm256_set1_pd(6.88f64));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastsi128_si256() {
        let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
        let res = _mm_broadcastsi128_si256(a);
        let retval = _mm256_setr_epi64x(
            0x0987654321012334,
            0x5678909876543210,
            0x0987654321012334,
            0x5678909876543210,
        );
        assert_eq_m256i(res, retval);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastsi128_si256() {
        let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
        let res = _mm256_broadcastsi128_si256(a);
4359        let retval = _mm256_setr_epi64x(
4360            0x0987654321012334,
4361            0x5678909876543210,
4362            0x0987654321012334,
4363            0x5678909876543210,
4364        );
4365        assert_eq_m256i(res, retval);
4366    }
4367
4368    #[simd_test(enable = "avx2")]
4369    unsafe fn test_mm_broadcastss_ps() {
4370        let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4371        let res = _mm_broadcastss_ps(a);
4372        assert_eq_m128(res, _mm_set1_ps(6.88));
4373    }
4374
4375    #[simd_test(enable = "avx2")]
4376    unsafe fn test_mm256_broadcastss_ps() {
4377        let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4378        let res = _mm256_broadcastss_ps(a);
4379        assert_eq_m256(res, _mm256_set1_ps(6.88));
4380    }
4381
4382    #[simd_test(enable = "avx2")]
4383    unsafe fn test_mm_broadcastw_epi16() {
4384        let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4385        let res = _mm_broadcastw_epi16(a);
4386        assert_eq_m128i(res, _mm_set1_epi16(0x22b));
4387    }
4388
4389    #[simd_test(enable = "avx2")]
4390    unsafe fn test_mm256_broadcastw_epi16() {
4391        let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4392        let res = _mm256_broadcastw_epi16(a);
4393        assert_eq_m256i(res, _mm256_set1_epi16(0x22b));
4394    }
4395
4396    #[simd_test(enable = "avx2")]
4397    unsafe fn test_mm256_cmpeq_epi8() {
4398        #[rustfmt::skip]
4399        let a = _mm256_setr_epi8(
4400            0, 1, 2, 3, 4, 5, 6, 7,
4401            8, 9, 10, 11, 12, 13, 14, 15,
4402            16, 17, 18, 19, 20, 21, 22, 23,
4403            24, 25, 26, 27, 28, 29, 30, 31,
4404        );
4405        #[rustfmt::skip]
4406        let b = _mm256_setr_epi8(
4407            31, 30, 2, 28, 27, 26, 25, 24,
4408            23, 22, 21, 20, 19, 18, 17, 16,
4409            15, 14, 13, 12, 11, 10, 9, 8,
4410            7, 6, 5, 4, 3, 2, 1, 0,
4411        );
4412        let r = _mm256_cmpeq_epi8(a, b);
4413        assert_eq_m256i(r, _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), !0));
4414    }
4415
4416    #[simd_test(enable = "avx2")]
4417    unsafe fn test_mm256_cmpeq_epi16() {
4418        #[rustfmt::skip]
4419        let a = _mm256_setr_epi16(
4420            0, 1, 2, 3, 4, 5, 6, 7,
4421            8, 9, 10, 11, 12, 13, 14, 15,
4422        );
4423        #[rustfmt::skip]
4424        let b = _mm256_setr_epi16(
4425            15, 14, 2, 12, 11, 10, 9, 8,
4426            7, 6, 5, 4, 3, 2, 1, 0,
4427        );
4428        let r = _mm256_cmpeq_epi16(a, b);
4429        assert_eq_m256i(r, _mm256_insert_epi16::<2>(_mm256_set1_epi16(0), !0));
4430    }
4431
4432    #[simd_test(enable = "avx2")]
4433    unsafe fn test_mm256_cmpeq_epi32() {
4434        let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4435        let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
4436        let r = _mm256_cmpeq_epi32(a, b);
4437        let e = _mm256_set1_epi32(0);
4438        let e = _mm256_insert_epi32::<2>(e, !0);
4439        assert_eq_m256i(r, e);
4440    }
4441
4442    #[simd_test(enable = "avx2")]
4443    unsafe fn test_mm256_cmpeq_epi64() {
4444        let a = _mm256_setr_epi64x(0, 1, 2, 3);
4445        let b = _mm256_setr_epi64x(3, 2, 2, 0);
4446        let r = _mm256_cmpeq_epi64(a, b);
4447        assert_eq_m256i(r, _mm256_insert_epi64::<2>(_mm256_set1_epi64x(0), !0));
4448    }
4449
4450    #[simd_test(enable = "avx2")]
4451    unsafe fn test_mm256_cmpgt_epi8() {
4452        let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5);
4453        let b = _mm256_set1_epi8(0);
4454        let r = _mm256_cmpgt_epi8(a, b);
4455        assert_eq_m256i(r, _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), !0));
4456    }
4457
4458    #[simd_test(enable = "avx2")]
4459    unsafe fn test_mm256_cmpgt_epi16() {
4460        let a = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 5);
4461        let b = _mm256_set1_epi16(0);
4462        let r = _mm256_cmpgt_epi16(a, b);
4463        assert_eq_m256i(r, _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), !0));
4464    }
4465
4466    #[simd_test(enable = "avx2")]
4467    unsafe fn test_mm256_cmpgt_epi32() {
4468        let a = _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), 5);
4469        let b = _mm256_set1_epi32(0);
4470        let r = _mm256_cmpgt_epi32(a, b);
4471        assert_eq_m256i(r, _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), !0));
4472    }
4473
4474    #[simd_test(enable = "avx2")]
4475    unsafe fn test_mm256_cmpgt_epi64() {
4476        let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5);
4477        let b = _mm256_set1_epi64x(0);
4478        let r = _mm256_cmpgt_epi64(a, b);
4479        assert_eq_m256i(r, _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), !0));
4480    }
4481
4482    #[simd_test(enable = "avx2")]
4483    unsafe fn test_mm256_cvtepi8_epi16() {
4484        #[rustfmt::skip]
4485        let a = _mm_setr_epi8(
4486            0, 0, -1, 1, -2, 2, -3, 3,
4487            -4, 4, -5, 5, -6, 6, -7, 7,
4488        );
4489        #[rustfmt::skip]
4490        let r = _mm256_setr_epi16(
4491            0, 0, -1, 1, -2, 2, -3, 3,
4492            -4, 4, -5, 5, -6, 6, -7, 7,
4493        );
4494        assert_eq_m256i(r, _mm256_cvtepi8_epi16(a));
4495    }
4496
4497    #[simd_test(enable = "avx2")]
4498    unsafe fn test_mm256_cvtepi8_epi32() {
4499        #[rustfmt::skip]
4500        let a = _mm_setr_epi8(
4501            0, 0, -1, 1, -2, 2, -3, 3,
4502            -4, 4, -5, 5, -6, 6, -7, 7,
4503        );
4504        let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4505        assert_eq_m256i(r, _mm256_cvtepi8_epi32(a));
4506    }
4507
4508    #[simd_test(enable = "avx2")]
4509    unsafe fn test_mm256_cvtepi8_epi64() {
4510        #[rustfmt::skip]
4511        let a = _mm_setr_epi8(
4512            0, 0, -1, 1, -2, 2, -3, 3,
4513            -4, 4, -5, 5, -6, 6, -7, 7,
4514        );
4515        let r = _mm256_setr_epi64x(0, 0, -1, 1);
4516        assert_eq_m256i(r, _mm256_cvtepi8_epi64(a));
4517    }
4518
4519    #[simd_test(enable = "avx2")]
4520    unsafe fn test_mm256_cvtepi16_epi32() {
4521        let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4522        let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4523        assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
4524    }
4525
4526    #[simd_test(enable = "avx2")]
4527    unsafe fn test_mm256_cvtepi16_epi64() {
4528        let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4529        let r = _mm256_setr_epi64x(0, 0, -1, 1);
4530        assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
4531    }
4532
4533    #[simd_test(enable = "avx2")]
4534    unsafe fn test_mm256_cvtepi32_epi64() {
4535        let a = _mm_setr_epi32(0, 0, -1, 1);
4536        let r = _mm256_setr_epi64x(0, 0, -1, 1);
4537        assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
4538    }
4539
4540    #[simd_test(enable = "avx2")]
4541    unsafe fn test_mm256_cvtepu16_epi32() {
4542        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4543        let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4544        assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
4545    }
4546
4547    #[simd_test(enable = "avx2")]
4548    unsafe fn test_mm256_cvtepu16_epi64() {
4549        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4550        let r = _mm256_setr_epi64x(0, 1, 2, 3);
4551        assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
4552    }
4553
4554    #[simd_test(enable = "avx2")]
4555    unsafe fn test_mm256_cvtepu32_epi64() {
4556        let a = _mm_setr_epi32(0, 1, 2, 3);
4557        let r = _mm256_setr_epi64x(0, 1, 2, 3);
4558        assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
4559    }
4560
4561    #[simd_test(enable = "avx2")]
4562    unsafe fn test_mm256_cvtepu8_epi16() {
4563        #[rustfmt::skip]
4564        let a = _mm_setr_epi8(
4565            0, 1, 2, 3, 4, 5, 6, 7,
4566            8, 9, 10, 11, 12, 13, 14, 15,
4567        );
4568        #[rustfmt::skip]
4569        let r = _mm256_setr_epi16(
4570            0, 1, 2, 3, 4, 5, 6, 7,
4571            8, 9, 10, 11, 12, 13, 14, 15,
4572        );
4573        assert_eq_m256i(r, _mm256_cvtepu8_epi16(a));
4574    }
4575
4576    #[simd_test(enable = "avx2")]
4577    unsafe fn test_mm256_cvtepu8_epi32() {
4578        #[rustfmt::skip]
4579        let a = _mm_setr_epi8(
4580            0, 1, 2, 3, 4, 5, 6, 7,
4581            8, 9, 10, 11, 12, 13, 14, 15,
4582        );
4583        let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4584        assert_eq_m256i(r, _mm256_cvtepu8_epi32(a));
4585    }
4586
4587    #[simd_test(enable = "avx2")]
4588    unsafe fn test_mm256_cvtepu8_epi64() {
4589        #[rustfmt::skip]
4590        let a = _mm_setr_epi8(
4591            0, 1, 2, 3, 4, 5, 6, 7,
4592            8, 9, 10, 11, 12, 13, 14, 15,
4593        );
4594        let r = _mm256_setr_epi64x(0, 1, 2, 3);
4595        assert_eq_m256i(r, _mm256_cvtepu8_epi64(a));
4596    }
4597
4598    #[simd_test(enable = "avx2")]
4599    unsafe fn test_mm256_extracti128_si256() {
4600        let a = _mm256_setr_epi64x(1, 2, 3, 4);
4601        let r = _mm256_extracti128_si256::<1>(a);
4602        let e = _mm_setr_epi64x(3, 4);
4603        assert_eq_m128i(r, e);
4604    }
4605
4606    #[simd_test(enable = "avx2")]
4607    unsafe fn test_mm256_hadd_epi16() {
4608        let a = _mm256_set1_epi16(2);
4609        let b = _mm256_set1_epi16(4);
4610        let r = _mm256_hadd_epi16(a, b);
4611        let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
4612        assert_eq_m256i(r, e);
4613    }
4614
4615    #[simd_test(enable = "avx2")]
4616    unsafe fn test_mm256_hadd_epi32() {
4617        let a = _mm256_set1_epi32(2);
4618        let b = _mm256_set1_epi32(4);
4619        let r = _mm256_hadd_epi32(a, b);
4620        let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8);
4621        assert_eq_m256i(r, e);
4622    }
4623
4624    #[simd_test(enable = "avx2")]
4625    unsafe fn test_mm256_hadds_epi16() {
4626        let a = _mm256_set1_epi16(2);
4627        let a = _mm256_insert_epi16::<0>(a, 0x7fff);
4628        let a = _mm256_insert_epi16::<1>(a, 1);
4629        let b = _mm256_set1_epi16(4);
4630        let r = _mm256_hadds_epi16(a, b);
4631        #[rustfmt::skip]
4632        let e = _mm256_setr_epi16(
4633            0x7FFF, 4, 4, 4, 8, 8, 8, 8,
4634            4, 4, 4, 4, 8, 8, 8, 8,
4635        );
4636        assert_eq_m256i(r, e);
4637    }
4638
4639    #[simd_test(enable = "avx2")]
4640    unsafe fn test_mm256_hsub_epi16() {
4641        let a = _mm256_set1_epi16(2);
4642        let b = _mm256_set1_epi16(4);
4643        let r = _mm256_hsub_epi16(a, b);
4644        let e = _mm256_set1_epi16(0);
4645        assert_eq_m256i(r, e);
4646    }
4647
4648    #[simd_test(enable = "avx2")]
4649    unsafe fn test_mm256_hsub_epi32() {
4650        let a = _mm256_set1_epi32(2);
4651        let b = _mm256_set1_epi32(4);
4652        let r = _mm256_hsub_epi32(a, b);
4653        let e = _mm256_set1_epi32(0);
4654        assert_eq_m256i(r, e);
4655    }
4656
4657    #[simd_test(enable = "avx2")]
4658    unsafe fn test_mm256_hsubs_epi16() {
4659        let a = _mm256_set1_epi16(2);
4660        let a = _mm256_insert_epi16::<0>(a, 0x7fff);
4661        let a = _mm256_insert_epi16::<1>(a, -1);
4662        let b = _mm256_set1_epi16(4);
4663        let r = _mm256_hsubs_epi16(a, b);
4664        let e = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 0x7FFF);
4665        assert_eq_m256i(r, e);
4666    }
4667
4668    #[simd_test(enable = "avx2")]
4669    unsafe fn test_mm256_madd_epi16() {
4670        let a = _mm256_set1_epi16(2);
4671        let b = _mm256_set1_epi16(4);
4672        let r = _mm256_madd_epi16(a, b);
4673        let e = _mm256_set1_epi32(16);
4674        assert_eq_m256i(r, e);
4675    }
4676
4677    #[simd_test(enable = "avx2")]
4678    unsafe fn test_mm256_inserti128_si256() {
4679        let a = _mm256_setr_epi64x(1, 2, 3, 4);
4680        let b = _mm_setr_epi64x(7, 8);
4681        let r = _mm256_inserti128_si256::<1>(a, b);
4682        let e = _mm256_setr_epi64x(1, 2, 7, 8);
4683        assert_eq_m256i(r, e);
4684    }
4685
4686    #[simd_test(enable = "avx2")]
4687    unsafe fn test_mm256_maddubs_epi16() {
4688        let a = _mm256_set1_epi8(2);
4689        let b = _mm256_set1_epi8(4);
4690        let r = _mm256_maddubs_epi16(a, b);
4691        let e = _mm256_set1_epi16(16);
4692        assert_eq_m256i(r, e);
4693    }
4694
4695    #[simd_test(enable = "avx2")]
4696    unsafe fn test_mm_maskload_epi32() {
4697        let nums = [1, 2, 3, 4];
4698        let a = &nums as *const i32;
4699        let mask = _mm_setr_epi32(-1, 0, 0, -1);
4700        let r = _mm_maskload_epi32(a, mask);
4701        let e = _mm_setr_epi32(1, 0, 0, 4);
4702        assert_eq_m128i(r, e);
4703    }
4704
4705    #[simd_test(enable = "avx2")]
4706    unsafe fn test_mm256_maskload_epi32() {
4707        let nums = [1, 2, 3, 4, 5, 6, 7, 8];
4708        let a = &nums as *const i32;
4709        let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4710        let r = _mm256_maskload_epi32(a, mask);
4711        let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
4712        assert_eq_m256i(r, e);
4713    }
4714
4715    #[simd_test(enable = "avx2")]
4716    unsafe fn test_mm_maskload_epi64() {
4717        let nums = [1_i64, 2_i64];
4718        let a = &nums as *const i64;
4719        let mask = _mm_setr_epi64x(0, -1);
4720        let r = _mm_maskload_epi64(a, mask);
4721        let e = _mm_setr_epi64x(0, 2);
4722        assert_eq_m128i(r, e);
4723    }
4724
4725    #[simd_test(enable = "avx2")]
4726    unsafe fn test_mm256_maskload_epi64() {
4727        let nums = [1_i64, 2_i64, 3_i64, 4_i64];
4728        let a = &nums as *const i64;
4729        let mask = _mm256_setr_epi64x(0, -1, -1, 0);
4730        let r = _mm256_maskload_epi64(a, mask);
4731        let e = _mm256_setr_epi64x(0, 2, 3, 0);
4732        assert_eq_m256i(r, e);
4733    }
4734
4735    #[simd_test(enable = "avx2")]
4736    unsafe fn test_mm_maskstore_epi32() {
4737        let a = _mm_setr_epi32(1, 2, 3, 4);
4738        let mut arr = [-1, -1, -1, -1];
4739        let mask = _mm_setr_epi32(-1, 0, 0, -1);
4740        _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4741        let e = [1, -1, -1, 4];
4742        assert_eq!(arr, e);
4743    }
4744
4745    #[simd_test(enable = "avx2")]
4746    unsafe fn test_mm256_maskstore_epi32() {
4747        let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
4748        let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
4749        let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4750        _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4751        let e = [1, -1, -1, 42, -1, 6, 7, -1];
4752        assert_eq!(arr, e);
4753    }
4754
4755    #[simd_test(enable = "avx2")]
4756    unsafe fn test_mm_maskstore_epi64() {
4757        let a = _mm_setr_epi64x(1_i64, 2_i64);
4758        let mut arr = [-1_i64, -1_i64];
4759        let mask = _mm_setr_epi64x(0, -1);
4760        _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4761        let e = [-1, 2];
4762        assert_eq!(arr, e);
4763    }
4764
4765    #[simd_test(enable = "avx2")]
4766    unsafe fn test_mm256_maskstore_epi64() {
4767        let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
4768        let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
4769        let mask = _mm256_setr_epi64x(0, -1, -1, 0);
4770        _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4771        let e = [-1, 2, 3, -1];
4772        assert_eq!(arr, e);
4773    }
4774
4775    #[simd_test(enable = "avx2")]
4776    unsafe fn test_mm256_max_epi16() {
4777        let a = _mm256_set1_epi16(2);
4778        let b = _mm256_set1_epi16(4);
4779        let r = _mm256_max_epi16(a, b);
4780        assert_eq_m256i(r, b);
4781    }
4782
4783    #[simd_test(enable = "avx2")]
4784    unsafe fn test_mm256_max_epi32() {
4785        let a = _mm256_set1_epi32(2);
4786        let b = _mm256_set1_epi32(4);
4787        let r = _mm256_max_epi32(a, b);
4788        assert_eq_m256i(r, b);
4789    }
4790
4791    #[simd_test(enable = "avx2")]
4792    unsafe fn test_mm256_max_epi8() {
4793        let a = _mm256_set1_epi8(2);
4794        let b = _mm256_set1_epi8(4);
4795        let r = _mm256_max_epi8(a, b);
4796        assert_eq_m256i(r, b);
4797    }
4798
4799    #[simd_test(enable = "avx2")]
4800    unsafe fn test_mm256_max_epu16() {
4801        let a = _mm256_set1_epi16(2);
4802        let b = _mm256_set1_epi16(4);
4803        let r = _mm256_max_epu16(a, b);
4804        assert_eq_m256i(r, b);
4805    }
4806
4807    #[simd_test(enable = "avx2")]
4808    unsafe fn test_mm256_max_epu32() {
4809        let a = _mm256_set1_epi32(2);
4810        let b = _mm256_set1_epi32(4);
4811        let r = _mm256_max_epu32(a, b);
4812        assert_eq_m256i(r, b);
4813    }
4814
4815    #[simd_test(enable = "avx2")]
4816    unsafe fn test_mm256_max_epu8() {
4817        let a = _mm256_set1_epi8(2);
4818        let b = _mm256_set1_epi8(4);
4819        let r = _mm256_max_epu8(a, b);
4820        assert_eq_m256i(r, b);
4821    }
4822
4823    #[simd_test(enable = "avx2")]
4824    unsafe fn test_mm256_min_epi16() {
4825        let a = _mm256_set1_epi16(2);
4826        let b = _mm256_set1_epi16(4);
4827        let r = _mm256_min_epi16(a, b);
4828        assert_eq_m256i(r, a);
4829    }
4830
4831    #[simd_test(enable = "avx2")]
4832    unsafe fn test_mm256_min_epi32() {
4833        let a = _mm256_set1_epi32(2);
4834        let b = _mm256_set1_epi32(4);
4835        let r = _mm256_min_epi32(a, b);
4836        assert_eq_m256i(r, a);
4837    }
4838
4839    #[simd_test(enable = "avx2")]
4840    unsafe fn test_mm256_min_epi8() {
4841        let a = _mm256_set1_epi8(2);
4842        let b = _mm256_set1_epi8(4);
4843        let r = _mm256_min_epi8(a, b);
4844        assert_eq_m256i(r, a);
4845    }
4846
4847    #[simd_test(enable = "avx2")]
4848    unsafe fn test_mm256_min_epu16() {
4849        let a = _mm256_set1_epi16(2);
4850        let b = _mm256_set1_epi16(4);
4851        let r = _mm256_min_epu16(a, b);
4852        assert_eq_m256i(r, a);
4853    }
4854
4855    #[simd_test(enable = "avx2")]
4856    unsafe fn test_mm256_min_epu32() {
4857        let a = _mm256_set1_epi32(2);
4858        let b = _mm256_set1_epi32(4);
4859        let r = _mm256_min_epu32(a, b);
4860        assert_eq_m256i(r, a);
4861    }
4862
4863    #[simd_test(enable = "avx2")]
4864    unsafe fn test_mm256_min_epu8() {
4865        let a = _mm256_set1_epi8(2);
4866        let b = _mm256_set1_epi8(4);
4867        let r = _mm256_min_epu8(a, b);
4868        assert_eq_m256i(r, a);
4869    }
4870
4871    #[simd_test(enable = "avx2")]
4872    unsafe fn test_mm256_movemask_epi8() {
4873        let a = _mm256_set1_epi8(-1);
4874        let r = _mm256_movemask_epi8(a);
4875        let e = -1;
4876        assert_eq!(r, e);
4877    }
4878
4879    #[simd_test(enable = "avx2")]
4880    unsafe fn test_mm256_mpsadbw_epu8() {
4881        let a = _mm256_set1_epi8(2);
4882        let b = _mm256_set1_epi8(4);
4883        let r = _mm256_mpsadbw_epu8::<0>(a, b);
4884        let e = _mm256_set1_epi16(8);
4885        assert_eq_m256i(r, e);
4886    }
4887
4888    #[simd_test(enable = "avx2")]
4889    unsafe fn test_mm256_mul_epi32() {
4890        let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
4891        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4892        let r = _mm256_mul_epi32(a, b);
4893        let e = _mm256_setr_epi64x(0, 0, 10, 14);
4894        assert_eq_m256i(r, e);
4895    }
4896
4897    #[simd_test(enable = "avx2")]
4898    unsafe fn test_mm256_mul_epu32() {
4899        let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
4900        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4901        let r = _mm256_mul_epu32(a, b);
4902        let e = _mm256_setr_epi64x(0, 0, 10, 14);
4903        assert_eq_m256i(r, e);
4904    }
4905
4906    #[simd_test(enable = "avx2")]
4907    unsafe fn test_mm256_mulhi_epi16() {
4908        let a = _mm256_set1_epi16(6535);
4909        let b = _mm256_set1_epi16(6535);
4910        let r = _mm256_mulhi_epi16(a, b);
4911        let e = _mm256_set1_epi16(651);
4912        assert_eq_m256i(r, e);
4913    }
4914
4915    #[simd_test(enable = "avx2")]
4916    unsafe fn test_mm256_mulhi_epu16() {
4917        let a = _mm256_set1_epi16(6535);
4918        let b = _mm256_set1_epi16(6535);
4919        let r = _mm256_mulhi_epu16(a, b);
4920        let e = _mm256_set1_epi16(651);
4921        assert_eq_m256i(r, e);
4922    }
4923
4924    #[simd_test(enable = "avx2")]
4925    unsafe fn test_mm256_mullo_epi16() {
4926        let a = _mm256_set1_epi16(2);
4927        let b = _mm256_set1_epi16(4);
4928        let r = _mm256_mullo_epi16(a, b);
4929        let e = _mm256_set1_epi16(8);
4930        assert_eq_m256i(r, e);
4931    }
4932
4933    #[simd_test(enable = "avx2")]
4934    unsafe fn test_mm256_mullo_epi32() {
4935        let a = _mm256_set1_epi32(2);
4936        let b = _mm256_set1_epi32(4);
4937        let r = _mm256_mullo_epi32(a, b);
4938        let e = _mm256_set1_epi32(8);
4939        assert_eq_m256i(r, e);
4940    }
4941
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mulhrs_epi16() {
        // Per lane, the result is (((a * b) >> 14) + 1) >> 1,
        // so 0x4000 * 0x4000 = 0x1000_0000 rounds to 0x2000.
        let a = _mm256_set1_epi16(0x4000);
        let b = _mm256_set1_epi16(0x4000);
        let r = _mm256_mulhrs_epi16(a, b);
        let e = _mm256_set1_epi16(0x2000);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_or_si256() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(0);
        let r = _mm256_or_si256(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packs_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_packs_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packs_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_packs_epi32(a, b);
        let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packus_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_packus_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packus_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_packus_epi32(a, b);
        let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_sad_epu8(a, b);
        let e = _mm256_set1_epi64x(16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 11, 22, 33, 44,
            4, 5, 6, 7, 55, 66, 77, 88,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 1, 2, 3, 44, 22, 22, 11,
            4, 5, 6, 7, 88, 66, 66, 55,
        );
        let r = _mm256_shufflehi_epi16::<0b00_01_01_11>(a);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            11, 22, 33, 44, 0, 1, 2, 3,
            55, 66, 77, 88, 4, 5, 6, 7,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            44, 22, 22, 11, 0, 1, 2, 3,
            88, 66, 66, 55, 4, 5, 6, 7,
        );
        let r = _mm256_shufflelo_epi16::<0b00_01_01_11>(a);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let r = _mm256_sign_epi16(a, b);
        let e = _mm256_set1_epi16(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_sign_epi32(a, b);
        let e = _mm256_set1_epi32(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let r = _mm256_sign_epi8(a, b);
        let e = _mm256_set1_epi8(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi16() {
        let a = _mm256_set1_epi16(0xFF);
        let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
        let r = _mm256_sll_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(0xFF0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi32() {
        let a = _mm256_set1_epi32(0xFFFF);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
        let r = _mm256_sll_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi64() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4);
        let r = _mm256_sll_epi64(a, b);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_epi16() {
        assert_eq_m256i(
            _mm256_slli_epi16::<4>(_mm256_set1_epi16(0xFF)),
            _mm256_set1_epi16(0xFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_epi32() {
        assert_eq_m256i(
            _mm256_slli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
            _mm256_set1_epi32(0xFFFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_epi64() {
        assert_eq_m256i(
            _mm256_slli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
            _mm256_set1_epi64x(0xFFFFFFFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_si256() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
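        // The byte shift operates within each 128-bit lane independently.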
        let r = _mm256_slli_si256::<3>(a);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_sllv_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(1);
        let r = _mm_sllv_epi32(a, b);
        let e = _mm_set1_epi32(4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sllv_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_sllv_epi32(a, b);
        let e = _mm256_set1_epi32(4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_sllv_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(1);
        let r = _mm_sllv_epi64(a, b);
        let e = _mm_set1_epi64x(4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sllv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(1);
        let r = _mm256_sllv_epi64(a, b);
        let e = _mm256_set1_epi64x(4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sra_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm256_sra_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(-1));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sra_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1);
        let r = _mm256_sra_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(-1));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srai_epi16() {
        assert_eq_m256i(
            _mm256_srai_epi16::<1>(_mm256_set1_epi16(-1)),
            _mm256_set1_epi16(-1),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srai_epi32() {
        assert_eq_m256i(
            _mm256_srai_epi32::<1>(_mm256_set1_epi32(-1)),
            _mm256_set1_epi32(-1),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_srav_epi32() {
        let a = _mm_set1_epi32(4);
        let count = _mm_set1_epi32(1);
        let r = _mm_srav_epi32(a, count);
        let e = _mm_set1_epi32(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srav_epi32() {
        let a = _mm256_set1_epi32(4);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_srav_epi32(a, count);
        let e = _mm256_set1_epi32(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_si256() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
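        // Bytes shift right within each 128-bit lane independently; zeros shift in.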
        let r = _mm256_srli_si256::<3>(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            4, 5, 6, 7, 8, 9, 10, 11,
            12, 13, 14, 15, 16, 0, 0, 0,
            20, 21, 22, 23, 24, 25, 26, 27,
            28, 29, 30, 31, 32, 0, 0, 0,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi16() {
        let a = _mm256_set1_epi16(0xFF);
        let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
        let r = _mm256_srl_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(0xF));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi32() {
        let a = _mm256_set1_epi32(0xFFFF);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
        let r = _mm256_srl_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(0xFFF));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi64() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let b = _mm_setr_epi64x(4, 0);
        let r = _mm256_srl_epi64(a, b);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_epi16() {
        assert_eq_m256i(
            _mm256_srli_epi16::<4>(_mm256_set1_epi16(0xFF)),
            _mm256_set1_epi16(0xF),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_epi32() {
        assert_eq_m256i(
            _mm256_srli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
            _mm256_set1_epi32(0xFFF),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_epi64() {
        assert_eq_m256i(
            _mm256_srli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
            _mm256_set1_epi64x(0xFFFFFFF),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_srlv_epi32() {
        let a = _mm_set1_epi32(2);
        let count = _mm_set1_epi32(1);
        let r = _mm_srlv_epi32(a, count);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srlv_epi32() {
        let a = _mm256_set1_epi32(2);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_srlv_epi32(a, count);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_srlv_epi64() {
        let a = _mm_set1_epi64x(2);
        let count = _mm_set1_epi64x(1);
        let r = _mm_srlv_epi64(a, count);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srlv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let count = _mm256_set1_epi64x(1);
        let r = _mm256_srlv_epi64(a, count);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_stream_load_si256() {
        let a = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _);
        assert_eq_m256i(a, r);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_sub_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi32() {
        let a = _mm256_set1_epi32(4);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_sub_epi32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi64() {
        let a = _mm256_set1_epi64x(4);
        let b = _mm256_set1_epi64x(2);
        let r = _mm256_sub_epi64(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_sub_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epi16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_subs_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epi8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_subs_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epu16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_subs_epu16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epu8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_subs_epu8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_xor_si256() {
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let r = _mm256_xor_si256(a, b);
        assert_eq_m256i(r, _mm256_set1_epi8(6));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            -1, -2, -3, -4, -5, -6, -7, -8,
            -9, -10, -11, -12, -13, -14, -15, -16,
            -17, -18, -19, -20, -21, -22, -23, -24,
            -25, -26, -27, -28, -29, -30, -31, -32,
        );
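        // Each 128-bit lane of a is concatenated with the matching lane of b;
        // a shift count of 32 or more yields zero.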
        let r = _mm256_alignr_epi8::<33>(a, b);
        assert_eq_m256i(r, _mm256_set1_epi8(0));

        let r = _mm256_alignr_epi8::<17>(a, b);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
            18, 19, 20, 21, 22, 23, 24, 25,
            26, 27, 28, 29, 30, 31, 32, 0,
        );
        assert_eq_m256i(r, expected);

        let r = _mm256_alignr_epi8::<4>(a, b);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            -5, -6, -7, -8, -9, -10, -11, -12,
            -13, -14, -15, -16, 1, 2, 3, 4,
            -21, -22, -23, -24, -25, -26, -27, -28,
            -29, -30, -31, -32, 17, 18, 19, 20,
        );
        assert_eq_m256i(r, expected);

        let r = _mm256_alignr_epi8::<15>(a, b);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            -16, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            -32, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        assert_eq_m256i(r, expected);

        let r = _mm256_alignr_epi8::<0>(a, b);
        assert_eq_m256i(r, b);

        let r = _mm256_alignr_epi8::<16>(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
            12, 5, 5, 10, 4, 1, 8, 0,
            4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
            12, 5, 5, 10, 4, 1, 8, 0,
        );
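        // Control bytes with the high bit set (the 128u8 entries) zero the
        // corresponding output byte.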
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            5, 0, 5, 4, 9, 13, 7, 4,
            13, 6, 6, 11, 5, 2, 9, 1,
            21, 0, 21, 20, 25, 29, 23, 20,
            29, 22, 22, 27, 21, 18, 25, 17,
        );
        let r = _mm256_shuffle_epi8(a, b);
        assert_eq_m256i(r, expected);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permutevar8x32_epi32() {
        let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
        let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
        let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500);
        let r = _mm256_permutevar8x32_epi32(a, b);
        assert_eq_m256i(r, expected);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permute4x64_epi64() {
        let a = _mm256_setr_epi64x(100, 200, 300, 400);
        let expected = _mm256_setr_epi64x(400, 100, 200, 100);
        let r = _mm256_permute4x64_epi64::<0b00010011>(a);
        assert_eq_m256i(r, expected);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permute2x128_si256() {
        let a = _mm256_setr_epi64x(100, 200, 500, 600);
        let b = _mm256_setr_epi64x(300, 400, 700, 800);
        let r = _mm256_permute2x128_si256::<0b00_01_00_11>(a, b);
        let e = _mm256_setr_epi64x(700, 800, 500, 600);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permute4x64_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a);
        let e = _mm256_setr_pd(4., 1., 2., 1.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permutevar8x32_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
        let r = _mm256_permutevar8x32_ps(a, b);
        let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
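        // A multiplier of 4 is word-addressing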
        let r = _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
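        // A multiplier of 4 is word-addressing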
        let r = _mm_mask_i32gather_epi32::<4>(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm_setr_epi32(-1, -1, -1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
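        // A multiplier of 4 is word-addressing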
        let r =
            _mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
        assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
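        // A multiplier of 4 is word-addressing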
        let r = _mm256_mask_i32gather_epi32::<4>(
            _mm256_set1_epi32(256),
            arr.as_ptr(),
            _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
            _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
        );
        assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
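        // A multiplier of 4 is word-addressing for f32s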
        let r = _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
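        // A multiplier of 4 is word-addressing for f32s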
        let r = _mm_mask_i32gather_ps::<4>(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
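        // A multiplier of 4 is word-addressing for f32s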
        let r =
            _mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
        assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
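        // A multiplier of 4 is word-addressing for f32s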
        let r = _mm256_mask_i32gather_ps::<4>(
            _mm256_set1_ps(256.0),
            arr.as_ptr(),
            _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
            _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
        );
        assert_eq_m256(
            r,
            _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
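        // A multiplier of 8 is word-addressing for i64s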
        let r = _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
        assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
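        // A multiplier of 8 is word-addressing for i64s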
        let r = _mm_mask_i32gather_epi64::<8>(
            _mm_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi32(16, 16, 16, 16),
            _mm_setr_epi64x(-1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
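        // A multiplier of 8 is word-addressing for i64s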
        let r = _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
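        // A multiplier of 8 is word-addressing for i64s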
        let r = _mm256_mask_i32gather_epi64::<8>(
            _mm256_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm256_setr_epi64x(-1, -1, -1, 0),
        );
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
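        // A multiplier of 8 is word-addressing for f64s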
        let r = _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
        assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
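        // A multiplier of 8 is word-addressing for f64s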
        let r = _mm_mask_i32gather_pd::<8>(
            _mm_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(16, 16, 16, 16),
            _mm_setr_pd(-1.0, 0.0),
        );
        assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
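        // A multiplier of 8 is word-addressing for f64s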
        let r = _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
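        // A multiplier of 8 is word-addressing for f64s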
        let r = _mm256_mask_i32gather_pd::<8>(
            _mm256_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
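        // A multiplier of 4 is word-addressing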
        let r = _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
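        // A multiplier of 4 is word-addressing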
        let r = _mm_mask_i64gather_epi32::<4>(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm_setr_epi64x(0, 16),
            _mm_setr_epi32(-1, 0, -1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
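        // A multiplier of 4 is word-addressing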
        let r = _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
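        // A multiplier of 4 is word-addressing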
        let r = _mm256_mask_i64gather_epi32::<4>(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm_setr_epi32(-1, -1, -1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
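        // A multiplier of 4 is word-addressing for f32s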
        let r = _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
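        // A multiplier of 4 is word-addressing for f32s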
        let r = _mm_mask_i64gather_ps::<4>(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm_setr_epi64x(0, 16),
            _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
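        // A multiplier of 4 is word-addressing for f32s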
        let r = _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
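        // A multiplier of 4 is word-addressing for f32s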
        let r = _mm256_mask_i64gather_ps::<4>(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
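        // A multiplier of 8 is word-addressing for i64s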
        let r = _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
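        // A multiplier of 8 is word-addressing for i64s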
        let r = _mm_mask_i64gather_epi64::<8>(
            _mm_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi64x(16, 16),
            _mm_setr_epi64x(-1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
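        // A multiplier of 8 is word-addressing for i64s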
        let r = _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
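        // A multiplier of 8 is word-addressing for i64s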
        let r = _mm256_mask_i64gather_epi64::<8>(
            _mm256_set1_epi64x(256),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm256_setr_epi64x(-1, -1, -1, 0),
        );
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
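        // A multiplier of 8 is word-addressing for f64s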
        let r = _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
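        // A multiplier of 8 is word-addressing for f64s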
        let r = _mm_mask_i64gather_pd::<8>(
            _mm_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi64x(16, 16),
            _mm_setr_pd(-1.0, 0.0),
        );
        assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
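        // A multiplier of 8 is word-addressing for f64s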
        let r = _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
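        // A multiplier of 8 is word-addressing for f64s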
        let r = _mm256_mask_i64gather_pd::<8>(
            _mm256_set1_pd(256.0),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_extract_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            -1, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31
        );
        let r1 = _mm256_extract_epi8::<0>(a);
        let r2 = _mm256_extract_epi8::<3>(a);
        assert_eq!(r1, 0xFF);
        assert_eq!(r2, 3);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_extract_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            -1, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r1 = _mm256_extract_epi16::<0>(a);
        let r2 = _mm256_extract_epi16::<3>(a);
        assert_eq!(r1, 0xFFFF);
        assert_eq!(r2, 3);
    }
}