1use crate::{
4    core_arch::{simd::*, x86::*},
5    intrinsics::simd::*,
6};
7
8#[cfg(test)]
9use stdarch_test::assert_instr;
10
11#[inline]
16#[target_feature(enable = "ssse3")]
17#[cfg_attr(test, assert_instr(pabsb))]
18#[stable(feature = "simd_x86", since = "1.27.0")]
19pub fn _mm_abs_epi8(a: __m128i) -> __m128i {
20    unsafe {
21        let a = a.as_i8x16();
22        let zero = i8x16::ZERO;
23        let r = simd_select::<m8x16, _>(simd_lt(a, zero), simd_neg(a), a);
24        transmute(r)
25    }
26}
27
28#[inline]
34#[target_feature(enable = "ssse3")]
35#[cfg_attr(test, assert_instr(pabsw))]
36#[stable(feature = "simd_x86", since = "1.27.0")]
37pub fn _mm_abs_epi16(a: __m128i) -> __m128i {
38    unsafe {
39        let a = a.as_i16x8();
40        let zero = i16x8::ZERO;
41        let r = simd_select::<m16x8, _>(simd_lt(a, zero), simd_neg(a), a);
42        transmute(r)
43    }
44}
45
46#[inline]
52#[target_feature(enable = "ssse3")]
53#[cfg_attr(test, assert_instr(pabsd))]
54#[stable(feature = "simd_x86", since = "1.27.0")]
55pub fn _mm_abs_epi32(a: __m128i) -> __m128i {
56    unsafe {
57        let a = a.as_i32x4();
58        let zero = i32x4::ZERO;
59        let r = simd_select::<m32x4, _>(simd_lt(a, zero), simd_neg(a), a);
60        transmute(r)
61    }
62}
63
64#[inline]
91#[target_feature(enable = "ssse3")]
92#[cfg_attr(test, assert_instr(pshufb))]
93#[stable(feature = "simd_x86", since = "1.27.0")]
94pub fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
95    unsafe { transmute(pshufb128(a.as_u8x16(), b.as_u8x16())) }
96}
97
98#[inline]
103#[target_feature(enable = "ssse3")]
104#[cfg_attr(test, assert_instr(palignr, IMM8 = 15))]
105#[rustc_legacy_const_generics(2)]
106#[stable(feature = "simd_x86", since = "1.27.0")]
107pub fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
108    static_assert_uimm_bits!(IMM8, 8);
109    if IMM8 > 32 {
112        return _mm_setzero_si128();
113    }
114    let (a, b) = if IMM8 > 16 {
117        (_mm_setzero_si128(), a)
118    } else {
119        (a, b)
120    };
121    const fn mask(shift: u32, i: u32) -> u32 {
122        if shift > 32 {
123            i
125        } else if shift > 16 {
126            shift - 16 + i
127        } else {
128            shift + i
129        }
130    }
131    unsafe {
132        let r: i8x16 = simd_shuffle!(
133            b.as_i8x16(),
134            a.as_i8x16(),
135            [
136                mask(IMM8 as u32, 0),
137                mask(IMM8 as u32, 1),
138                mask(IMM8 as u32, 2),
139                mask(IMM8 as u32, 3),
140                mask(IMM8 as u32, 4),
141                mask(IMM8 as u32, 5),
142                mask(IMM8 as u32, 6),
143                mask(IMM8 as u32, 7),
144                mask(IMM8 as u32, 8),
145                mask(IMM8 as u32, 9),
146                mask(IMM8 as u32, 10),
147                mask(IMM8 as u32, 11),
148                mask(IMM8 as u32, 12),
149                mask(IMM8 as u32, 13),
150                mask(IMM8 as u32, 14),
151                mask(IMM8 as u32, 15),
152            ],
153        );
154        transmute(r)
155    }
156}
157
158#[inline]
163#[target_feature(enable = "ssse3")]
164#[cfg_attr(test, assert_instr(phaddw))]
165#[stable(feature = "simd_x86", since = "1.27.0")]
166pub fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
167    unsafe { transmute(phaddw128(a.as_i16x8(), b.as_i16x8())) }
168}
169
170#[inline]
176#[target_feature(enable = "ssse3")]
177#[cfg_attr(test, assert_instr(phaddsw))]
178#[stable(feature = "simd_x86", since = "1.27.0")]
179pub fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
180    unsafe { transmute(phaddsw128(a.as_i16x8(), b.as_i16x8())) }
181}
182
183#[inline]
188#[target_feature(enable = "ssse3")]
189#[cfg_attr(test, assert_instr(phaddd))]
190#[stable(feature = "simd_x86", since = "1.27.0")]
191pub fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
192    unsafe { transmute(phaddd128(a.as_i32x4(), b.as_i32x4())) }
193}
194
195#[inline]
200#[target_feature(enable = "ssse3")]
201#[cfg_attr(test, assert_instr(phsubw))]
202#[stable(feature = "simd_x86", since = "1.27.0")]
203pub fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
204    unsafe { transmute(phsubw128(a.as_i16x8(), b.as_i16x8())) }
205}
206
207#[inline]
214#[target_feature(enable = "ssse3")]
215#[cfg_attr(test, assert_instr(phsubsw))]
216#[stable(feature = "simd_x86", since = "1.27.0")]
217pub fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
218    unsafe { transmute(phsubsw128(a.as_i16x8(), b.as_i16x8())) }
219}
220
221#[inline]
226#[target_feature(enable = "ssse3")]
227#[cfg_attr(test, assert_instr(phsubd))]
228#[stable(feature = "simd_x86", since = "1.27.0")]
229pub fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
230    unsafe { transmute(phsubd128(a.as_i32x4(), b.as_i32x4())) }
231}
232
233#[inline]
241#[target_feature(enable = "ssse3")]
242#[cfg_attr(test, assert_instr(pmaddubsw))]
243#[stable(feature = "simd_x86", since = "1.27.0")]
244pub fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
245    unsafe { transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16())) }
246}
247
248#[inline]
254#[target_feature(enable = "ssse3")]
255#[cfg_attr(test, assert_instr(pmulhrsw))]
256#[stable(feature = "simd_x86", since = "1.27.0")]
257pub fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
258    unsafe { transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8())) }
259}
260
261#[inline]
268#[target_feature(enable = "ssse3")]
269#[cfg_attr(test, assert_instr(psignb))]
270#[stable(feature = "simd_x86", since = "1.27.0")]
271pub fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
272    unsafe { transmute(psignb128(a.as_i8x16(), b.as_i8x16())) }
273}
274
275#[inline]
282#[target_feature(enable = "ssse3")]
283#[cfg_attr(test, assert_instr(psignw))]
284#[stable(feature = "simd_x86", since = "1.27.0")]
285pub fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
286    unsafe { transmute(psignw128(a.as_i16x8(), b.as_i16x8())) }
287}
288
289#[inline]
296#[target_feature(enable = "ssse3")]
297#[cfg_attr(test, assert_instr(psignd))]
298#[stable(feature = "simd_x86", since = "1.27.0")]
299pub fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
300    unsafe { transmute(psignd128(a.as_i32x4(), b.as_i32x4())) }
301}
302
// Declarations of the LLVM SSSE3 intrinsics that the public wrappers in this
// file forward to. Each function is bound to its LLVM intrinsic name through
// `link_name`; the lane types in the signatures fix how the 128-bit operands
// are interpreted.
//
// NOTE(review): `improper_ctypes` is presumably silenced because the SIMD
// vector types in these signatures are not considered FFI-safe by the lint —
// confirm.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Byte shuffle; backs `_mm_shuffle_epi8`.
    #[link_name = "llvm.x86.ssse3.pshuf.b.128"]
    fn pshufb128(a: u8x16, b: u8x16) -> u8x16;

    // Horizontal add of 16-bit lanes; backs `_mm_hadd_epi16`.
    #[link_name = "llvm.x86.ssse3.phadd.w.128"]
    fn phaddw128(a: i16x8, b: i16x8) -> i16x8;

    // Horizontal saturating add of 16-bit lanes; backs `_mm_hadds_epi16`.
    #[link_name = "llvm.x86.ssse3.phadd.sw.128"]
    fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;

    // Horizontal add of 32-bit lanes; backs `_mm_hadd_epi32`.
    #[link_name = "llvm.x86.ssse3.phadd.d.128"]
    fn phaddd128(a: i32x4, b: i32x4) -> i32x4;

    // Horizontal subtract of 16-bit lanes; backs `_mm_hsub_epi16`.
    #[link_name = "llvm.x86.ssse3.phsub.w.128"]
    fn phsubw128(a: i16x8, b: i16x8) -> i16x8;

    // Horizontal saturating subtract of 16-bit lanes; backs `_mm_hsubs_epi16`.
    #[link_name = "llvm.x86.ssse3.phsub.sw.128"]
    fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;

    // Horizontal subtract of 32-bit lanes; backs `_mm_hsub_epi32`.
    #[link_name = "llvm.x86.ssse3.phsub.d.128"]
    fn phsubd128(a: i32x4, b: i32x4) -> i32x4;

    // Unsigned-by-signed byte multiply with saturating pairwise add; backs
    // `_mm_maddubs_epi16`. The first operand is unsigned, the second signed.
    #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
    fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;

    // Rounded, scaled high multiply of 16-bit lanes; backs `_mm_mulhrs_epi16`.
    #[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
    fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;

    // Sign transfer on 8-bit lanes; backs `_mm_sign_epi8`.
    #[link_name = "llvm.x86.ssse3.psign.b.128"]
    fn psignb128(a: i8x16, b: i8x16) -> i8x16;

    // Sign transfer on 16-bit lanes; backs `_mm_sign_epi16`.
    #[link_name = "llvm.x86.ssse3.psign.w.128"]
    fn psignw128(a: i16x8, b: i16x8) -> i16x8;

    // Sign transfer on 32-bit lanes; backs `_mm_sign_epi32`.
    #[link_name = "llvm.x86.ssse3.psign.d.128"]
    fn psignd128(a: i32x4, b: i32x4) -> i32x4;
}
341
// Unit tests for the SSSE3 intrinsics: each test feeds fixed vectors to one
// intrinsic and compares the full 128-bit result against a hand-computed value.
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi8() {
        let input = _mm_set1_epi8(-5);
        let want = _mm_set1_epi8(5);
        assert_eq_m128i(_mm_abs_epi8(input), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi16() {
        let input = _mm_set1_epi16(-5);
        let want = _mm_set1_epi16(5);
        assert_eq_m128i(_mm_abs_epi16(input), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi32() {
        let input = _mm_set1_epi32(-5);
        let want = _mm_set1_epi32(5);
        assert_eq_m128i(_mm_abs_epi32(input), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_shuffle_epi8() {
        #[rustfmt::skip]
        let bytes = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        // Selector 128 has its top bit set (output 0); 24 and 19 exceed 15 and
        // therefore only their low four bits are used.
        #[rustfmt::skip]
        let selectors = _mm_setr_epi8(
            4, 128_u8 as i8, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let want = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
        assert_eq_m128i(_mm_shuffle_epi8(bytes, selectors), want);

        // Adding 32 to these selectors changes neither their low four bits nor
        // their top bit, so the shuffle result must stay the same.
        let selectors = _mm_add_epi8(selectors, _mm_set1_epi8(32));
        assert_eq_m128i(_mm_shuffle_epi8(bytes, selectors), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_alignr_epi8() {
        #[rustfmt::skip]
        let upper = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let lower = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );

        // Shift beyond both vectors: nothing but zeros remains.
        let got = _mm_alignr_epi8::<33>(upper, lower);
        assert_eq_m128i(got, _mm_set1_epi8(0));

        // Shift by one vector plus one byte: `upper` moved down by one byte
        // with a zero shifted in at the top.
        let got = _mm_alignr_epi8::<17>(upper, lower);
        #[rustfmt::skip]
        let want = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(got, want);

        // Shift by exactly one vector: `upper` itself.
        let got = _mm_alignr_epi8::<16>(upper, lower);
        assert_eq_m128i(got, upper);

        // Shift by 15 bytes: last byte of `lower`, then the first 15 of `upper`.
        let got = _mm_alignr_epi8::<15>(upper, lower);
        #[rustfmt::skip]
        let want = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(got, want);

        // No shift: `lower` itself.
        let got = _mm_alignr_epi8::<0>(upper, lower);
        assert_eq_m128i(got, lower);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let want = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
        assert_eq_m128i(_mm_hadd_epi16(lhs, rhs), want);

        // Sums that overflow 16 bits wrap around.
        let lhs = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let rhs = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let want = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        assert_eq_m128i(_mm_hadd_epi16(lhs, rhs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadds_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
        let want = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
        assert_eq_m128i(_mm_hadds_epi16(lhs, rhs), want);

        // Sums that overflow 16 bits saturate instead of wrapping.
        let lhs = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let rhs = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let want = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        assert_eq_m128i(_mm_hadds_epi16(lhs, rhs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi32() {
        let lhs = _mm_setr_epi32(1, 2, 3, 4);
        let rhs = _mm_setr_epi32(4, 128, 4, 3);
        let want = _mm_setr_epi32(3, 7, 132, 7);
        assert_eq_m128i(_mm_hadd_epi32(lhs, rhs), want);

        // Sums that overflow 32 bits wrap around.
        let lhs = _mm_setr_epi32(i32::MAX, 1, i32::MAX, 2);
        let rhs = _mm_setr_epi32(i32::MIN, -1, i32::MIN, -2);
        let want = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        assert_eq_m128i(_mm_hadd_epi32(lhs, rhs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let want = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
        assert_eq_m128i(_mm_hsub_epi16(lhs, rhs), want);

        // Differences that overflow 16 bits wrap around.
        let lhs = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let rhs = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let want = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        assert_eq_m128i(_mm_hsub_epi16(lhs, rhs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsubs_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let want = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
        assert_eq_m128i(_mm_hsubs_epi16(lhs, rhs), want);

        // Differences that overflow 16 bits saturate instead of wrapping.
        let lhs = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let rhs = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let want = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        assert_eq_m128i(_mm_hsubs_epi16(lhs, rhs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi32() {
        let lhs = _mm_setr_epi32(1, 2, 3, 4);
        let rhs = _mm_setr_epi32(4, 128, 4, 3);
        let want = _mm_setr_epi32(-1, -1, -124, 1);
        assert_eq_m128i(_mm_hsub_epi32(lhs, rhs), want);

        // Differences that overflow 32 bits wrap around.
        let lhs = _mm_setr_epi32(i32::MAX, -1, i32::MAX, -2);
        let rhs = _mm_setr_epi32(i32::MIN, 1, i32::MIN, 2);
        let want = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        assert_eq_m128i(_mm_hsub_epi32(lhs, rhs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_maddubs_epi16() {
        #[rustfmt::skip]
        let unsigned_bytes = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let signed_bytes = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let want = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
        assert_eq_m128i(_mm_maddubs_epi16(unsigned_bytes, signed_bytes), want);

        // The first operand is read as unsigned (255, not -1), products are
        // widened to 16 bits, and the pairwise sums saturate.
        #[rustfmt::skip]
        let unsigned_bytes = _mm_setr_epi8(
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            100, 100, 0, 0,
            0, 0, 0, 0, 0, 0,
        );
        #[rustfmt::skip]
        let signed_bytes = _mm_setr_epi8(
            i8::MAX, i8::MAX,
            i8::MAX, i8::MIN,
            i8::MIN, i8::MIN,
            50, 15, 0, 0, 0,
            0, 0, 0, 0, 0,
        );
        let want = _mm_setr_epi16(i16::MAX, -255, i16::MIN, 6500, 0, 0, 0, 0);
        assert_eq_m128i(_mm_maddubs_epi16(unsigned_bytes, signed_bytes), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_mulhrs_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let want = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
        assert_eq_m128i(_mm_mulhrs_epi16(lhs, rhs), want);

        // Extreme operands: the `MIN * MIN` lane ends up as `MIN` again.
        let lhs = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MIN, 0, 0, 0, 0, 0);
        let rhs = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MAX, 0, 0, 0, 0, 0);
        let want = _mm_setr_epi16(i16::MAX - 1, i16::MIN, -i16::MAX, 0, 0, 0, 0, 0);
        assert_eq_m128i(_mm_mulhrs_epi16(lhs, rhs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi8() {
        #[rustfmt::skip]
        let values = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, -14, -15, 16,
        );
        #[rustfmt::skip]
        let signs = _mm_setr_epi8(
            4, 63, -4, 3, 24, 12, -6, -19,
            12, 5, -5, 10, 4, 1, -8, 0,
        );
        #[rustfmt::skip]
        let want = _mm_setr_epi8(
            1, 2, -3, 4, 5, 6, -7, -8,
            9, 10, -11, 12, 13, -14, 15, 0,
        );
        assert_eq_m128i(_mm_sign_epi8(values, signs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi16() {
        let values = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
        let signs = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
        let want = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
        assert_eq_m128i(_mm_sign_epi16(values, signs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi32() {
        let values = _mm_setr_epi32(-1, 2, 3, 4);
        let signs = _mm_setr_epi32(1, -1, 1, 0);
        let want = _mm_setr_epi32(-1, -2, 3, 0);
        assert_eq_m128i(_mm_sign_epi32(values, signs), want);
    }
}