1use crate::{
4    core_arch::{simd::*, x86::*},
5    intrinsics::simd::*,
6    intrinsics::sqrtf32,
7    mem, ptr,
8};
9
10#[cfg(test)]
11use stdarch_test::assert_instr;
12
13#[inline]
18#[target_feature(enable = "sse")]
19#[cfg_attr(test, assert_instr(addss))]
20#[stable(feature = "simd_x86", since = "1.27.0")]
21pub fn _mm_add_ss(a: __m128, b: __m128) -> __m128 {
22    unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) + _mm_cvtss_f32(b)) }
23}
24
/// Adds `a` and `b` lane by lane using `simd_add` and returns the sums.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(addps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { simd_add(a, b) }
}
36
37#[inline]
42#[target_feature(enable = "sse")]
43#[cfg_attr(test, assert_instr(subss))]
44#[stable(feature = "simd_x86", since = "1.27.0")]
45pub fn _mm_sub_ss(a: __m128, b: __m128) -> __m128 {
46    unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) - _mm_cvtss_f32(b)) }
47}
48
/// Subtracts `b` from `a` lane by lane using `simd_sub` and returns the
/// differences.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(subps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { simd_sub(a, b) }
}
60
61#[inline]
66#[target_feature(enable = "sse")]
67#[cfg_attr(test, assert_instr(mulss))]
68#[stable(feature = "simd_x86", since = "1.27.0")]
69pub fn _mm_mul_ss(a: __m128, b: __m128) -> __m128 {
70    unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) * _mm_cvtss_f32(b)) }
71}
72
/// Multiplies `a` and `b` lane by lane using `simd_mul` and returns the
/// products.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(mulps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mul_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { simd_mul(a, b) }
}
84
85#[inline]
90#[target_feature(enable = "sse")]
91#[cfg_attr(test, assert_instr(divss))]
92#[stable(feature = "simd_x86", since = "1.27.0")]
93pub fn _mm_div_ss(a: __m128, b: __m128) -> __m128 {
94    unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) / _mm_cvtss_f32(b)) }
95}
96
/// Divides `a` by `b` lane by lane using `simd_div` and returns the
/// quotients.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(divps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_div_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { simd_div(a, b) }
}
108
109#[inline]
114#[target_feature(enable = "sse")]
115#[cfg_attr(test, assert_instr(sqrtss))]
116#[stable(feature = "simd_x86", since = "1.27.0")]
117pub fn _mm_sqrt_ss(a: __m128) -> __m128 {
118    unsafe { simd_insert!(a, 0, sqrtf32(_mm_cvtss_f32(a))) }
119}
120
/// Applies `simd_fsqrt` to `a` and returns the result (square root of each
/// lane).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(sqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sqrt_ps(a: __m128) -> __m128 {
    unsafe { simd_fsqrt(a) }
}
132
/// Forwards `a` to the `rcpss` binding (declared outside this excerpt) and
/// returns its result; the test attribute expects the `rcpss` instruction.
///
/// NOTE(review): presumably an approximate reciprocal of lane 0 with lanes
/// 1-3 copied from `a` — confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rcpss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_rcp_ss(a: __m128) -> __m128 {
    unsafe { rcpss(a) }
}
144
/// Forwards `a` to the `rcpps` binding (declared outside this excerpt) and
/// returns its result; the test attribute expects the `rcpps` instruction.
///
/// NOTE(review): presumably an approximate reciprocal of every lane —
/// confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rcpps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_rcp_ps(a: __m128) -> __m128 {
    unsafe { rcpps(a) }
}
156
/// Forwards `a` to the `rsqrtss` binding (declared outside this excerpt) and
/// returns its result; the test attribute expects the `rsqrtss` instruction.
///
/// NOTE(review): presumably an approximate reciprocal square root of lane 0
/// with lanes 1-3 copied from `a` — confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rsqrtss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_rsqrt_ss(a: __m128) -> __m128 {
    unsafe { rsqrtss(a) }
}
168
/// Forwards `a` to the `rsqrtps` binding (declared outside this excerpt) and
/// returns its result; the test attribute expects the `rsqrtps` instruction.
///
/// NOTE(review): presumably an approximate reciprocal square root of every
/// lane — confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rsqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_rsqrt_ps(a: __m128) -> __m128 {
    unsafe { rsqrtps(a) }
}
180
/// Forwards `a` and `b` to the `minss` binding (declared outside this
/// excerpt) and returns its result; the test attribute expects `minss`.
///
/// NOTE(review): presumably the minimum of the two lane-0 values with lanes
/// 1-3 copied from `a`; NaN and signed-zero handling follow the instruction
/// — confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(minss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { minss(a, b) }
}
193
/// Forwards `a` and `b` to the `minps` binding (declared outside this
/// excerpt) and returns its result; the test attribute expects `minps`.
///
/// NOTE(review): presumably the lane-wise minimum; NaN and signed-zero
/// handling follow the instruction — confirm against Intel's intrinsics
/// guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(minps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { minps(a, b) }
}
206
/// Forwards `a` and `b` to the `maxss` binding (declared outside this
/// excerpt) and returns its result; the test attribute expects `maxss`.
///
/// NOTE(review): presumably the maximum of the two lane-0 values with lanes
/// 1-3 copied from `a`; NaN and signed-zero handling follow the instruction
/// — confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(maxss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { maxss(a, b) }
}
219
/// Forwards `a` and `b` to the `maxps` binding (declared outside this
/// excerpt) and returns its result; the test attribute expects `maxps`.
///
/// NOTE(review): presumably the lane-wise maximum; NaN and signed-zero
/// handling follow the instruction — confirm against Intel's intrinsics
/// guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(maxps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { maxps(a, b) }
}
232
233#[inline]
237#[target_feature(enable = "sse")]
238#[cfg_attr(
240    all(test, any(target_arch = "x86_64", target_feature = "sse2")),
241    assert_instr(andps)
242)]
243#[stable(feature = "simd_x86", since = "1.27.0")]
244pub fn _mm_and_ps(a: __m128, b: __m128) -> __m128 {
245    unsafe {
246        let a: __m128i = mem::transmute(a);
247        let b: __m128i = mem::transmute(b);
248        mem::transmute(simd_and(a, b))
249    }
250}
251
252#[inline]
259#[target_feature(enable = "sse")]
260#[cfg_attr(
263    all(test, any(target_arch = "x86_64", target_feature = "sse2")),
264    assert_instr(andnps)
265)]
266#[stable(feature = "simd_x86", since = "1.27.0")]
267pub fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 {
268    unsafe {
269        let a: __m128i = mem::transmute(a);
270        let b: __m128i = mem::transmute(b);
271        let mask: __m128i = mem::transmute(i32x4::splat(-1));
272        mem::transmute(simd_and(simd_xor(mask, a), b))
273    }
274}
275
276#[inline]
280#[target_feature(enable = "sse")]
281#[cfg_attr(
283    all(test, any(target_arch = "x86_64", target_feature = "sse2")),
284    assert_instr(orps)
285)]
286#[stable(feature = "simd_x86", since = "1.27.0")]
287pub fn _mm_or_ps(a: __m128, b: __m128) -> __m128 {
288    unsafe {
289        let a: __m128i = mem::transmute(a);
290        let b: __m128i = mem::transmute(b);
291        mem::transmute(simd_or(a, b))
292    }
293}
294
295#[inline]
300#[target_feature(enable = "sse")]
301#[cfg_attr(
303    all(test, any(target_arch = "x86_64", target_feature = "sse2")),
304    assert_instr(xorps)
305)]
306#[stable(feature = "simd_x86", since = "1.27.0")]
307pub fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 {
308    unsafe {
309        let a: __m128i = mem::transmute(a);
310        let b: __m128i = mem::transmute(b);
311        mem::transmute(simd_xor(a, b))
312    }
313}
314
/// Scalar equality compare: calls `cmpss(a, b, 0)` (binding declared outside
/// this excerpt); the test attribute expects `cmpeqss`.
///
/// NOTE(review): presumably lane 0 becomes all ones when `a[0] == b[0]` and
/// all zeros otherwise, with lanes 1-3 copied from `a` — confirm against
/// Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpeqss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpss(a, b, 0) }
}
327
/// Scalar less-than compare: calls `cmpss(a, b, 1)` (binding declared
/// outside this excerpt); the test attribute expects `cmpltss`.
///
/// NOTE(review): presumably lane 0 becomes all ones when `a[0] < b[0]` and
/// all zeros otherwise, with lanes 1-3 copied from `a` — confirm against
/// Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpss(a, b, 1) }
}
341
/// Scalar less-than-or-equal compare: calls `cmpss(a, b, 2)` (binding
/// declared outside this excerpt); the test attribute expects `cmpless`.
///
/// NOTE(review): presumably lane 0 becomes all ones when `a[0] <= b[0]` and
/// all zeros otherwise, with lanes 1-3 copied from `a` — confirm against
/// Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpless))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpss(a, b, 2) }
}
355
356#[inline]
363#[target_feature(enable = "sse")]
364#[cfg_attr(test, assert_instr(cmpltss))]
365#[stable(feature = "simd_x86", since = "1.27.0")]
366pub fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 {
367    unsafe { simd_shuffle!(a, cmpss(b, a, 1), [4, 1, 2, 3]) }
368}
369
370#[inline]
377#[target_feature(enable = "sse")]
378#[cfg_attr(test, assert_instr(cmpless))]
379#[stable(feature = "simd_x86", since = "1.27.0")]
380pub fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128 {
381    unsafe { simd_shuffle!(a, cmpss(b, a, 2), [4, 1, 2, 3]) }
382}
383
/// Scalar not-equal compare: calls `cmpss(a, b, 4)` (binding declared
/// outside this excerpt); the test attribute expects `cmpneqss`.
///
/// NOTE(review): presumably lane 0 becomes all ones when `a[0] != b[0]` and
/// all zeros otherwise, with lanes 1-3 copied from `a` — confirm against
/// Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpneqss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpneq_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpss(a, b, 4) }
}
397
/// Scalar not-less-than compare: calls `cmpss(a, b, 5)` (binding declared
/// outside this excerpt); the test attribute expects `cmpnltss`.
///
/// NOTE(review): presumably lane 0 becomes all ones when `a[0] < b[0]` does
/// not hold and all zeros otherwise, with lanes 1-3 copied from `a` —
/// confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnlt_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpss(a, b, 5) }
}
411
/// Scalar not-less-than-or-equal compare: calls `cmpss(a, b, 6)` (binding
/// declared outside this excerpt); the test attribute expects `cmpnless`.
///
/// NOTE(review): presumably lane 0 becomes all ones when `a[0] <= b[0]` does
/// not hold and all zeros otherwise, with lanes 1-3 copied from `a` —
/// confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnless))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpss(a, b, 6) }
}
425
426#[inline]
433#[target_feature(enable = "sse")]
434#[cfg_attr(test, assert_instr(cmpnltss))]
435#[stable(feature = "simd_x86", since = "1.27.0")]
436pub fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 {
437    unsafe { simd_shuffle!(a, cmpss(b, a, 5), [4, 1, 2, 3]) }
438}
439
440#[inline]
447#[target_feature(enable = "sse")]
448#[cfg_attr(test, assert_instr(cmpnless))]
449#[stable(feature = "simd_x86", since = "1.27.0")]
450pub fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128 {
451    unsafe { simd_shuffle!(a, cmpss(b, a, 6), [4, 1, 2, 3]) }
452}
453
/// Scalar "ordered" compare: calls `cmpss(a, b, 7)` (binding declared
/// outside this excerpt); the test attribute expects `cmpordss`.
///
/// NOTE(review): presumably lane 0 becomes all ones when neither lane-0
/// input is NaN and all zeros otherwise, with lanes 1-3 copied from `a` —
/// confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpordss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpord_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpss(a, b, 7) }
}
467
/// Scalar "unordered" compare: calls `cmpss(a, b, 3)` (binding declared
/// outside this excerpt); the test attribute expects `cmpunordss`.
///
/// NOTE(review): presumably lane 0 becomes all ones when either lane-0
/// input is NaN and all zeros otherwise, with lanes 1-3 copied from `a` —
/// confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpunordss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpunord_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpss(a, b, 3) }
}
481
/// Packed equality compare: calls `cmpps(a, b, 0)` (binding declared outside
/// this excerpt); the test attribute expects `cmpeqps`.
///
/// NOTE(review): presumably each output lane is all ones when the
/// corresponding lanes are equal and all zeros otherwise — confirm against
/// Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpeqps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(a, b, 0) }
}
494
/// Packed less-than compare: calls `cmpps(a, b, 1)` (binding declared
/// outside this excerpt); the test attribute expects `cmpltps`.
///
/// NOTE(review): presumably each output lane is all ones when the lane of
/// `a` is less than the lane of `b` and all zeros otherwise — confirm
/// against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(a, b, 1) }
}
507
/// Packed less-than-or-equal compare: calls `cmpps(a, b, 2)` (binding
/// declared outside this excerpt); the test attribute expects `cmpleps`.
///
/// NOTE(review): presumably each output lane is all ones when the lane of
/// `a` is less than or equal to the lane of `b` and all zeros otherwise —
/// confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmple_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(a, b, 2) }
}
521
/// Packed greater-than compare, implemented as a less-than compare with the
/// operands swapped: `cmpps(b, a, 1)` (test attribute expects `cmpltps`).
///
/// NOTE(review): presumably each output lane is all ones when the lane of
/// `a` is greater than the lane of `b` and all zeros otherwise — confirm
/// against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(b, a, 1) }
}
534
/// Packed greater-than-or-equal compare, implemented as a
/// less-than-or-equal compare with the operands swapped: `cmpps(b, a, 2)`
/// (test attribute expects `cmpleps`).
///
/// NOTE(review): presumably each output lane is all ones when the lane of
/// `a` is greater than or equal to the lane of `b` and all zeros otherwise
/// — confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpge_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(b, a, 2) }
}
548
/// Packed not-equal compare: calls `cmpps(a, b, 4)` (binding declared
/// outside this excerpt); the test attribute expects `cmpneqps`.
///
/// NOTE(review): presumably each output lane is all ones when the
/// corresponding lanes differ and all zeros otherwise — confirm against
/// Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpneqps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpneq_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(a, b, 4) }
}
561
/// Packed not-less-than compare: calls `cmpps(a, b, 5)` (binding declared
/// outside this excerpt); the test attribute expects `cmpnltps`.
///
/// NOTE(review): presumably each output lane is all ones when "lane of `a`
/// < lane of `b`" does not hold and all zeros otherwise — confirm against
/// Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnlt_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(a, b, 5) }
}
575
/// Packed not-less-than-or-equal compare: calls `cmpps(a, b, 6)` (binding
/// declared outside this excerpt); the test attribute expects `cmpnleps`.
///
/// NOTE(review): presumably each output lane is all ones when "lane of `a`
/// <= lane of `b`" does not hold and all zeros otherwise — confirm against
/// Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnle_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(a, b, 6) }
}
589
/// Packed not-greater-than compare, implemented as a not-less-than compare
/// with the operands swapped: `cmpps(b, a, 5)` (test attribute expects
/// `cmpnltps`).
///
/// NOTE(review): presumably each output lane is all ones when "lane of `a`
/// > lane of `b`" does not hold and all zeros otherwise — confirm against
/// Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpngt_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(b, a, 5) }
}
603
/// Packed not-greater-than-or-equal compare, implemented as a
/// not-less-than-or-equal compare with the operands swapped:
/// `cmpps(b, a, 6)` (test attribute expects `cmpnleps`).
///
/// NOTE(review): presumably each output lane is all ones when "lane of `a`
/// >= lane of `b`" does not hold and all zeros otherwise — confirm against
/// Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnge_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(b, a, 6) }
}
617
/// Packed "ordered" compare: calls `cmpps(b, a, 7)` (binding declared
/// outside this excerpt); the test attribute expects `cmpordps`.
///
/// Note that the operands are passed as `(b, a)`, unlike `_mm_cmpord_ss`
/// which passes `(a, b)`.
///
/// NOTE(review): presumably each output lane is all ones when neither input
/// lane is NaN, which would make the operand order irrelevant — confirm
/// against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpordps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpord_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(b, a, 7) }
}
631
/// Packed "unordered" compare: calls `cmpps(b, a, 3)` (binding declared
/// outside this excerpt); the test attribute expects `cmpunordps`.
///
/// Note that the operands are passed as `(b, a)`, unlike `_mm_cmpunord_ss`
/// which passes `(a, b)`.
///
/// NOTE(review): presumably each output lane is all ones when either input
/// lane is NaN, which would make the operand order irrelevant — confirm
/// against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpunordps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpunord_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(b, a, 3) }
}
645
/// Returns the `i32` produced by the `comieq_ss` binding (declared outside
/// this excerpt) for `a` and `b`; the test attribute expects `comiss`.
///
/// NOTE(review): presumably `1` when lane 0 of `a` equals lane 0 of `b` and
/// `0` otherwise — confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comieq_ss(a: __m128, b: __m128) -> i32 {
    unsafe { comieq_ss(a, b) }
}
657
/// Returns the `i32` produced by the `comilt_ss` binding (declared outside
/// this excerpt) for `a` and `b`; the test attribute expects `comiss`.
///
/// NOTE(review): presumably `1` when lane 0 of `a` is less than lane 0 of
/// `b` and `0` otherwise — confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comilt_ss(a: __m128, b: __m128) -> i32 {
    unsafe { comilt_ss(a, b) }
}
669
/// Returns the `i32` produced by the `comile_ss` binding (declared outside
/// this excerpt) for `a` and `b`; the test attribute expects `comiss`.
///
/// NOTE(review): presumably `1` when lane 0 of `a` is less than or equal to
/// lane 0 of `b` and `0` otherwise — confirm against Intel's intrinsics
/// guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comile_ss(a: __m128, b: __m128) -> i32 {
    unsafe { comile_ss(a, b) }
}
682
/// Returns the `i32` produced by the `comigt_ss` binding (declared outside
/// this excerpt) for `a` and `b`; the test attribute expects `comiss`.
///
/// NOTE(review): presumably `1` when lane 0 of `a` is greater than lane 0
/// of `b` and `0` otherwise — confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comigt_ss(a: __m128, b: __m128) -> i32 {
    unsafe { comigt_ss(a, b) }
}
695
/// Returns the `i32` produced by the `comige_ss` binding (declared outside
/// this excerpt) for `a` and `b`; the test attribute expects `comiss`.
///
/// NOTE(review): presumably `1` when lane 0 of `a` is greater than or equal
/// to lane 0 of `b` and `0` otherwise — confirm against Intel's intrinsics
/// guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comige_ss(a: __m128, b: __m128) -> i32 {
    unsafe { comige_ss(a, b) }
}
708
/// Returns the `i32` produced by the `comineq_ss` binding (declared outside
/// this excerpt) for `a` and `b`; the test attribute expects `comiss`.
///
/// NOTE(review): presumably `1` when lane 0 of `a` differs from lane 0 of
/// `b` and `0` otherwise — confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comineq_ss(a: __m128, b: __m128) -> i32 {
    unsafe { comineq_ss(a, b) }
}
720
/// Returns the `i32` produced by the `ucomieq_ss` binding (declared outside
/// this excerpt) for `a` and `b`; the test attribute expects `ucomiss`.
///
/// NOTE(review): presumably `1` when lane 0 of `a` equals lane 0 of `b` and
/// `0` otherwise; how this differs from `_mm_comieq_ss` (e.g. for NaN
/// inputs) is not visible here — confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomieq_ss(a: __m128, b: __m128) -> i32 {
    unsafe { ucomieq_ss(a, b) }
}
733
/// Returns the `i32` produced by the `ucomilt_ss` binding (declared outside
/// this excerpt) for `a` and `b`; the test attribute expects `ucomiss`.
///
/// NOTE(review): presumably `1` when lane 0 of `a` is less than lane 0 of
/// `b` and `0` otherwise; how this differs from `_mm_comilt_ss` (e.g. for
/// NaN inputs) is not visible here — confirm against Intel's intrinsics
/// guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomilt_ss(a: __m128, b: __m128) -> i32 {
    unsafe { ucomilt_ss(a, b) }
}
747
/// Returns the `i32` produced by the `ucomile_ss` binding (declared outside
/// this excerpt) for `a` and `b`; the test attribute expects `ucomiss`.
///
/// NOTE(review): presumably `1` when lane 0 of `a` is less than or equal to
/// lane 0 of `b` and `0` otherwise; how this differs from `_mm_comile_ss`
/// (e.g. for NaN inputs) is not visible here — confirm against Intel's
/// intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomile_ss(a: __m128, b: __m128) -> i32 {
    unsafe { ucomile_ss(a, b) }
}
761
/// Returns the `i32` produced by the `ucomigt_ss` binding (declared outside
/// this excerpt) for `a` and `b`; the test attribute expects `ucomiss`.
///
/// NOTE(review): presumably `1` when lane 0 of `a` is greater than lane 0
/// of `b` and `0` otherwise; how this differs from `_mm_comigt_ss` (e.g.
/// for NaN inputs) is not visible here — confirm against Intel's intrinsics
/// guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomigt_ss(a: __m128, b: __m128) -> i32 {
    unsafe { ucomigt_ss(a, b) }
}
775
/// Returns the `i32` produced by the `ucomige_ss` binding (declared outside
/// this excerpt) for `a` and `b`; the test attribute expects `ucomiss`.
///
/// NOTE(review): presumably `1` when lane 0 of `a` is greater than or equal
/// to lane 0 of `b` and `0` otherwise; how this differs from
/// `_mm_comige_ss` (e.g. for NaN inputs) is not visible here — confirm
/// against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomige_ss(a: __m128, b: __m128) -> i32 {
    unsafe { ucomige_ss(a, b) }
}
789
/// Returns the `i32` produced by the `ucomineq_ss` binding (declared
/// outside this excerpt) for `a` and `b`; the test attribute expects
/// `ucomiss`.
///
/// NOTE(review): presumably `1` when lane 0 of `a` differs from lane 0 of
/// `b` and `0` otherwise; how this differs from `_mm_comineq_ss` (e.g. for
/// NaN inputs) is not visible here — confirm against Intel's intrinsics
/// guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomineq_ss(a: __m128, b: __m128) -> i32 {
    unsafe { ucomineq_ss(a, b) }
}
802
/// Returns the `i32` produced by the `cvtss2si` binding (declared outside
/// this excerpt) for `a`; the test attribute expects `cvtss2si`.
///
/// NOTE(review): presumably converts lane 0 of `a` to a 32-bit integer
/// using the current rounding mode; the result for out-of-range or NaN
/// input is not visible here — confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtss_si32(a: __m128) -> i32 {
    unsafe { cvtss2si(a) }
}
819
/// Alias for [`_mm_cvtss_si32`]: forwards `a` unchanged and returns its
/// result.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvt_ss2si(a: __m128) -> i32 {
    _mm_cvtss_si32(a)
}
830
/// Returns the `i32` produced by the `cvttss2si` binding (declared outside
/// this excerpt) for `a`; the test attribute expects `cvttss2si`.
///
/// NOTE(review): presumably converts lane 0 of `a` to a 32-bit integer with
/// truncation toward zero; the result for out-of-range or NaN input is not
/// visible here — confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvttss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvttss_si32(a: __m128) -> i32 {
    unsafe { cvttss2si(a) }
}
849
/// Alias for [`_mm_cvttss_si32`]: forwards `a` unchanged and returns its
/// result.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvttss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtt_ss2si(a: __m128) -> i32 {
    _mm_cvttss_si32(a)
}
860
/// Extracts lane 0 of `a` and returns it as an `f32`.
#[inline]
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtss_f32(a: __m128) -> f32 {
    unsafe { simd_extract!(a, 0) }
}
872
/// Forwards `a` and `b` to the `cvtsi2ss` binding (declared outside this
/// excerpt) and returns its result; the test attribute expects `cvtsi2ss`.
///
/// NOTE(review): presumably converts `b` to `f32`, places it in lane 0 and
/// copies lanes 1-3 from `a` — confirm against Intel's intrinsics guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtsi2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsi32_ss(a: __m128, b: i32) -> __m128 {
    unsafe { cvtsi2ss(a, b) }
}
887
/// Alias for [`_mm_cvtsi32_ss`]: forwards `a` and `b` unchanged and returns
/// its result.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtsi2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvt_si2ss(a: __m128, b: i32) -> __m128 {
    _mm_cvtsi32_ss(a, b)
}
898
/// Builds a vector with `a` in lane 0 and `0.0` in lanes 1-3.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_ss(a: f32) -> __m128 {
    __m128([a, 0.0, 0.0, 0.0])
}
910
911#[inline]
915#[target_feature(enable = "sse")]
916#[cfg_attr(test, assert_instr(shufps))]
917#[stable(feature = "simd_x86", since = "1.27.0")]
918pub fn _mm_set1_ps(a: f32) -> __m128 {
919    __m128([a, a, a, a])
920}
921
/// Alias for [`_mm_set1_ps`]: forwards `a` unchanged and returns its result.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(shufps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_ps1(a: f32) -> __m128 {
    _mm_set1_ps(a)
}
932
/// Builds a vector from four values given highest lane first.
///
/// `a` lands in lane 3, `b` in lane 2, `c` in lane 1 and `d` in lane 0, so
/// the argument order is the reverse of [`_mm_setr_ps`].
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(unpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
    __m128([d, c, b, a])
}
959
/// Builds a vector from four values given lowest lane first.
///
/// `a` lands in lane 0, `b` in lane 1, `c` in lane 2 and `d` in lane 3.
/// The instruction expected by the tests depends on the target:
/// `unpcklps` on MSVC or x86_64, `movaps` on non-MSVC 32-bit x86.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(
    all(test, any(target_env = "msvc", target_arch = "x86_64")),
    assert_instr(unpcklps)
)]
#[cfg_attr(
    all(test, all(not(target_env = "msvc"), target_arch = "x86")),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
    __m128([a, b, c, d])
}
985
/// Returns a vector whose bytes are all zero.
///
/// The value comes from `mem::zeroed()` evaluated inside a `const` block.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setzero_ps() -> __m128 {
    const { unsafe { mem::zeroed() } }
}
996
997#[inline]
1000#[allow(non_snake_case)]
1001#[unstable(feature = "stdarch_x86_mm_shuffle", issue = "111147")]
1002pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 {
1003    ((z << 6) | (y << 4) | (x << 2) | w) as i32
1004}
1005
/// Shuffles lanes of `a` and `b` according to the two-bit fields of `MASK`.
///
/// Result lanes 0 and 1 are picked from `a` using bits 1:0 and 3:2 of
/// `MASK`; result lanes 2 and 3 are picked from `b` using bits 5:4 and 7:6.
///
/// NOTE(review): `static_assert_uimm_bits!(MASK, 8)` presumably rejects, at
/// compile time, any `MASK` that does not fit in 8 unsigned bits — confirm
/// against the macro definition.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(shufps, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_shuffle_ps<const MASK: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(MASK, 8);
    unsafe {
        simd_shuffle!(
            a,
            b,
            [
                // Indices 0..=3 address lanes of `a`.
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                // Adding 4 moves the index into the lanes of `b`.
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        )
    }
}
1039
/// Interleaves the upper two lanes of `a` and `b`.
///
/// With shuffle indices `[2, 6, 3, 7]` the result is
/// `[a[2], b[2], a[3], b[3]]`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(unpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
}
1051
/// Interleaves the lower two lanes of `a` and `b`.
///
/// With shuffle indices `[0, 4, 1, 5]` the result is
/// `[a[0], b[0], a[1], b[1]]`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(unpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
}
1063
/// Combines the upper halves of `b` and `a`.
///
/// With shuffle indices `[6, 7, 2, 3]` the result is
/// `[b[2], b[3], a[2], a[3]]`: the upper half of `b` moves to the lower
/// half of the result, and the upper half of `a` stays in place.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movhlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, b, [6, 7, 2, 3]) }
}
1076
/// Combines the lower halves of `a` and `b`.
///
/// With shuffle indices `[0, 1, 4, 5]` the result is
/// `[a[0], a[1], b[0], b[1]]`: the lower half of `a` stays in place, and
/// the lower half of `b` moves to the upper half of the result.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, b, [0, 4 - 3, 4, 5]) }
}
1088
1089#[inline]
1096#[target_feature(enable = "sse")]
1097#[cfg_attr(test, assert_instr(movmskps))]
1098#[stable(feature = "simd_x86", since = "1.27.0")]
1099pub fn _mm_movemask_ps(a: __m128) -> i32 {
1100    unsafe {
1103        let mask: i32x4 = simd_lt(transmute(a), i32x4::ZERO);
1104        simd_bitmask::<i32x4, u8>(mask).into()
1105    }
1106}
1107
/// Reads one `f32` from `p` into lane 0 and sets lanes 1-3 to `0.0`.
///
/// # Safety
///
/// `p` is dereferenced as `*const f32`, so it must be valid for reading an
/// `f32` under the usual raw-pointer dereference rules.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_ss(p: *const f32) -> __m128 {
    __m128([*p, 0.0, 0.0, 0.0])
}
1121
1122#[inline]
1130#[target_feature(enable = "sse")]
1131#[cfg_attr(test, assert_instr(movss))]
1132#[stable(feature = "simd_x86", since = "1.27.0")]
1133pub unsafe fn _mm_load1_ps(p: *const f32) -> __m128 {
1134    let a = *p;
1135    __m128([a, a, a, a])
1136}
1137
/// Alias for [`_mm_load1_ps`]: forwards `p` unchanged and returns its
/// result.
///
/// # Safety
///
/// Same requirements as [`_mm_load1_ps`], which dereferences `p`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_ps1(p: *const f32) -> __m128 {
    _mm_load1_ps(p)
}
1148
/// Loads a whole `__m128` from `p`.
///
/// # Safety
///
/// `p` is cast to `*const __m128` and dereferenced, so it must be valid for
/// reading a full `__m128` and must satisfy the alignment of `__m128`. Use
/// [`_mm_loadu_ps`] when the pointer may be unaligned.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_load_ps(p: *const f32) -> __m128 {
    *(p as *const __m128)
}
1172
1173#[inline]
1183#[target_feature(enable = "sse")]
1184#[cfg_attr(test, assert_instr(movups))]
1185#[stable(feature = "simd_x86", since = "1.27.0")]
1186pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 {
1187    let mut dst = _mm_undefined_ps();
1190    ptr::copy_nonoverlapping(
1191        p as *const u8,
1192        ptr::addr_of_mut!(dst) as *mut u8,
1193        mem::size_of::<__m128>(),
1194    );
1195    dst
1196}
1197
1198#[inline]
1220#[target_feature(enable = "sse")]
1221#[cfg_attr(
1222    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1223    assert_instr(movaps)
1224)]
1225#[stable(feature = "simd_x86", since = "1.27.0")]
1226pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 {
1227    let a = _mm_load_ps(p);
1228    simd_shuffle!(a, a, [3, 2, 1, 0])
1229}
1230
/// Writes lane 0 of `a` to the `f32` at `p`.
///
/// # Safety
///
/// `p` is dereferenced for a write, so it must be valid for writing an
/// `f32` under the usual raw-pointer dereference rules.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) {
    *p = simd_extract!(a, 0);
}
1243
1244#[inline]
1263#[target_feature(enable = "sse")]
1264#[cfg_attr(
1265    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1266    assert_instr(movaps)
1267)]
1268#[stable(feature = "simd_x86", since = "1.27.0")]
1269#[allow(clippy::cast_ptr_alignment)]
1270pub unsafe fn _mm_store1_ps(p: *mut f32, a: __m128) {
1271    let b: __m128 = simd_shuffle!(a, a, [0, 0, 0, 0]);
1272    *(p as *mut __m128) = b;
1273}
1274
/// Alias for [`_mm_store1_ps`]: forwards `p` and `a` unchanged.
///
/// # Safety
///
/// Same requirements as [`_mm_store1_ps`], which writes a whole `__m128`
/// through `p`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_ps1(p: *mut f32, a: __m128) {
    _mm_store1_ps(p, a);
}
1288
/// Stores the whole vector `a` at `p`.
///
/// # Safety
///
/// `p` is cast to `*mut __m128` and written through, so it must be valid
/// for writing a full `__m128` and must satisfy the alignment of `__m128`.
/// Use [`_mm_storeu_ps`] when the pointer may be unaligned.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store_ps(p: *mut f32, a: __m128) {
    *(p as *mut __m128) = a;
}
1311
1312#[inline]
1320#[target_feature(enable = "sse")]
1321#[cfg_attr(test, assert_instr(movups))]
1322#[stable(feature = "simd_x86", since = "1.27.0")]
1323pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) {
1324    ptr::copy_nonoverlapping(
1325        ptr::addr_of!(a) as *const u8,
1326        p as *mut u8,
1327        mem::size_of::<__m128>(),
1328    );
1329}
1330
1331#[inline]
1348#[target_feature(enable = "sse")]
1349#[cfg_attr(
1350    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1351    assert_instr(movaps)
1352)]
1353#[stable(feature = "simd_x86", since = "1.27.0")]
1354#[allow(clippy::cast_ptr_alignment)]
1355pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) {
1356    let b: __m128 = simd_shuffle!(a, a, [3, 2, 1, 0]);
1357    *(p as *mut __m128) = b;
1358}
1359
/// Returns `a` with its lane 0 replaced by lane 0 of `b`.
///
/// With shuffle indices `[4, 1, 2, 3]` the result is
/// `[b[0], a[1], a[2], a[3]]`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_move_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, b, [4, 1, 2, 3]) }
}
1376
/// Calls the `sfence` binding (declared outside this excerpt); the test
/// attribute expects the `sfence` instruction.
///
/// NOTE(review): presumably a store fence that orders earlier stores before
/// later ones — confirm against Intel's documentation.
///
/// # Safety
///
/// NOTE(review): the function is declared `unsafe`, but the caller
/// obligations are not visible in this excerpt — confirm and document.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(sfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sfence() {
    sfence()
}
1451
1452#[inline]
1467#[target_feature(enable = "sse")]
1468#[cfg_attr(test, assert_instr(stmxcsr))]
1469#[stable(feature = "simd_x86", since = "1.27.0")]
1470#[deprecated(
1471    since = "1.75.0",
1472    note = "see `_mm_getcsr` documentation - use inline assembly instead"
1473)]
1474pub unsafe fn _mm_getcsr() -> u32 {
1475    unsafe {
1476        let mut result = 0_i32;
1477        stmxcsr(ptr::addr_of_mut!(result) as *mut i8);
1478        result as u32
1479    }
1480}
1481
/// Writes `val` as the control/status value via the `ldmxcsr` binding.
///
/// The address of the by-value parameter `val` is passed to `ldmxcsr` as
/// `*const i8`.
///
/// Deprecated since 1.75.0; the deprecation note recommends inline assembly
/// instead.
///
/// # Safety
///
/// NOTE(review): the function is declared `unsafe`; presumably changing
/// the floating-point control state behind the compiler's back is the
/// hazard — confirm and document the exact caller obligations.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ldmxcsr))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[deprecated(
    since = "1.75.0",
    note = "see `_mm_setcsr` documentation - use inline assembly instead"
)]
pub unsafe fn _mm_setcsr(val: u32) {
    ldmxcsr(ptr::addr_of!(val) as *const i8);
}
1626
// Exception-state bits of the value handled by `_mm_getcsr`/`_mm_setcsr`.
// `_MM_GET_EXCEPTION_STATE` and `_MM_SET_EXCEPTION_STATE` select them with
// `_MM_EXCEPT_MASK`.
// NOTE(review): the per-flag meanings are taken from the constant names —
// confirm against the MXCSR register documentation.

/// Exception-state flag, bit 0 (by name: invalid operation).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_INVALID: u32 = 0x0001;
/// Exception-state flag, bit 1 (by name: denormal operand).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_DENORM: u32 = 0x0002;
/// Exception-state flag, bit 2 (by name: division by zero).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_DIV_ZERO: u32 = 0x0004;
/// Exception-state flag, bit 3 (by name: overflow).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_OVERFLOW: u32 = 0x0008;
/// Exception-state flag, bit 4 (by name: underflow).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_UNDERFLOW: u32 = 0x0010;
/// Exception-state flag, bit 5 (by name: inexact result).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_INEXACT: u32 = 0x0020;
/// Bits 0-5: the bitwise OR of the six `_MM_EXCEPT_*` flags above.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_MASK: u32 = 0x003f;
1648
// Exception-mask bits of the value handled by `_mm_getcsr`/`_mm_setcsr`.
// `_MM_GET_EXCEPTION_MASK` and `_MM_SET_EXCEPTION_MASK` select them with
// `_MM_MASK_MASK`. Each value is the matching `_MM_EXCEPT_*` flag shifted
// left by 7 bits.
// NOTE(review): the per-flag meanings are taken from the constant names —
// confirm against the MXCSR register documentation.

/// Exception-mask bit 7 (by name: masks the invalid-operation exception).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_INVALID: u32 = 0x0080;
/// Exception-mask bit 8 (by name: masks the denormal-operand exception).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_DENORM: u32 = 0x0100;
/// Exception-mask bit 9 (by name: masks the division-by-zero exception).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_DIV_ZERO: u32 = 0x0200;
/// Exception-mask bit 10 (by name: masks the overflow exception).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_OVERFLOW: u32 = 0x0400;
/// Exception-mask bit 11 (by name: masks the underflow exception).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_UNDERFLOW: u32 = 0x0800;
/// Exception-mask bit 12 (by name: masks the inexact-result exception).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_INEXACT: u32 = 0x1000;
/// Bits 7-12: the bitwise OR of the six `_MM_MASK_*` flags above.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_MASK: u32 = 0x1f80;
1670
// Rounding-mode field (bits 13-14) of the value handled by
// `_mm_getcsr`/`_mm_setcsr`. `_MM_GET_ROUNDING_MODE` and
// `_MM_SET_ROUNDING_MODE` select it with `_MM_ROUND_MASK`.
// NOTE(review): the per-mode meanings are taken from the constant names —
// confirm against the MXCSR register documentation.

/// Rounding-mode field value `0b00` (by name: round to nearest).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_NEAREST: u32 = 0x0000;
/// Rounding-mode field value `0b01` (by name: round down).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_DOWN: u32 = 0x2000;
/// Rounding-mode field value `0b10` (by name: round up).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_UP: u32 = 0x4000;
/// Rounding-mode field value `0b11` (by name: round toward zero).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_TOWARD_ZERO: u32 = 0x6000;

/// Bits 13-14: selects the rounding-mode field. Numerically equal to
/// `_MM_ROUND_TOWARD_ZERO`, which sets both bits.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_MASK: u32 = 0x6000;
1687
// Flush-to-zero bit (bit 15) of the value handled by
// `_mm_getcsr`/`_mm_setcsr`. `_MM_GET_FLUSH_ZERO_MODE` and
// `_MM_SET_FLUSH_ZERO_MODE` select it with `_MM_FLUSH_ZERO_MASK`.
// NOTE(review): the on/off meaning is taken from the constant names —
// confirm against the MXCSR register documentation.

/// Bit 15: selects the flush-to-zero bit.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FLUSH_ZERO_MASK: u32 = 0x8000;
/// Flush-to-zero bit set (by name: mode enabled).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FLUSH_ZERO_ON: u32 = 0x8000;
/// Flush-to-zero bit clear (by name: mode disabled).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FLUSH_ZERO_OFF: u32 = 0x0000;
1697
1698#[inline]
1702#[allow(deprecated)] #[allow(non_snake_case)]
1704#[target_feature(enable = "sse")]
1705#[stable(feature = "simd_x86", since = "1.27.0")]
1706#[deprecated(
1707    since = "1.75.0",
1708    note = "see `_mm_getcsr` documentation - use inline assembly instead"
1709)]
1710pub unsafe fn _MM_GET_EXCEPTION_MASK() -> u32 {
1711    _mm_getcsr() & _MM_MASK_MASK
1712}
1713
1714#[inline]
1718#[allow(deprecated)] #[allow(non_snake_case)]
1720#[target_feature(enable = "sse")]
1721#[stable(feature = "simd_x86", since = "1.27.0")]
1722#[deprecated(
1723    since = "1.75.0",
1724    note = "see `_mm_getcsr` documentation - use inline assembly instead"
1725)]
1726pub unsafe fn _MM_GET_EXCEPTION_STATE() -> u32 {
1727    _mm_getcsr() & _MM_EXCEPT_MASK
1728}
1729
1730#[inline]
1734#[allow(deprecated)] #[allow(non_snake_case)]
1736#[target_feature(enable = "sse")]
1737#[stable(feature = "simd_x86", since = "1.27.0")]
1738#[deprecated(
1739    since = "1.75.0",
1740    note = "see `_mm_getcsr` documentation - use inline assembly instead"
1741)]
1742pub unsafe fn _MM_GET_FLUSH_ZERO_MODE() -> u32 {
1743    _mm_getcsr() & _MM_FLUSH_ZERO_MASK
1744}
1745
1746#[inline]
1750#[allow(deprecated)] #[allow(non_snake_case)]
1752#[target_feature(enable = "sse")]
1753#[stable(feature = "simd_x86", since = "1.27.0")]
1754#[deprecated(
1755    since = "1.75.0",
1756    note = "see `_mm_getcsr` documentation - use inline assembly instead"
1757)]
1758pub unsafe fn _MM_GET_ROUNDING_MODE() -> u32 {
1759    _mm_getcsr() & _MM_ROUND_MASK
1760}
1761
1762#[inline]
1766#[allow(deprecated)] #[allow(non_snake_case)]
1768#[target_feature(enable = "sse")]
1769#[stable(feature = "simd_x86", since = "1.27.0")]
1770#[deprecated(
1771    since = "1.75.0",
1772    note = "see `_mm_setcsr` documentation - use inline assembly instead"
1773)]
1774pub unsafe fn _MM_SET_EXCEPTION_MASK(x: u32) {
1775    _mm_setcsr((_mm_getcsr() & !_MM_MASK_MASK) | (x & _MM_MASK_MASK))
1776}
1777
1778#[inline]
1782#[allow(deprecated)] #[allow(non_snake_case)]
1784#[target_feature(enable = "sse")]
1785#[stable(feature = "simd_x86", since = "1.27.0")]
1786#[deprecated(
1787    since = "1.75.0",
1788    note = "see `_mm_setcsr` documentation - use inline assembly instead"
1789)]
1790pub unsafe fn _MM_SET_EXCEPTION_STATE(x: u32) {
1791    _mm_setcsr((_mm_getcsr() & !_MM_EXCEPT_MASK) | (x & _MM_EXCEPT_MASK))
1792}
1793
1794#[inline]
1798#[allow(deprecated)] #[allow(non_snake_case)]
1800#[target_feature(enable = "sse")]
1801#[stable(feature = "simd_x86", since = "1.27.0")]
1802#[deprecated(
1803    since = "1.75.0",
1804    note = "see `_mm_setcsr` documentation - use inline assembly instead"
1805)]
1806pub unsafe fn _MM_SET_FLUSH_ZERO_MODE(x: u32) {
1807    _mm_setcsr((_mm_getcsr() & !_MM_FLUSH_ZERO_MASK) | (x & _MM_FLUSH_ZERO_MASK))
1808}
1809
1810#[inline]
1814#[allow(deprecated)] #[allow(non_snake_case)]
1816#[target_feature(enable = "sse")]
1817#[stable(feature = "simd_x86", since = "1.27.0")]
1818#[deprecated(
1819    since = "1.75.0",
1820    note = "see `_mm_setcsr` documentation - use inline assembly instead"
1821)]
1822pub unsafe fn _MM_SET_ROUNDING_MODE(x: u32) {
1823    _mm_setcsr((_mm_getcsr() & !_MM_ROUND_MASK) | (x & _MM_ROUND_MASK))
1824}
1825
// Strategy values for `_mm_prefetch`, which splits them into bit 2 and the
// low two bits (`STRATEGY & 3`).
// NOTE(review): the cache-level meaning of each hint is not visible in this
// file — confirm against Intel's `_mm_prefetch` documentation.

/// Prefetch hint with bit 2 clear and low bits `3`; the tests expect
/// `prefetcht0` for it.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_T0: i32 = 3;

/// Prefetch hint with bit 2 clear and low bits `2`; the tests expect
/// `prefetcht1` for it.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_T1: i32 = 2;

/// Prefetch hint with bit 2 clear and low bits `1`; the tests expect
/// `prefetcht2` for it.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_T2: i32 = 1;

/// Prefetch hint with bit 2 clear and low bits `0`; the tests expect
/// `prefetchnta` for it.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_NTA: i32 = 0;

/// Prefetch hint with bit 2 set and low bits `3` (i.e. `_MM_HINT_T0 | 4`).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_ET0: i32 = 7;

/// Prefetch hint with bit 2 set and low bits `2` (i.e. `_MM_HINT_T1 | 4`).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_ET1: i32 = 6;
1849
1850#[inline]
1893#[target_feature(enable = "sse")]
1894#[cfg_attr(test, assert_instr(prefetcht0, STRATEGY = _MM_HINT_T0))]
1895#[cfg_attr(test, assert_instr(prefetcht1, STRATEGY = _MM_HINT_T1))]
1896#[cfg_attr(test, assert_instr(prefetcht2, STRATEGY = _MM_HINT_T2))]
1897#[cfg_attr(test, assert_instr(prefetchnta, STRATEGY = _MM_HINT_NTA))]
1898#[rustc_legacy_const_generics(1)]
1899#[stable(feature = "simd_x86", since = "1.27.0")]
1900pub unsafe fn _mm_prefetch<const STRATEGY: i32>(p: *const i8) {
1901    static_assert_uimm_bits!(STRATEGY, 3);
1902    prefetch(p, (STRATEGY >> 2) & 1, STRATEGY & 3, 1);
1905}
1906
1907#[inline]
1914#[target_feature(enable = "sse")]
1915#[stable(feature = "simd_x86", since = "1.27.0")]
1916pub fn _mm_undefined_ps() -> __m128 {
1917    const { unsafe { mem::zeroed() } }
1918}
1919
1920#[inline]
1924#[allow(non_snake_case)]
1925#[target_feature(enable = "sse")]
1926#[stable(feature = "simd_x86", since = "1.27.0")]
1927pub fn _MM_TRANSPOSE4_PS(
1928    row0: &mut __m128,
1929    row1: &mut __m128,
1930    row2: &mut __m128,
1931    row3: &mut __m128,
1932) {
1933    let tmp0 = _mm_unpacklo_ps(*row0, *row1);
1934    let tmp2 = _mm_unpacklo_ps(*row2, *row3);
1935    let tmp1 = _mm_unpackhi_ps(*row0, *row1);
1936    let tmp3 = _mm_unpackhi_ps(*row2, *row3);
1937
1938    *row0 = _mm_movelh_ps(tmp0, tmp2);
1939    *row1 = _mm_movehl_ps(tmp2, tmp0);
1940    *row2 = _mm_movelh_ps(tmp1, tmp3);
1941    *row3 = _mm_movehl_ps(tmp3, tmp1);
1942}
1943
1944#[allow(improper_ctypes)]
1945unsafe extern "C" {
1946    #[link_name = "llvm.x86.sse.rcp.ss"]
1947    fn rcpss(a: __m128) -> __m128;
1948    #[link_name = "llvm.x86.sse.rcp.ps"]
1949    fn rcpps(a: __m128) -> __m128;
1950    #[link_name = "llvm.x86.sse.rsqrt.ss"]
1951    fn rsqrtss(a: __m128) -> __m128;
1952    #[link_name = "llvm.x86.sse.rsqrt.ps"]
1953    fn rsqrtps(a: __m128) -> __m128;
1954    #[link_name = "llvm.x86.sse.min.ss"]
1955    fn minss(a: __m128, b: __m128) -> __m128;
1956    #[link_name = "llvm.x86.sse.min.ps"]
1957    fn minps(a: __m128, b: __m128) -> __m128;
1958    #[link_name = "llvm.x86.sse.max.ss"]
1959    fn maxss(a: __m128, b: __m128) -> __m128;
1960    #[link_name = "llvm.x86.sse.max.ps"]
1961    fn maxps(a: __m128, b: __m128) -> __m128;
1962    #[link_name = "llvm.x86.sse.cmp.ps"]
1963    fn cmpps(a: __m128, b: __m128, imm8: i8) -> __m128;
1964    #[link_name = "llvm.x86.sse.comieq.ss"]
1965    fn comieq_ss(a: __m128, b: __m128) -> i32;
1966    #[link_name = "llvm.x86.sse.comilt.ss"]
1967    fn comilt_ss(a: __m128, b: __m128) -> i32;
1968    #[link_name = "llvm.x86.sse.comile.ss"]
1969    fn comile_ss(a: __m128, b: __m128) -> i32;
1970    #[link_name = "llvm.x86.sse.comigt.ss"]
1971    fn comigt_ss(a: __m128, b: __m128) -> i32;
1972    #[link_name = "llvm.x86.sse.comige.ss"]
1973    fn comige_ss(a: __m128, b: __m128) -> i32;
1974    #[link_name = "llvm.x86.sse.comineq.ss"]
1975    fn comineq_ss(a: __m128, b: __m128) -> i32;
1976    #[link_name = "llvm.x86.sse.ucomieq.ss"]
1977    fn ucomieq_ss(a: __m128, b: __m128) -> i32;
1978    #[link_name = "llvm.x86.sse.ucomilt.ss"]
1979    fn ucomilt_ss(a: __m128, b: __m128) -> i32;
1980    #[link_name = "llvm.x86.sse.ucomile.ss"]
1981    fn ucomile_ss(a: __m128, b: __m128) -> i32;
1982    #[link_name = "llvm.x86.sse.ucomigt.ss"]
1983    fn ucomigt_ss(a: __m128, b: __m128) -> i32;
1984    #[link_name = "llvm.x86.sse.ucomige.ss"]
1985    fn ucomige_ss(a: __m128, b: __m128) -> i32;
1986    #[link_name = "llvm.x86.sse.ucomineq.ss"]
1987    fn ucomineq_ss(a: __m128, b: __m128) -> i32;
1988    #[link_name = "llvm.x86.sse.cvtss2si"]
1989    fn cvtss2si(a: __m128) -> i32;
1990    #[link_name = "llvm.x86.sse.cvttss2si"]
1991    fn cvttss2si(a: __m128) -> i32;
1992    #[link_name = "llvm.x86.sse.cvtsi2ss"]
1993    fn cvtsi2ss(a: __m128, b: i32) -> __m128;
1994    #[link_name = "llvm.x86.sse.sfence"]
1995    fn sfence();
1996    #[link_name = "llvm.x86.sse.stmxcsr"]
1997    fn stmxcsr(p: *mut i8);
1998    #[link_name = "llvm.x86.sse.ldmxcsr"]
1999    fn ldmxcsr(p: *const i8);
2000    #[link_name = "llvm.prefetch"]
2001    fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32);
2002    #[link_name = "llvm.x86.sse.cmp.ss"]
2003    fn cmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
2004}
2005
2006#[inline]
2022#[target_feature(enable = "sse")]
2023#[cfg_attr(test, assert_instr(movntps))]
2024#[stable(feature = "simd_x86", since = "1.27.0")]
2025#[allow(clippy::cast_ptr_alignment)]
2026pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) {
2027    crate::arch::asm!(
2028        vps!("movntps", ",{a}"),
2029        p = in(reg) mem_addr,
2030        a = in(xmm_reg) a,
2031        options(nostack, preserves_flags),
2032    );
2033}
2034
2035#[cfg(test)]
2036mod tests {
2037    use crate::{hint::black_box, mem::transmute, ptr};
2038    use std::boxed;
2039    use stdarch_test::simd_test;
2040
2041    use crate::core_arch::{simd::*, x86::*};
2042
2043    const NAN: f32 = f32::NAN; // Short alias for the NaN lanes fed to the comparison tests in this module.
2044
2045    #[simd_test(enable = "sse")]
2046    unsafe fn test_mm_add_ps() {
2047        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2048        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2049        let r = _mm_add_ps(a, b);
2050        assert_eq_m128(r, _mm_setr_ps(-101.0, 25.0, 0.0, -15.0));
2051    }
2052
2053    #[simd_test(enable = "sse")]
2054    unsafe fn test_mm_add_ss() {
2055        let a = _mm_set_ps(-1.0, 5.0, 0.0, -10.0);
2056        let b = _mm_set_ps(-100.0, 20.0, 0.0, -5.0);
2057        let r = _mm_add_ss(a, b);
2058        assert_eq_m128(r, _mm_set_ps(-1.0, 5.0, 0.0, -15.0));
2059    }
2060
2061    #[simd_test(enable = "sse")]
2062    unsafe fn test_mm_sub_ps() {
2063        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2064        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2065        let r = _mm_sub_ps(a, b);
2066        assert_eq_m128(r, _mm_setr_ps(99.0, -15.0, 0.0, -5.0));
2067    }
2068
2069    #[simd_test(enable = "sse")]
2070    unsafe fn test_mm_sub_ss() {
2071        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2072        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2073        let r = _mm_sub_ss(a, b);
2074        assert_eq_m128(r, _mm_setr_ps(99.0, 5.0, 0.0, -10.0));
2075    }
2076
2077    #[simd_test(enable = "sse")]
2078    unsafe fn test_mm_mul_ps() {
2079        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2080        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2081        let r = _mm_mul_ps(a, b);
2082        assert_eq_m128(r, _mm_setr_ps(100.0, 100.0, 0.0, 50.0));
2083    }
2084
2085    #[simd_test(enable = "sse")]
2086    unsafe fn test_mm_mul_ss() {
2087        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2088        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2089        let r = _mm_mul_ss(a, b);
2090        assert_eq_m128(r, _mm_setr_ps(100.0, 5.0, 0.0, -10.0));
2091    }
2092
2093    #[simd_test(enable = "sse")]
2094    unsafe fn test_mm_div_ps() {
2095        let a = _mm_setr_ps(-1.0, 5.0, 2.0, -10.0);
2096        let b = _mm_setr_ps(-100.0, 20.0, 0.2, -5.0);
2097        let r = _mm_div_ps(a, b);
2098        assert_eq_m128(r, _mm_setr_ps(0.01, 0.25, 10.0, 2.0));
2099    }
2100
2101    #[simd_test(enable = "sse")]
2102    unsafe fn test_mm_div_ss() {
2103        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2104        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2105        let r = _mm_div_ss(a, b);
2106        assert_eq_m128(r, _mm_setr_ps(0.01, 5.0, 0.0, -10.0));
2107    }
2108
2109    #[simd_test(enable = "sse")]
2110    unsafe fn test_mm_sqrt_ss() {
2111        let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2112        let r = _mm_sqrt_ss(a);
2113        let e = _mm_setr_ps(2.0, 13.0, 16.0, 100.0);
2114        assert_eq_m128(r, e);
2115    }
2116
2117    #[simd_test(enable = "sse")]
2118    unsafe fn test_mm_sqrt_ps() {
2119        let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2120        let r = _mm_sqrt_ps(a);
2121        let e = _mm_setr_ps(2.0, 3.6055512, 4.0, 10.0);
2122        assert_eq_m128(r, e);
2123    }
2124
2125    #[simd_test(enable = "sse")]
2126    unsafe fn test_mm_rcp_ss() {
2127        let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2128        let r = _mm_rcp_ss(a);
2129        let e = _mm_setr_ps(0.24993896, 13.0, 16.0, 100.0);
2130        let rel_err = 0.00048828125;
2131        assert_approx_eq!(get_m128(r, 0), get_m128(e, 0), 2. * rel_err);
2132        for i in 1..4 {
2133            assert_eq!(get_m128(r, i), get_m128(e, i));
2134        }
2135    }
2136
2137    #[simd_test(enable = "sse")]
2138    unsafe fn test_mm_rcp_ps() {
2139        let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2140        let r = _mm_rcp_ps(a);
2141        let e = _mm_setr_ps(0.24993896, 0.0769043, 0.06248474, 0.0099983215);
2142        let rel_err = 0.00048828125;
2143        for i in 0..4 {
2144            assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
2145        }
2146    }
2147
2148    #[simd_test(enable = "sse")]
2149    unsafe fn test_mm_rsqrt_ss() {
2150        let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2151        let r = _mm_rsqrt_ss(a);
2152        let e = _mm_setr_ps(0.49987793, 13.0, 16.0, 100.0);
2153        let rel_err = 0.00048828125;
2154        for i in 0..4 {
2155            assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
2156        }
2157    }
2158
2159    #[simd_test(enable = "sse")]
2160    unsafe fn test_mm_rsqrt_ps() {
2161        let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2162        let r = _mm_rsqrt_ps(a);
2163        let e = _mm_setr_ps(0.49987793, 0.2772827, 0.24993896, 0.099990845);
2164        let rel_err = 0.00048828125;
2165        for i in 0..4 {
2166            assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
2167        }
2168    }
2169
2170    #[simd_test(enable = "sse")]
2171    unsafe fn test_mm_min_ss() {
2172        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2173        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2174        let r = _mm_min_ss(a, b);
2175        assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
2176    }
2177
2178    #[simd_test(enable = "sse")]
2179    unsafe fn test_mm_min_ps() {
2180        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2181        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2182        let r = _mm_min_ps(a, b);
2183        assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
2184
2185        let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
2191        let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
2192        let r1: [u8; 16] = transmute(_mm_min_ps(a, b));
2193        let r2: [u8; 16] = transmute(_mm_min_ps(b, a));
2194        let a: [u8; 16] = transmute(a);
2195        let b: [u8; 16] = transmute(b);
2196        assert_eq!(r1, b);
2197        assert_eq!(r2, a);
2198        assert_ne!(a, b); }
2200
2201    #[simd_test(enable = "sse")]
2202    unsafe fn test_mm_max_ss() {
2203        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2204        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2205        let r = _mm_max_ss(a, b);
2206        assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, -10.0));
2207    }
2208
2209    #[simd_test(enable = "sse")]
2210    unsafe fn test_mm_max_ps() {
2211        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2212        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2213        let r = _mm_max_ps(a, b);
2214        assert_eq_m128(r, _mm_setr_ps(-1.0, 20.0, 0.0, -5.0));
2215
2216        let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
2218        let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
2219        let r1: [u8; 16] = transmute(_mm_max_ps(a, b));
2220        let r2: [u8; 16] = transmute(_mm_max_ps(b, a));
2221        let a: [u8; 16] = transmute(a);
2222        let b: [u8; 16] = transmute(b);
2223        assert_eq!(r1, b);
2224        assert_eq!(r2, a);
2225        assert_ne!(a, b); }
2227
2228    #[simd_test(enable = "sse")]
2229    unsafe fn test_mm_and_ps() {
2230        let a = transmute(u32x4::splat(0b0011));
2231        let b = transmute(u32x4::splat(0b0101));
2232        let r = _mm_and_ps(*black_box(&a), *black_box(&b));
2233        let e = transmute(u32x4::splat(0b0001));
2234        assert_eq_m128(r, e);
2235    }
2236
2237    #[simd_test(enable = "sse")]
2238    unsafe fn test_mm_andnot_ps() {
2239        let a = transmute(u32x4::splat(0b0011));
2240        let b = transmute(u32x4::splat(0b0101));
2241        let r = _mm_andnot_ps(*black_box(&a), *black_box(&b));
2242        let e = transmute(u32x4::splat(0b0100));
2243        assert_eq_m128(r, e);
2244    }
2245
2246    #[simd_test(enable = "sse")]
2247    unsafe fn test_mm_or_ps() {
2248        let a = transmute(u32x4::splat(0b0011));
2249        let b = transmute(u32x4::splat(0b0101));
2250        let r = _mm_or_ps(*black_box(&a), *black_box(&b));
2251        let e = transmute(u32x4::splat(0b0111));
2252        assert_eq_m128(r, e);
2253    }
2254
2255    #[simd_test(enable = "sse")]
2256    unsafe fn test_mm_xor_ps() {
2257        let a = transmute(u32x4::splat(0b0011));
2258        let b = transmute(u32x4::splat(0b0101));
2259        let r = _mm_xor_ps(*black_box(&a), *black_box(&b));
2260        let e = transmute(u32x4::splat(0b0110));
2261        assert_eq_m128(r, e);
2262    }
2263
2264    #[simd_test(enable = "sse")]
2265    unsafe fn test_mm_cmpeq_ss() {
2266        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2267        let b = _mm_setr_ps(-1.0, 5.0, 6.0, 7.0);
2268        let r: u32x4 = transmute(_mm_cmpeq_ss(a, b));
2269        let e: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0), 2.0, 3.0, 4.0));
2270        assert_eq!(r, e);
2271
2272        let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2273        let r2: u32x4 = transmute(_mm_cmpeq_ss(a, b2));
2274        let e2: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0xffffffff), 2.0, 3.0, 4.0));
2275        assert_eq!(r2, e2);
2276    }
2277
2278    #[simd_test(enable = "sse")]
2279    unsafe fn test_mm_cmplt_ss() {
2280        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2281        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2282        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2283        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2284
2285        let b1 = 0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmplt_ss(a, b));
2290        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2291        assert_eq!(rb, eb);
2292
2293        let rc: u32x4 = transmute(_mm_cmplt_ss(a, c));
2294        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2295        assert_eq!(rc, ec);
2296
2297        let rd: u32x4 = transmute(_mm_cmplt_ss(a, d));
2298        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2299        assert_eq!(rd, ed);
2300    }
2301
2302    #[simd_test(enable = "sse")]
2303    unsafe fn test_mm_cmple_ss() {
2304        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2305        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2306        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2307        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2308
2309        let b1 = 0u32; let c1 = !0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmple_ss(a, b));
2314        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2315        assert_eq!(rb, eb);
2316
2317        let rc: u32x4 = transmute(_mm_cmple_ss(a, c));
2318        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2319        assert_eq!(rc, ec);
2320
2321        let rd: u32x4 = transmute(_mm_cmple_ss(a, d));
2322        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2323        assert_eq!(rd, ed);
2324    }
2325
2326    #[simd_test(enable = "sse")]
2327    unsafe fn test_mm_cmpgt_ss() {
2328        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2329        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2330        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2331        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2332
2333        let b1 = !0u32; let c1 = 0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpgt_ss(a, b));
2338        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2339        assert_eq!(rb, eb);
2340
2341        let rc: u32x4 = transmute(_mm_cmpgt_ss(a, c));
2342        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2343        assert_eq!(rc, ec);
2344
2345        let rd: u32x4 = transmute(_mm_cmpgt_ss(a, d));
2346        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2347        assert_eq!(rd, ed);
2348    }
2349
2350    #[simd_test(enable = "sse")]
2351    unsafe fn test_mm_cmpge_ss() {
2352        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2353        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2354        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2355        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2356
2357        let b1 = !0u32; let c1 = !0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpge_ss(a, b));
2362        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2363        assert_eq!(rb, eb);
2364
2365        let rc: u32x4 = transmute(_mm_cmpge_ss(a, c));
2366        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2367        assert_eq!(rc, ec);
2368
2369        let rd: u32x4 = transmute(_mm_cmpge_ss(a, d));
2370        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2371        assert_eq!(rd, ed);
2372    }
2373
2374    #[simd_test(enable = "sse")]
2375    unsafe fn test_mm_cmpneq_ss() {
2376        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2377        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2378        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2379        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2380
2381        let b1 = !0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpneq_ss(a, b));
2386        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2387        assert_eq!(rb, eb);
2388
2389        let rc: u32x4 = transmute(_mm_cmpneq_ss(a, c));
2390        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2391        assert_eq!(rc, ec);
2392
2393        let rd: u32x4 = transmute(_mm_cmpneq_ss(a, d));
2394        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2395        assert_eq!(rd, ed);
2396    }
2397
2398    #[simd_test(enable = "sse")]
2399    unsafe fn test_mm_cmpnlt_ss() {
2400        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2406        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2407        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2408        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2409
2410        let b1 = !0u32; let c1 = !0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpnlt_ss(a, b));
2415        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2416        assert_eq!(rb, eb);
2417
2418        let rc: u32x4 = transmute(_mm_cmpnlt_ss(a, c));
2419        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2420        assert_eq!(rc, ec);
2421
2422        let rd: u32x4 = transmute(_mm_cmpnlt_ss(a, d));
2423        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2424        assert_eq!(rd, ed);
2425    }
2426
2427    #[simd_test(enable = "sse")]
2428    unsafe fn test_mm_cmpnle_ss() {
2429        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2435        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2436        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2437        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2438
2439        let b1 = !0u32; let c1 = 0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpnle_ss(a, b));
2444        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2445        assert_eq!(rb, eb);
2446
2447        let rc: u32x4 = transmute(_mm_cmpnle_ss(a, c));
2448        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2449        assert_eq!(rc, ec);
2450
2451        let rd: u32x4 = transmute(_mm_cmpnle_ss(a, d));
2452        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2453        assert_eq!(rd, ed);
2454    }
2455
2456    #[simd_test(enable = "sse")]
2457    unsafe fn test_mm_cmpngt_ss() {
2458        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2464        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2465        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2466        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2467
2468        let b1 = 0u32; let c1 = !0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpngt_ss(a, b));
2473        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2474        assert_eq!(rb, eb);
2475
2476        let rc: u32x4 = transmute(_mm_cmpngt_ss(a, c));
2477        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2478        assert_eq!(rc, ec);
2479
2480        let rd: u32x4 = transmute(_mm_cmpngt_ss(a, d));
2481        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2482        assert_eq!(rd, ed);
2483    }
2484
2485    #[simd_test(enable = "sse")]
2486    unsafe fn test_mm_cmpnge_ss() {
2487        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2493        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2494        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2495        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2496
2497        let b1 = 0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpnge_ss(a, b));
2502        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2503        assert_eq!(rb, eb);
2504
2505        let rc: u32x4 = transmute(_mm_cmpnge_ss(a, c));
2506        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2507        assert_eq!(rc, ec);
2508
2509        let rd: u32x4 = transmute(_mm_cmpnge_ss(a, d));
2510        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2511        assert_eq!(rd, ed);
2512    }
2513
2514    #[simd_test(enable = "sse")]
2515    unsafe fn test_mm_cmpord_ss() {
2516        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2517        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2518        let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
2519        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2520
2521        let b1 = !0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpord_ss(a, b));
2526        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2527        assert_eq!(rb, eb);
2528
2529        let rc: u32x4 = transmute(_mm_cmpord_ss(a, c));
2530        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2531        assert_eq!(rc, ec);
2532
2533        let rd: u32x4 = transmute(_mm_cmpord_ss(a, d));
2534        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2535        assert_eq!(rd, ed);
2536    }
2537
2538    #[simd_test(enable = "sse")]
2539    unsafe fn test_mm_cmpunord_ss() {
2540        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2541        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2542        let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
2543        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2544
2545        let b1 = 0u32; let c1 = !0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpunord_ss(a, b));
2550        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2551        assert_eq!(rb, eb);
2552
2553        let rc: u32x4 = transmute(_mm_cmpunord_ss(a, c));
2554        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2555        assert_eq!(rc, ec);
2556
2557        let rd: u32x4 = transmute(_mm_cmpunord_ss(a, d));
2558        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2559        assert_eq!(rd, ed);
2560    }
2561
2562    #[simd_test(enable = "sse")]
2563    unsafe fn test_mm_cmpeq_ps() {
2564        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2565        let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2566        let tru = !0u32;
2567        let fls = 0u32;
2568
2569        let e = u32x4::new(fls, fls, tru, fls);
2570        let r: u32x4 = transmute(_mm_cmpeq_ps(a, b));
2571        assert_eq!(r, e);
2572    }
2573
2574    #[simd_test(enable = "sse")]
2575    unsafe fn test_mm_cmplt_ps() {
2576        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2577        let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2578        let tru = !0u32;
2579        let fls = 0u32;
2580
2581        let e = u32x4::new(tru, fls, fls, fls);
2582        let r: u32x4 = transmute(_mm_cmplt_ps(a, b));
2583        assert_eq!(r, e);
2584    }
2585
2586    #[simd_test(enable = "sse")]
2587    unsafe fn test_mm_cmple_ps() {
2588        let a = _mm_setr_ps(10.0, 50.0, 1.0, 4.0);
2589        let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2590        let tru = !0u32;
2591        let fls = 0u32;
2592
2593        let e = u32x4::new(tru, fls, tru, fls);
2594        let r: u32x4 = transmute(_mm_cmple_ps(a, b));
2595        assert_eq!(r, e);
2596    }
2597
2598    #[simd_test(enable = "sse")]
2599    unsafe fn test_mm_cmpgt_ps() {
2600        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2601        let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
2602        let tru = !0u32;
2603        let fls = 0u32;
2604
2605        let e = u32x4::new(fls, tru, fls, fls);
2606        let r: u32x4 = transmute(_mm_cmpgt_ps(a, b));
2607        assert_eq!(r, e);
2608    }
2609
2610    #[simd_test(enable = "sse")]
2611    unsafe fn test_mm_cmpge_ps() {
2612        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2613        let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
2614        let tru = !0u32;
2615        let fls = 0u32;
2616
2617        let e = u32x4::new(fls, tru, tru, fls);
2618        let r: u32x4 = transmute(_mm_cmpge_ps(a, b));
2619        assert_eq!(r, e);
2620    }
2621
2622    #[simd_test(enable = "sse")]
2623    unsafe fn test_mm_cmpneq_ps() {
2624        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2625        let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2626        let tru = !0u32;
2627        let fls = 0u32;
2628
2629        let e = u32x4::new(tru, tru, fls, tru);
2630        let r: u32x4 = transmute(_mm_cmpneq_ps(a, b));
2631        assert_eq!(r, e);
2632    }
2633
2634    #[simd_test(enable = "sse")]
2635    unsafe fn test_mm_cmpnlt_ps() {
2636        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2637        let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2638        let tru = !0u32;
2639        let fls = 0u32;
2640
2641        let e = u32x4::new(fls, tru, tru, tru);
2642        let r: u32x4 = transmute(_mm_cmpnlt_ps(a, b));
2643        assert_eq!(r, e);
2644    }
2645
2646    #[simd_test(enable = "sse")]
2647    unsafe fn test_mm_cmpnle_ps() {
2648        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2649        let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2650        let tru = !0u32;
2651        let fls = 0u32;
2652
2653        let e = u32x4::new(fls, tru, fls, tru);
2654        let r: u32x4 = transmute(_mm_cmpnle_ps(a, b));
2655        assert_eq!(r, e);
2656    }
2657
2658    #[simd_test(enable = "sse")]
2659    unsafe fn test_mm_cmpngt_ps() {
2660        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2661        let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2662        let tru = !0u32;
2663        let fls = 0u32;
2664
2665        let e = u32x4::new(tru, fls, tru, tru);
2666        let r: u32x4 = transmute(_mm_cmpngt_ps(a, b));
2667        assert_eq!(r, e);
2668    }
2669
2670    #[simd_test(enable = "sse")]
2671    unsafe fn test_mm_cmpnge_ps() {
2672        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2673        let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2674        let tru = !0u32;
2675        let fls = 0u32;
2676
2677        let e = u32x4::new(tru, fls, fls, tru);
2678        let r: u32x4 = transmute(_mm_cmpnge_ps(a, b));
2679        assert_eq!(r, e);
2680    }
2681
2682    #[simd_test(enable = "sse")]
2683    unsafe fn test_mm_cmpord_ps() {
2684        let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
2685        let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
2686        let tru = !0u32;
2687        let fls = 0u32;
2688
2689        let e = u32x4::new(tru, fls, fls, fls);
2690        let r: u32x4 = transmute(_mm_cmpord_ps(a, b));
2691        assert_eq!(r, e);
2692    }
2693
2694    #[simd_test(enable = "sse")]
2695    unsafe fn test_mm_cmpunord_ps() {
2696        let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
2697        let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
2698        let tru = !0u32;
2699        let fls = 0u32;
2700
2701        let e = u32x4::new(fls, tru, tru, tru);
2702        let r: u32x4 = transmute(_mm_cmpunord_ps(a, b));
2703        assert_eq!(r, e);
2704    }
2705
2706    #[simd_test(enable = "sse")]
2707    unsafe fn test_mm_comieq_ss() {
2708        let aa = &[3.0f32, 12.0, 23.0, NAN];
2709        let bb = &[3.0f32, 47.5, 1.5, NAN];
2710
2711        let ee = &[1i32, 0, 0, 0];
2712
2713        for i in 0..4 {
2714            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2715            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2716
2717            let r = _mm_comieq_ss(a, b);
2718
2719            assert_eq!(
2720                ee[i], r,
2721                "_mm_comieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2722                a, b, r, ee[i], i
2723            );
2724        }
2725    }
2726
2727    #[simd_test(enable = "sse")]
2728    unsafe fn test_mm_comilt_ss() {
2729        let aa = &[3.0f32, 12.0, 23.0, NAN];
2730        let bb = &[3.0f32, 47.5, 1.5, NAN];
2731
2732        let ee = &[0i32, 1, 0, 0];
2733
2734        for i in 0..4 {
2735            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2736            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2737
2738            let r = _mm_comilt_ss(a, b);
2739
2740            assert_eq!(
2741                ee[i], r,
2742                "_mm_comilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
2743                a, b, r, ee[i], i
2744            );
2745        }
2746    }
2747
2748    #[simd_test(enable = "sse")]
2749    unsafe fn test_mm_comile_ss() {
2750        let aa = &[3.0f32, 12.0, 23.0, NAN];
2751        let bb = &[3.0f32, 47.5, 1.5, NAN];
2752
2753        let ee = &[1i32, 1, 0, 0];
2754
2755        for i in 0..4 {
2756            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2757            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2758
2759            let r = _mm_comile_ss(a, b);
2760
2761            assert_eq!(
2762                ee[i], r,
2763                "_mm_comile_ss({:?}, {:?}) = {}, expected: {} (i={})",
2764                a, b, r, ee[i], i
2765            );
2766        }
2767    }
2768
2769    #[simd_test(enable = "sse")]
2770    unsafe fn test_mm_comigt_ss() {
2771        let aa = &[3.0f32, 12.0, 23.0, NAN];
2772        let bb = &[3.0f32, 47.5, 1.5, NAN];
2773
2774        let ee = &[1i32, 0, 1, 0];
2775
2776        for i in 0..4 {
2777            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2778            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2779
2780            let r = _mm_comige_ss(a, b);
2781
2782            assert_eq!(
2783                ee[i], r,
2784                "_mm_comige_ss({:?}, {:?}) = {}, expected: {} (i={})",
2785                a, b, r, ee[i], i
2786            );
2787        }
2788    }
2789
2790    #[simd_test(enable = "sse")]
2791    unsafe fn test_mm_comineq_ss() {
2792        let aa = &[3.0f32, 12.0, 23.0, NAN];
2793        let bb = &[3.0f32, 47.5, 1.5, NAN];
2794
2795        let ee = &[0i32, 1, 1, 1];
2796
2797        for i in 0..4 {
2798            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2799            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2800
2801            let r = _mm_comineq_ss(a, b);
2802
2803            assert_eq!(
2804                ee[i], r,
2805                "_mm_comineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2806                a, b, r, ee[i], i
2807            );
2808        }
2809    }
2810
2811    #[simd_test(enable = "sse")]
2812    unsafe fn test_mm_ucomieq_ss() {
2813        let aa = &[3.0f32, 12.0, 23.0, NAN];
2814        let bb = &[3.0f32, 47.5, 1.5, NAN];
2815
2816        let ee = &[1i32, 0, 0, 0];
2817
2818        for i in 0..4 {
2819            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2820            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2821
2822            let r = _mm_ucomieq_ss(a, b);
2823
2824            assert_eq!(
2825                ee[i], r,
2826                "_mm_ucomieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2827                a, b, r, ee[i], i
2828            );
2829        }
2830    }
2831
2832    #[simd_test(enable = "sse")]
2833    unsafe fn test_mm_ucomilt_ss() {
2834        let aa = &[3.0f32, 12.0, 23.0, NAN];
2835        let bb = &[3.0f32, 47.5, 1.5, NAN];
2836
2837        let ee = &[0i32, 1, 0, 0];
2838
2839        for i in 0..4 {
2840            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2841            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2842
2843            let r = _mm_ucomilt_ss(a, b);
2844
2845            assert_eq!(
2846                ee[i], r,
2847                "_mm_ucomilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
2848                a, b, r, ee[i], i
2849            );
2850        }
2851    }
2852
2853    #[simd_test(enable = "sse")]
2854    unsafe fn test_mm_ucomile_ss() {
2855        let aa = &[3.0f32, 12.0, 23.0, NAN];
2856        let bb = &[3.0f32, 47.5, 1.5, NAN];
2857
2858        let ee = &[1i32, 1, 0, 0];
2859
2860        for i in 0..4 {
2861            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2862            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2863
2864            let r = _mm_ucomile_ss(a, b);
2865
2866            assert_eq!(
2867                ee[i], r,
2868                "_mm_ucomile_ss({:?}, {:?}) = {}, expected: {} (i={})",
2869                a, b, r, ee[i], i
2870            );
2871        }
2872    }
2873
2874    #[simd_test(enable = "sse")]
2875    unsafe fn test_mm_ucomigt_ss() {
2876        let aa = &[3.0f32, 12.0, 23.0, NAN];
2877        let bb = &[3.0f32, 47.5, 1.5, NAN];
2878
2879        let ee = &[0i32, 0, 1, 0];
2880
2881        for i in 0..4 {
2882            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2883            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2884
2885            let r = _mm_ucomigt_ss(a, b);
2886
2887            assert_eq!(
2888                ee[i], r,
2889                "_mm_ucomigt_ss({:?}, {:?}) = {}, expected: {} (i={})",
2890                a, b, r, ee[i], i
2891            );
2892        }
2893    }
2894
2895    #[simd_test(enable = "sse")]
2896    unsafe fn test_mm_ucomige_ss() {
2897        let aa = &[3.0f32, 12.0, 23.0, NAN];
2898        let bb = &[3.0f32, 47.5, 1.5, NAN];
2899
2900        let ee = &[1i32, 0, 1, 0];
2901
2902        for i in 0..4 {
2903            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2904            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2905
2906            let r = _mm_ucomige_ss(a, b);
2907
2908            assert_eq!(
2909                ee[i], r,
2910                "_mm_ucomige_ss({:?}, {:?}) = {}, expected: {} (i={})",
2911                a, b, r, ee[i], i
2912            );
2913        }
2914    }
2915
2916    #[simd_test(enable = "sse")]
2917    unsafe fn test_mm_ucomineq_ss() {
2918        let aa = &[3.0f32, 12.0, 23.0, NAN];
2919        let bb = &[3.0f32, 47.5, 1.5, NAN];
2920
2921        let ee = &[0i32, 1, 1, 1];
2922
2923        for i in 0..4 {
2924            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2925            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2926
2927            let r = _mm_ucomineq_ss(a, b);
2928
2929            assert_eq!(
2930                ee[i], r,
2931                "_mm_ucomineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2932                a, b, r, ee[i], i
2933            );
2934        }
2935    }
2936
2937    #[simd_test(enable = "sse")]
2938    unsafe fn test_mm_cvtss_si32() {
2939        let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1];
2940        let result = &[42i32, -3, i32::MIN, 0, i32::MIN, 2147483520];
2941        for i in 0..inputs.len() {
2942            let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0);
2943            let e = result[i];
2944            let r = _mm_cvtss_si32(x);
2945            assert_eq!(
2946                e, r,
2947                "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}",
2948                i, x, r, e
2949            );
2950        }
2951    }
2952
2953    #[simd_test(enable = "sse")]
2954    unsafe fn test_mm_cvttss_si32() {
2955        let inputs = &[
2956            (42.0f32, 42i32),
2957            (-31.4, -31),
2958            (-33.5, -33),
2959            (-34.5, -34),
2960            (10.999, 10),
2961            (-5.99, -5),
2962            (4.0e10, i32::MIN),
2963            (4.0e-10, 0),
2964            (NAN, i32::MIN),
2965            (2147483500.1, 2147483520),
2966        ];
2967        for (i, &(xi, e)) in inputs.iter().enumerate() {
2968            let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
2969            let r = _mm_cvttss_si32(x);
2970            assert_eq!(
2971                e, r,
2972                "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}",
2973                i, x, r, e
2974            );
2975        }
2976    }
2977
2978    #[simd_test(enable = "sse")]
2979    unsafe fn test_mm_cvtsi32_ss() {
2980        let inputs = &[
2981            (4555i32, 4555.0f32),
2982            (322223333, 322223330.0),
2983            (-432, -432.0),
2984            (-322223333, -322223330.0),
2985        ];
2986
2987        for &(x, f) in inputs.iter() {
2988            let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
2989            let r = _mm_cvtsi32_ss(a, x);
2990            let e = _mm_setr_ps(f, 6.0, 7.0, 8.0);
2991            assert_eq_m128(e, r);
2992        }
2993    }
2994
2995    #[simd_test(enable = "sse")]
2996    unsafe fn test_mm_cvtss_f32() {
2997        let a = _mm_setr_ps(312.0134, 5.0, 6.0, 7.0);
2998        assert_eq!(_mm_cvtss_f32(a), 312.0134);
2999    }
3000
3001    #[simd_test(enable = "sse")]
3002    unsafe fn test_mm_set_ss() {
3003        let r = _mm_set_ss(black_box(4.25));
3004        assert_eq_m128(r, _mm_setr_ps(4.25, 0.0, 0.0, 0.0));
3005    }
3006
3007    #[simd_test(enable = "sse")]
3008    unsafe fn test_mm_set1_ps() {
3009        let r1 = _mm_set1_ps(black_box(4.25));
3010        let r2 = _mm_set_ps1(black_box(4.25));
3011        assert_eq!(get_m128(r1, 0), 4.25);
3012        assert_eq!(get_m128(r1, 1), 4.25);
3013        assert_eq!(get_m128(r1, 2), 4.25);
3014        assert_eq!(get_m128(r1, 3), 4.25);
3015        assert_eq!(get_m128(r2, 0), 4.25);
3016        assert_eq!(get_m128(r2, 1), 4.25);
3017        assert_eq!(get_m128(r2, 2), 4.25);
3018        assert_eq!(get_m128(r2, 3), 4.25);
3019    }
3020
3021    #[simd_test(enable = "sse")]
3022    unsafe fn test_mm_set_ps() {
3023        let r = _mm_set_ps(
3024            black_box(1.0),
3025            black_box(2.0),
3026            black_box(3.0),
3027            black_box(4.0),
3028        );
3029        assert_eq!(get_m128(r, 0), 4.0);
3030        assert_eq!(get_m128(r, 1), 3.0);
3031        assert_eq!(get_m128(r, 2), 2.0);
3032        assert_eq!(get_m128(r, 3), 1.0);
3033    }
3034
3035    #[simd_test(enable = "sse")]
3036    unsafe fn test_mm_setr_ps() {
3037        let r = _mm_setr_ps(
3038            black_box(1.0),
3039            black_box(2.0),
3040            black_box(3.0),
3041            black_box(4.0),
3042        );
3043        assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3044    }
3045
3046    #[simd_test(enable = "sse")]
3047    unsafe fn test_mm_setzero_ps() {
3048        let r = *black_box(&_mm_setzero_ps());
3049        assert_eq_m128(r, _mm_set1_ps(0.0));
3050    }
3051
3052    #[simd_test(enable = "sse")]
3053    unsafe fn test_MM_SHUFFLE() {
3054        assert_eq!(_MM_SHUFFLE(0, 1, 1, 3), 0b00_01_01_11);
3055        assert_eq!(_MM_SHUFFLE(3, 1, 1, 0), 0b11_01_01_00);
3056        assert_eq!(_MM_SHUFFLE(1, 2, 2, 1), 0b01_10_10_01);
3057    }
3058
3059    #[simd_test(enable = "sse")]
3060    unsafe fn test_mm_shuffle_ps() {
3061        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3062        let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3063        let r = _mm_shuffle_ps::<0b00_01_01_11>(a, b);
3064        assert_eq_m128(r, _mm_setr_ps(4.0, 2.0, 6.0, 5.0));
3065    }
3066
3067    #[simd_test(enable = "sse")]
3068    unsafe fn test_mm_unpackhi_ps() {
3069        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3070        let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3071        let r = _mm_unpackhi_ps(a, b);
3072        assert_eq_m128(r, _mm_setr_ps(3.0, 7.0, 4.0, 8.0));
3073    }
3074
3075    #[simd_test(enable = "sse")]
3076    unsafe fn test_mm_unpacklo_ps() {
3077        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3078        let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3079        let r = _mm_unpacklo_ps(a, b);
3080        assert_eq_m128(r, _mm_setr_ps(1.0, 5.0, 2.0, 6.0));
3081    }
3082
3083    #[simd_test(enable = "sse")]
3084    unsafe fn test_mm_movehl_ps() {
3085        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3086        let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3087        let r = _mm_movehl_ps(a, b);
3088        assert_eq_m128(r, _mm_setr_ps(7.0, 8.0, 3.0, 4.0));
3089    }
3090
3091    #[simd_test(enable = "sse")]
3092    unsafe fn test_mm_movelh_ps() {
3093        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3094        let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3095        let r = _mm_movelh_ps(a, b);
3096        assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 5.0, 6.0));
3097    }
3098
3099    #[simd_test(enable = "sse")]
3100    unsafe fn test_mm_load_ss() {
3101        let a = 42.0f32;
3102        let r = _mm_load_ss(ptr::addr_of!(a));
3103        assert_eq_m128(r, _mm_setr_ps(42.0, 0.0, 0.0, 0.0));
3104    }
3105
3106    #[simd_test(enable = "sse")]
3107    unsafe fn test_mm_load1_ps() {
3108        let a = 42.0f32;
3109        let r = _mm_load1_ps(ptr::addr_of!(a));
3110        assert_eq_m128(r, _mm_setr_ps(42.0, 42.0, 42.0, 42.0));
3111    }
3112
3113    #[simd_test(enable = "sse")]
3114    unsafe fn test_mm_load_ps() {
3115        let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
3116
3117        let mut p = vals.as_ptr();
3118        let mut fixup = 0.0f32;
3119
3120        let unalignment = (p as usize) & 0xf;
3124        if unalignment != 0 {
3125            let delta = (16 - unalignment) >> 2;
3126            fixup = delta as f32;
3127            p = p.add(delta);
3128        }
3129
3130        let r = _mm_load_ps(p);
3131        let e = _mm_add_ps(_mm_setr_ps(1.0, 2.0, 3.0, 4.0), _mm_set1_ps(fixup));
3132        assert_eq_m128(r, e);
3133    }
3134
3135    #[simd_test(enable = "sse")]
3136    unsafe fn test_mm_loadu_ps() {
3137        let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
3138        let p = vals.as_ptr().add(3);
3139        let r = _mm_loadu_ps(black_box(p));
3140        assert_eq_m128(r, _mm_setr_ps(4.0, 5.0, 6.0, 7.0));
3141    }
3142
3143    #[simd_test(enable = "sse")]
3144    unsafe fn test_mm_loadr_ps() {
3145        let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
3146
3147        let mut p = vals.as_ptr();
3148        let mut fixup = 0.0f32;
3149
3150        let unalignment = (p as usize) & 0xf;
3154        if unalignment != 0 {
3155            let delta = (16 - unalignment) >> 2;
3156            fixup = delta as f32;
3157            p = p.add(delta);
3158        }
3159
3160        let r = _mm_loadr_ps(p);
3161        let e = _mm_add_ps(_mm_setr_ps(4.0, 3.0, 2.0, 1.0), _mm_set1_ps(fixup));
3162        assert_eq_m128(r, e);
3163    }
3164
3165    #[simd_test(enable = "sse")]
3166    unsafe fn test_mm_store_ss() {
3167        let mut vals = [0.0f32; 8];
3168        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3169        _mm_store_ss(vals.as_mut_ptr().add(1), a);
3170
3171        assert_eq!(vals[0], 0.0);
3172        assert_eq!(vals[1], 1.0);
3173        assert_eq!(vals[2], 0.0);
3174    }
3175
3176    #[simd_test(enable = "sse")]
3177    unsafe fn test_mm_store1_ps() {
3178        let mut vals = [0.0f32; 8];
3179        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3180
3181        let mut ofs = 0;
3182        let mut p = vals.as_mut_ptr();
3183
3184        if (p as usize) & 0xf != 0 {
3185            ofs = (16 - ((p as usize) & 0xf)) >> 2;
3186            p = p.add(ofs);
3187        }
3188
3189        _mm_store1_ps(p, *black_box(&a));
3190
3191        if ofs > 0 {
3192            assert_eq!(vals[ofs - 1], 0.0);
3193        }
3194        assert_eq!(vals[ofs + 0], 1.0);
3195        assert_eq!(vals[ofs + 1], 1.0);
3196        assert_eq!(vals[ofs + 2], 1.0);
3197        assert_eq!(vals[ofs + 3], 1.0);
3198        assert_eq!(vals[ofs + 4], 0.0);
3199    }
3200
3201    #[simd_test(enable = "sse")]
3202    unsafe fn test_mm_store_ps() {
3203        let mut vals = [0.0f32; 8];
3204        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3205
3206        let mut ofs = 0;
3207        let mut p = vals.as_mut_ptr();
3208
3209        if (p as usize) & 0xf != 0 {
3211            ofs = (16 - ((p as usize) & 0xf)) >> 2;
3212            p = p.add(ofs);
3213        }
3214
3215        _mm_store_ps(p, *black_box(&a));
3216
3217        if ofs > 0 {
3218            assert_eq!(vals[ofs - 1], 0.0);
3219        }
3220        assert_eq!(vals[ofs + 0], 1.0);
3221        assert_eq!(vals[ofs + 1], 2.0);
3222        assert_eq!(vals[ofs + 2], 3.0);
3223        assert_eq!(vals[ofs + 3], 4.0);
3224        assert_eq!(vals[ofs + 4], 0.0);
3225    }
3226
3227    #[simd_test(enable = "sse")]
3228    unsafe fn test_mm_storer_ps() {
3229        let mut vals = [0.0f32; 8];
3230        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3231
3232        let mut ofs = 0;
3233        let mut p = vals.as_mut_ptr();
3234
3235        if (p as usize) & 0xf != 0 {
3237            ofs = (16 - ((p as usize) & 0xf)) >> 2;
3238            p = p.add(ofs);
3239        }
3240
3241        _mm_storer_ps(p, *black_box(&a));
3242
3243        if ofs > 0 {
3244            assert_eq!(vals[ofs - 1], 0.0);
3245        }
3246        assert_eq!(vals[ofs + 0], 4.0);
3247        assert_eq!(vals[ofs + 1], 3.0);
3248        assert_eq!(vals[ofs + 2], 2.0);
3249        assert_eq!(vals[ofs + 3], 1.0);
3250        assert_eq!(vals[ofs + 4], 0.0);
3251    }
3252
3253    #[simd_test(enable = "sse")]
3254    unsafe fn test_mm_storeu_ps() {
3255        let mut vals = [0.0f32; 8];
3256        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3257
3258        let mut ofs = 0;
3259        let mut p = vals.as_mut_ptr();
3260
3261        if (p as usize) & 0xf == 0 {
3263            ofs = 1;
3264            p = p.add(1);
3265        }
3266
3267        _mm_storeu_ps(p, *black_box(&a));
3268
3269        if ofs > 0 {
3270            assert_eq!(vals[ofs - 1], 0.0);
3271        }
3272        assert_eq!(vals[ofs + 0], 1.0);
3273        assert_eq!(vals[ofs + 1], 2.0);
3274        assert_eq!(vals[ofs + 2], 3.0);
3275        assert_eq!(vals[ofs + 3], 4.0);
3276        assert_eq!(vals[ofs + 4], 0.0);
3277    }
3278
3279    #[simd_test(enable = "sse")]
3280    unsafe fn test_mm_move_ss() {
3281        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3282        let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3283
3284        let r = _mm_move_ss(a, b);
3285        let e = _mm_setr_ps(5.0, 2.0, 3.0, 4.0);
3286        assert_eq_m128(e, r);
3287    }
3288
3289    #[simd_test(enable = "sse")]
3290    unsafe fn test_mm_movemask_ps() {
3291        let r = _mm_movemask_ps(_mm_setr_ps(-1.0, 5.0, -5.0, 0.0));
3292        assert_eq!(r, 0b0101);
3293
3294        let r = _mm_movemask_ps(_mm_setr_ps(-1.0, -5.0, -5.0, 0.0));
3295        assert_eq!(r, 0b0111);
3296    }
3297
    // Smoke test: only checks that `_mm_sfence` can be called; nothing is
    // asserted about its effect.
    //
    // NOTE(review): the test is ignored under Miri; the reason is not visible
    // here (presumably Miri cannot execute the fence) — confirm.
    #[simd_test(enable = "sse")]
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_sfence() {
        _mm_sfence();
    }
3304
3305    #[simd_test(enable = "sse")]
3306    unsafe fn test_MM_TRANSPOSE4_PS() {
3307        let mut a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3308        let mut b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3309        let mut c = _mm_setr_ps(9.0, 10.0, 11.0, 12.0);
3310        let mut d = _mm_setr_ps(13.0, 14.0, 15.0, 16.0);
3311
3312        _MM_TRANSPOSE4_PS(&mut a, &mut b, &mut c, &mut d);
3313
3314        assert_eq_m128(a, _mm_setr_ps(1.0, 5.0, 9.0, 13.0));
3315        assert_eq_m128(b, _mm_setr_ps(2.0, 6.0, 10.0, 14.0));
3316        assert_eq_m128(c, _mm_setr_ps(3.0, 7.0, 11.0, 15.0));
3317        assert_eq_m128(d, _mm_setr_ps(4.0, 8.0, 12.0, 16.0));
3318    }
3319
    // Four `f32`s in a struct forced to 16-byte alignment.
    //
    // `test_mm_stream_ps` uses it as the destination of `_mm_stream_ps`.
    // NOTE(review): presumably the alignment is there because that store
    // needs a 16-byte aligned address — confirm against the intrinsic's docs.
    #[repr(align(16))]
    struct Memory {
        pub data: [f32; 4],
    }
3324
3325    #[simd_test(enable = "sse")]
3326    #[cfg_attr(miri, ignore)]
3329    unsafe fn test_mm_stream_ps() {
3330        let a = _mm_set1_ps(7.0);
3331        let mut mem = Memory { data: [-1.0; 4] };
3332
3333        _mm_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
3334        for i in 0..4 {
3335            assert_eq!(mem.data[i], get_m128(a, i));
3336        }
3337    }
3338}