core/stdarch/crates/core_arch/src/x86/
sse4a.rs1use crate::core_arch::{simd::*, x86::*};
4
5#[cfg(test)]
6use stdarch_test::assert_instr;
7
8#[allow(improper_ctypes)]
9unsafe extern "C" {
10    #[link_name = "llvm.x86.sse4a.extrq"]
11    fn extrq(x: i64x2, y: i8x16) -> i64x2;
12    #[link_name = "llvm.x86.sse4a.extrqi"]
13    fn extrqi(x: i64x2, len: u8, idx: u8) -> i64x2;
14    #[link_name = "llvm.x86.sse4a.insertq"]
15    fn insertq(x: i64x2, y: i64x2) -> i64x2;
16    #[link_name = "llvm.x86.sse4a.insertqi"]
17    fn insertqi(x: i64x2, y: i64x2, len: u8, idx: u8) -> i64x2;
18    #[link_name = "llvm.x86.sse4a.movnt.sd"]
19    fn movntsd(x: *mut f64, y: __m128d);
20    #[link_name = "llvm.x86.sse4a.movnt.ss"]
21    fn movntss(x: *mut f32, y: __m128);
22}
23
24#[inline]
36#[target_feature(enable = "sse4a")]
37#[cfg_attr(test, assert_instr(extrq))]
38#[stable(feature = "simd_x86", since = "1.27.0")]
39pub fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i {
40    unsafe { transmute(extrq(x.as_i64x2(), y.as_i8x16())) }
41}
42
43#[inline]
52#[target_feature(enable = "sse4a")]
53#[cfg_attr(test, assert_instr(extrq, LEN = 5, IDX = 5))]
54#[rustc_legacy_const_generics(1, 2)]
55#[stable(feature = "simd_x86_updates", since = "1.82.0")]
56pub fn _mm_extracti_si64<const LEN: i32, const IDX: i32>(x: __m128i) -> __m128i {
57    static_assert_uimm_bits!(LEN, 6);
59    static_assert_uimm_bits!(IDX, 6);
60    static_assert!((LEN == 0 && IDX == 0) || (LEN != 0 && LEN + IDX <= 64));
61    unsafe { transmute(extrqi(x.as_i64x2(), LEN as u8, IDX as u8)) }
62}
63
64#[inline]
74#[target_feature(enable = "sse4a")]
75#[cfg_attr(test, assert_instr(insertq))]
76#[stable(feature = "simd_x86", since = "1.27.0")]
77pub fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
78    unsafe { transmute(insertq(x.as_i64x2(), y.as_i64x2())) }
79}
80
81#[inline]
88#[target_feature(enable = "sse4a")]
89#[cfg_attr(test, assert_instr(insertq, LEN = 5, IDX = 5))]
90#[rustc_legacy_const_generics(2, 3)]
91#[stable(feature = "simd_x86_updates", since = "1.82.0")]
92pub fn _mm_inserti_si64<const LEN: i32, const IDX: i32>(x: __m128i, y: __m128i) -> __m128i {
93    static_assert_uimm_bits!(LEN, 6);
95    static_assert_uimm_bits!(IDX, 6);
96    static_assert!((LEN == 0 && IDX == 0) || (LEN != 0 && LEN + IDX <= 64));
97    unsafe { transmute(insertqi(x.as_i64x2(), y.as_i64x2(), LEN as u8, IDX as u8)) }
98}
99
100#[inline]
113#[target_feature(enable = "sse4a")]
114#[cfg_attr(test, assert_instr(movntsd))]
115#[stable(feature = "simd_x86", since = "1.27.0")]
116pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) {
117    movntsd(p, a);
118}
119
120#[inline]
133#[target_feature(enable = "sse4a")]
134#[cfg_attr(test, assert_instr(movntss))]
135#[stable(feature = "simd_x86", since = "1.27.0")]
136pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) {
137    movntss(p, a);
138}
139
#[cfg(test)]
mod tests {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "sse4a")]
    unsafe fn test_mm_extract_si64() {
        // The nibble 0b0110 occupies bits [11:8] of the source.
        let src = _mm_setr_epi64x(0b0110_0000_0000_i64, 0);
        // Control word: index = 8 in bits [13:8], length = 4 in bits [5:0].
        let ctrl = _mm_setr_epi64x(0b001000___00___000100_i64, 0);
        let want = _mm_setr_epi64x(0b0110_i64, 0);

        let got = _mm_extract_si64(src, ctrl);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "sse4a")]
    unsafe fn test_mm_extracti_si64() {
        // Eight bits starting at bit 8 of ...abcdef select the byte 0xcd.
        let src = _mm_setr_epi64x(0x0123456789abcdef, 0);
        let want = _mm_setr_epi64x(0xcd, 0);

        let got = _mm_extracti_si64::<8, 8>(src);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "sse4a")]
    unsafe fn test_mm_insert_si64() {
        // Destination value, and what it should become once bits [11:8] hold 0b0110.
        let dst = _mm_setr_epi64x(0b1010_1010_1010i64, 0);
        let want = _mm_setr_epi64x(0b0110_1010_1010i64, 0);
        // Low quadword: bits to insert. High quadword: index = 8 in bits [13:8] and
        // length = 4 in bits [5:0].
        let field = _mm_setr_epi64x(0b0110_i64, 0b001000___00___000100_i64);

        let got = _mm_insert_si64(dst, field);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "sse4a")]
    unsafe fn test_mm_inserti_si64() {
        // The low byte of `field` (0x77) replaces bits [15:8] of `dst` (0xcd).
        let dst = _mm_setr_epi64x(0x0123456789abcdef, 0);
        let field = _mm_setr_epi64x(0x0011223344556677, 0);
        let want = _mm_setr_epi64x(0x0123456789ab77ef, 0);

        let got = _mm_inserti_si64::<8, 8>(dst, field);
        assert_eq_m128i(got, want);
    }

    /// 16-byte-aligned scratch buffer for the `f64` streaming-store test.
    #[repr(align(16))]
    struct MemoryF64 {
        data: [f64; 2],
    }

    #[simd_test(enable = "sse4a")]
    // The non-temporal store test is not run under Miri.
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_stream_sd() {
        let mut buf = MemoryF64 {
            data: [1.0_f64, 2.0],
        };
        let value = _mm_setr_pd(3.0, 4.0);

        _mm_stream_sd(buf.data.as_mut_ptr(), value);

        // Only element 0 is overwritten, with the lowest lane of `value`.
        assert_eq!(buf.data, [3.0_f64, 2.0]);
    }

    /// 16-byte-aligned scratch buffer for the `f32` streaming-store test.
    #[repr(align(16))]
    struct MemoryF32 {
        data: [f32; 4],
    }

    #[simd_test(enable = "sse4a")]
    // The non-temporal store test is not run under Miri.
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_stream_ss() {
        let mut buf = MemoryF32 {
            data: [1.0_f32, 2.0, 3.0, 4.0],
        };
        let value = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);

        _mm_stream_ss(buf.data.as_mut_ptr(), value);

        // Only element 0 is overwritten, with the lowest lane of `value`.
        assert_eq!(buf.data, [5.0_f32, 2.0, 3.0, 4.0]);
    }
}