1use crate::core_arch::{simd::*, x86::*};
2
// Declarations of the LLVM intrinsics that back the public wrappers below.
// Each `link_name` selects the LLVM builtin that lowers to the matching
// hardware instruction (SHA-NI, SHA512, SM3, SM4). The wrappers transmute
// `__m128i`/`__m256i` into the lane-typed vectors these signatures expect.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // SHA-1 message schedule and round intrinsics (SHA-NI).
    #[link_name = "llvm.x86.sha1msg1"]
    fn sha1msg1(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha1msg2"]
    fn sha1msg2(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha1nexte"]
    fn sha1nexte(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha1rnds4"]
    fn sha1rnds4(a: i32x4, b: i32x4, c: i8) -> i32x4;
    // SHA-256 message schedule and round intrinsics (SHA-NI).
    #[link_name = "llvm.x86.sha256msg1"]
    fn sha256msg1(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha256msg2"]
    fn sha256msg2(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sha256rnds2"]
    fn sha256rnds2(a: i32x4, b: i32x4, k: i32x4) -> i32x4;
    // SHA-512 intrinsics (256-bit state, 64-bit lanes).
    #[link_name = "llvm.x86.vsha512msg1"]
    fn vsha512msg1(a: i64x4, b: i64x2) -> i64x4;
    #[link_name = "llvm.x86.vsha512msg2"]
    fn vsha512msg2(a: i64x4, b: i64x4) -> i64x4;
    #[link_name = "llvm.x86.vsha512rnds2"]
    fn vsha512rnds2(a: i64x4, b: i64x4, k: i64x2) -> i64x4;
    // SM3 hash intrinsics.
    #[link_name = "llvm.x86.vsm3msg1"]
    fn vsm3msg1(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
    #[link_name = "llvm.x86.vsm3msg2"]
    fn vsm3msg2(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
    #[link_name = "llvm.x86.vsm3rnds2"]
    fn vsm3rnds2(a: i32x4, b: i32x4, c: i32x4, d: i32) -> i32x4;
    // SM4 cipher intrinsics; the 256-bit forms process two independent
    // 128-bit lanes.
    #[link_name = "llvm.x86.vsm4key4128"]
    fn vsm4key4128(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.vsm4key4256"]
    fn vsm4key4256(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.vsm4rnds4128"]
    fn vsm4rnds4128(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.vsm4rnds4256"]
    fn vsm4rnds4256(a: i32x8, b: i32x8) -> i32x8;
}
40
41#[cfg(test)]
42use stdarch_test::assert_instr;
43
/// Performs an intermediate calculation for the next four SHA1 message values
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
/// and returning the result.
///
/// Lowers to the `sha1msg1` instruction (verified by `assert_instr`).
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha1msg1))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(sha1msg1(a.as_i32x4(), b.as_i32x4())) }
}
56
/// Performs the final calculation for the next four SHA1 message values
/// (unsigned 32-bit integers) using the intermediate result in `a` and the
/// previous message values in `b`, and returns the result.
///
/// Lowers to the `sha1msg2` instruction (verified by `assert_instr`).
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha1msg2))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(sha1msg2(a.as_i32x4(), b.as_i32x4())) }
}
69
/// Calculates the SHA1 state variable E after four rounds of operation from
/// the current SHA1 state variable in `a`, adds that value to the scheduled
/// values (unsigned 32-bit integers) in `b`, and returns the result.
///
/// Lowers to the `sha1nexte` instruction (verified by `assert_instr`).
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha1nexte))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(sha1nexte(a.as_i32x4(), b.as_i32x4())) }
}
82
/// Performs four rounds of SHA1 operation using an initial SHA1 state
/// (A,B,C,D) from `a` and some pre-computed sum of the next four round
/// message values (unsigned 32-bit integers) and state variable E from `b`,
/// and returns the updated SHA1 state (A,B,C,D).
///
/// `FUNC` selects the logic function and round constant and must fit in
/// 2 bits (0..=3), enforced by `static_assert_uimm_bits!` below.
///
/// Lowers to the `sha1rnds4` instruction (verified by `assert_instr`).
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha1rnds4, FUNC = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha1rnds4_epu32<const FUNC: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(FUNC, 2);
    unsafe { transmute(sha1rnds4(a.as_i32x4(), b.as_i32x4(), FUNC as i8)) }
}
99
/// Performs an intermediate calculation for the next four SHA256 message
/// values (unsigned 32-bit integers) using previous message values from `a`
/// and `b`, and returns the result.
///
/// Lowers to the `sha256msg1` instruction (verified by `assert_instr`).
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha256msg1))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(sha256msg1(a.as_i32x4(), b.as_i32x4())) }
}
112
/// Performs the final calculation for the next four SHA256 message values
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
/// and returns the result.
///
/// Lowers to the `sha256msg2` instruction (verified by `assert_instr`).
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha256msg2))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(sha256msg2(a.as_i32x4(), b.as_i32x4())) }
}
125
/// Performs two rounds of SHA256 operation using an initial SHA256 state
/// (C,D,G,H) from `a`, an initial SHA256 state (A,B,E,F) from `b`, and a
/// pre-computed sum of the next two round message values (unsigned 32-bit
/// integers) and the corresponding round constants from `k`, and returns the
/// updated SHA256 state (A,B,E,F).
///
/// Lowers to the `sha256rnds2` instruction (verified by `assert_instr`).
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha256rnds2))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sha256rnds2_epu32(a: __m128i, b: __m128i, k: __m128i) -> __m128i {
    unsafe { transmute(sha256rnds2(a.as_i32x4(), b.as_i32x4(), k.as_i32x4())) }
}
140
/// One of the two SHA512 message-scheduling intrinsics: performs an
/// intermediate calculation for the next four SHA512 message qwords using
/// previous message values from `a` and `b`, and returns the result.
///
/// Lowers to the `vsha512msg1` instruction (verified by `assert_instr`).
#[inline]
#[target_feature(enable = "sha512,avx")]
#[cfg_attr(test, assert_instr(vsha512msg1))]
#[unstable(feature = "sha512_sm_x86", issue = "126624")]
pub fn _mm256_sha512msg1_epi64(a: __m256i, b: __m128i) -> __m256i {
    unsafe { transmute(vsha512msg1(a.as_i64x4(), b.as_i64x2())) }
}
153
/// One of the two SHA512 message-scheduling intrinsics: performs the final
/// calculation for the next four SHA512 message qwords using the intermediate
/// result in `a` and previous message values in `b`, and returns the result.
///
/// Lowers to the `vsha512msg2` instruction (verified by `assert_instr`).
#[inline]
#[target_feature(enable = "sha512,avx")]
#[cfg_attr(test, assert_instr(vsha512msg2))]
#[unstable(feature = "sha512_sm_x86", issue = "126624")]
pub fn _mm256_sha512msg2_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vsha512msg2(a.as_i64x4(), b.as_i64x4())) }
}
166
/// Performs two rounds of SHA512 operation using an initial SHA512 state
/// (C,D,G,H) from `a`, an initial SHA512 state (A,B,E,F) from `b`, and a
/// pre-computed sum of the next two round message qwords and the
/// corresponding round constants from `k`, and returns the updated SHA512
/// state (A,B,E,F).
///
/// Lowers to the `vsha512rnds2` instruction (verified by `assert_instr`).
#[inline]
#[target_feature(enable = "sha512,avx")]
#[cfg_attr(test, assert_instr(vsha512rnds2))]
#[unstable(feature = "sha512_sm_x86", issue = "126624")]
pub fn _mm256_sha512rnds2_epi64(a: __m256i, b: __m256i, k: __m128i) -> __m256i {
    unsafe { transmute(vsha512rnds2(a.as_i64x4(), b.as_i64x4(), k.as_i64x2())) }
}
182
/// One of the two SM3 message-scheduling intrinsics: performs the initial
/// calculation for the next four SM3 message words using words from `a`,
/// `b`, and `c`, and returns the result.
///
/// Lowers to the `vsm3msg1` instruction (verified by `assert_instr`).
#[inline]
#[target_feature(enable = "sm3,avx")]
#[cfg_attr(test, assert_instr(vsm3msg1))]
#[unstable(feature = "sha512_sm_x86", issue = "126624")]
pub fn _mm_sm3msg1_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(vsm3msg1(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) }
}
195
/// One of the two SM3 message-scheduling intrinsics: performs the final
/// calculation for the next four SM3 message words using words from `a`,
/// `b`, and `c`, and returns the result.
///
/// Lowers to the `vsm3msg2` instruction (verified by `assert_instr`).
#[inline]
#[target_feature(enable = "sm3,avx")]
#[cfg_attr(test, assert_instr(vsm3msg2))]
#[unstable(feature = "sha512_sm_x86", issue = "126624")]
pub fn _mm_sm3msg2_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(vsm3msg2(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) }
}
208
/// Performs two rounds of SM3 operation using an initial SM3 state (C,D,G,H)
/// from `a`, an initial SM3 state (A,B,E,F) from `b`, and pre-computed words
/// from `c`, and returns the updated state.
///
/// `IMM8` is the round number of the first of the two rounds; it must be an
/// even value in `0..=62`, enforced by the `static_assert!` below
/// (`IMM8 == IMM8 & 0x3e` rejects odd values and anything above 62).
///
/// Lowers to the `vsm3rnds2` instruction (verified by `assert_instr`).
#[inline]
#[target_feature(enable = "sm3,avx")]
#[cfg_attr(test, assert_instr(vsm3rnds2, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "sha512_sm_x86", issue = "126624")]
pub fn _mm_sm3rnds2_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    static_assert!(
        IMM8 == (IMM8 & 0x3e),
        "IMM8 must be an even number in the range `0..=62`"
    );
    unsafe { transmute(vsm3rnds2(a.as_i32x4(), b.as_i32x4(), c.as_i32x4(), IMM8)) }
}
230
/// Performs four rounds of SM4 key expansion using the current round keys in
/// `a` and the constants in `b`, and returns the next four round keys.
///
/// Lowers to the `vsm4key4` instruction (verified by `assert_instr`).
#[inline]
#[target_feature(enable = "sm4,avx")]
#[cfg_attr(test, assert_instr(vsm4key4))]
#[unstable(feature = "sha512_sm_x86", issue = "126624")]
pub fn _mm_sm4key4_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vsm4key4128(a.as_i32x4(), b.as_i32x4())) }
}
242
/// Performs four rounds of SM4 key expansion on each of the two independent
/// 128-bit lanes of `a` and `b` (see `_mm_sm4key4_epi32` for the per-lane
/// operation), and returns the next round keys.
///
/// Lowers to the `vsm4key4` instruction (verified by `assert_instr`).
#[inline]
#[target_feature(enable = "sm4,avx")]
#[cfg_attr(test, assert_instr(vsm4key4))]
#[unstable(feature = "sha512_sm_x86", issue = "126624")]
pub fn _mm256_sm4key4_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vsm4key4256(a.as_i32x8(), b.as_i32x8())) }
}
254
/// Performs four rounds of SM4 encryption using the current state in `a` and
/// the round keys in `b`, and returns the updated state.
///
/// Lowers to the `vsm4rnds4` instruction (verified by `assert_instr`).
#[inline]
#[target_feature(enable = "sm4,avx")]
#[cfg_attr(test, assert_instr(vsm4rnds4))]
#[unstable(feature = "sha512_sm_x86", issue = "126624")]
pub fn _mm_sm4rnds4_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vsm4rnds4128(a.as_i32x4(), b.as_i32x4())) }
}
266
/// Performs four rounds of SM4 encryption on each of the two independent
/// 128-bit lanes of `a` and `b` (see `_mm_sm4rnds4_epi32` for the per-lane
/// operation), and returns the updated state.
///
/// Lowers to the `vsm4rnds4` instruction (verified by `assert_instr`).
#[inline]
#[target_feature(enable = "sm4,avx")]
#[cfg_attr(test, assert_instr(vsm4rnds4))]
#[unstable(feature = "sha512_sm_x86", issue = "126624")]
pub fn _mm256_sm4rnds4_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vsm4rnds4256(a.as_i32x8(), b.as_i32x8())) }
}
278
// Tests for the SHA/SM3/SM4 intrinsics. The SHA-1/SHA-256 tests check the
// hardware against fixed expected vectors; the SHA-512/SM3/SM4 tests check
// it against scalar reference implementations of the respective algorithms
// written inline below.
#[cfg(test)]
mod tests {
    use crate::{
        core_arch::{simd::*, x86::*},
        // NOTE(review): `black_box` appears unused in this module — confirm
        // before removing.
        hint::black_box,
    };
    use stdarch_test::simd_test;

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1msg1_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0x98829f34f74ad457, 0xda2b1a44d0b5ad3c);
        let r = _mm_sha1msg1_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1msg2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xf714b202d863d47d, 0x90c30d946b3d3b35);
        let r = _mm_sha1msg2_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1nexte_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0x2589d5be923f82a4, 0x59f111f13956c25b);
        let r = _mm_sha1nexte_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    // Exercises all four values of the 2-bit FUNC immediate.
    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1rnds4_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0x32b13cd8322f5268, 0xc54420862bd9246f);
        let r = _mm_sha1rnds4_epu32::<0>(a, b);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0x6d4c43e56a3c25d9, 0xa7e00fb775cbd3fe);
        let r = _mm_sha1rnds4_epu32::<1>(a, b);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0xb304e383c01222f4, 0x66f6b3b1f89d8001);
        let r = _mm_sha1rnds4_epu32::<2>(a, b);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0x8189b758bfabfa79, 0xdb08f6e78cae098b);
        let r = _mm_sha1rnds4_epu32::<3>(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256msg1_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xeb84973fd5cda67d, 0x2857b88f406b09ee);
        let r = _mm_sha256msg1_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256msg2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xb58777ce887fd851, 0x15d1ec8b73ac8450);
        let r = _mm_sha256msg2_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sha")]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256rnds2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let k = _mm_set_epi64x(0, 0x12835b01d807aa98);
        let expected = _mm_set_epi64x(0xd3063037effb15ea, 0x187ee3db0d6d1d19);
        let r = _mm_sha256rnds2_epu32(a, b, k);
        assert_eq_m128i(r, expected);
    }

    // Arbitrary 64-bit input data sliced up as a/b/k operands by the
    // SHA-512 tests below.
    static DATA_64: [u64; 10] = [
        0x0011223344556677,
        0x8899aabbccddeeff,
        0xffeeddccbbaa9988,
        0x7766554433221100,
        0x0123456789abcdef,
        0xfedcba9876543210,
        0x02468ace13579bdf,
        0xfdb97531eca86420,
        0x048c159d26ae37bf,
        0xfb73ea62d951c840,
    ];

    #[simd_test(enable = "sha512,avx")]
    unsafe fn test_mm256_sha512msg1_epi64() {
        // Scalar reference for the sigma0 message-schedule function.
        fn s0(word: u64) -> u64 {
            word.rotate_right(1) ^ word.rotate_right(8) ^ (word >> 7)
        }

        let A = &DATA_64[0..4];
        let B = &DATA_64[4..6];

        let a = _mm256_loadu_si256(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());

        let r = _mm256_sha512msg1_epi64(a, b);

        let e = _mm256_setr_epi64x(
            A[0].wrapping_add(s0(A[1])) as _,
            A[1].wrapping_add(s0(A[2])) as _,
            A[2].wrapping_add(s0(A[3])) as _,
            A[3].wrapping_add(s0(B[0])) as _,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "sha512,avx")]
    unsafe fn test_mm256_sha512msg2_epi64() {
        // Scalar reference for the sigma1 message-schedule function.
        fn s1(word: u64) -> u64 {
            word.rotate_right(19) ^ word.rotate_right(61) ^ (word >> 6)
        }

        let A = &DATA_64[0..4];
        let B = &DATA_64[4..8];

        let a = _mm256_loadu_si256(A.as_ptr().cast());
        let b = _mm256_loadu_si256(B.as_ptr().cast());

        let r = _mm256_sha512msg2_epi64(a, b);

        // The upper two lanes feed on the freshly computed lower two.
        let e0 = A[0].wrapping_add(s1(B[2]));
        let e1 = A[1].wrapping_add(s1(B[3]));
        let e = _mm256_setr_epi64x(
            e0 as _,
            e1 as _,
            A[2].wrapping_add(s1(e0)) as _,
            A[3].wrapping_add(s1(e1)) as _,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "sha512,avx")]
    unsafe fn test_mm256_sha512rnds2_epi64() {
        // Scalar reference implementations of the SHA-512 round functions.
        fn cap_sigma0(word: u64) -> u64 {
            word.rotate_right(28) ^ word.rotate_right(34) ^ word.rotate_right(39)
        }

        fn cap_sigma1(word: u64) -> u64 {
            word.rotate_right(14) ^ word.rotate_right(18) ^ word.rotate_right(41)
        }

        fn maj(a: u64, b: u64, c: u64) -> u64 {
            (a & b) ^ (a & c) ^ (b & c)
        }

        fn ch(e: u64, f: u64, g: u64) -> u64 {
            (e & f) ^ (g & !e)
        }

        let A = &DATA_64[0..4];
        let B = &DATA_64[4..8];
        let K = &DATA_64[8..10];

        let a = _mm256_loadu_si256(A.as_ptr().cast());
        let b = _mm256_loadu_si256(B.as_ptr().cast());
        let k = _mm_loadu_si128(K.as_ptr().cast());

        let r = _mm256_sha512rnds2_epi64(a, b, k);

        // Run two scalar SHA-512 rounds over the state pulled out of the
        // a/b lanes, then repack the lanes the intrinsic is defined to
        // produce.
        let mut array = [B[3], B[2], A[3], A[2], B[1], B[0], A[1], A[0]];
        for i in 0..2 {
            let new_d = ch(array[4], array[5], array[6])
                .wrapping_add(cap_sigma1(array[4]))
                .wrapping_add(K[i])
                .wrapping_add(array[7]);
            array[7] = new_d
                .wrapping_add(maj(array[0], array[1], array[2]))
                .wrapping_add(cap_sigma0(array[0]));
            array[3] = new_d.wrapping_add(array[3]);
            array.rotate_right(1);
        }
        let e = _mm256_setr_epi64x(array[5] as _, array[4] as _, array[1] as _, array[0] as _);

        assert_eq_m256i(r, e);
    }

    // Arbitrary 32-bit input data sliced up as a/b/c operands by the
    // SM3/SM4 tests below.
    static DATA_32: [u32; 16] = [
        0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff, 0xffeeddcc, 0xbbaa9988, 0x77665544,
        0x33221100, 0x01234567, 0x89abcdef, 0xfedcba98, 0x76543210, 0x02468ace, 0x13579bdf,
        0xfdb97531, 0xeca86420,
    ];

    #[simd_test(enable = "sm3,avx")]
    unsafe fn test_mm_sm3msg1_epi32() {
        // Scalar reference for the SM3 P1 permutation.
        fn p1(x: u32) -> u32 {
            x ^ x.rotate_left(15) ^ x.rotate_left(23)
        }
        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];
        let C = &DATA_32[8..12];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());
        let c = _mm_loadu_si128(C.as_ptr().cast());

        let r = _mm_sm3msg1_epi32(a, b, c);

        let e = _mm_setr_epi32(
            p1(A[0] ^ C[0] ^ B[0].rotate_left(15)) as _,
            p1(A[1] ^ C[1] ^ B[1].rotate_left(15)) as _,
            p1(A[2] ^ C[2] ^ B[2].rotate_left(15)) as _,
            p1(A[3] ^ C[3]) as _,
        );

        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sm3,avx")]
    unsafe fn test_mm_sm3msg2_epi32() {
        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];
        let C = &DATA_32[8..12];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());
        let c = _mm_loadu_si128(C.as_ptr().cast());

        let r = _mm_sm3msg2_epi32(a, b, c);

        // The top lane additionally folds in rotations of the bottom lane.
        let e0 = B[0].rotate_left(7) ^ C[0] ^ A[0];
        let e = _mm_setr_epi32(
            e0 as _,
            (B[1].rotate_left(7) ^ C[1] ^ A[1]) as _,
            (B[2].rotate_left(7) ^ C[2] ^ A[2]) as _,
            (B[3].rotate_left(7)
                ^ C[3]
                ^ A[3]
                ^ e0.rotate_left(6)
                ^ e0.rotate_left(15)
                ^ e0.rotate_left(30)) as _,
        );

        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sm3,avx")]
    unsafe fn test_mm_sm3rnds2_epi32() {
        // Scalar reference implementations of the SM3 round functions; FF
        // and GG switch definition at round 16.
        fn p0(x: u32) -> u32 {
            x ^ x.rotate_left(9) ^ x.rotate_left(17)
        }
        fn ff(x: u32, y: u32, z: u32, round: u32) -> u32 {
            if round < 16 {
                x ^ y ^ z
            } else {
                (x & y) | (x & z) | (y & z)
            }
        }
        fn gg(x: u32, y: u32, z: u32, round: u32) -> u32 {
            if round < 16 {
                x ^ y ^ z
            } else {
                (x & y) | (!x & z)
            }
        }

        // Even round number in 0..=62, as required by the IMM8 contract.
        const ROUND: u32 = 30;

        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];
        let C = &DATA_32[8..12];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());
        let c = _mm_loadu_si128(C.as_ptr().cast());

        let r = _mm_sm3rnds2_epi32::<{ ROUND as i32 }>(a, b, c);

        // Round constant T_j differs for the first and second 16 rounds.
        let CONST: u32 = if ROUND < 16 { 0x79cc4519 } else { 0x7a879d8a };

        let mut array = [
            B[3],
            B[2],
            A[3].rotate_left(9),
            A[2].rotate_left(9),
            B[1],
            B[0],
            A[1].rotate_left(19),
            A[0].rotate_left(19),
        ];

        // Two scalar SM3 compression rounds over the unpacked state.
        for i in 0..2 {
            let s1 = array[0]
                .rotate_left(12)
                .wrapping_add(array[4])
                .wrapping_add(CONST.rotate_left(ROUND as u32 + i as u32))
                .rotate_left(7);
            let s2 = s1 ^ array[0].rotate_left(12);

            let t1 = ff(array[0], array[1], array[2], ROUND)
                .wrapping_add(array[3])
                .wrapping_add(s2)
                .wrapping_add(C[i] ^ C[i + 2]);
            let t2 = gg(array[4], array[5], array[6], ROUND)
                .wrapping_add(array[7])
                .wrapping_add(s1)
                .wrapping_add(C[i]);

            array[3] = array[2];
            array[2] = array[1].rotate_left(9);
            array[1] = array[0];
            array[0] = t1;
            array[7] = array[6];
            array[6] = array[5].rotate_left(19);
            array[5] = array[4];
            array[4] = p0(t2);
        }

        let e = _mm_setr_epi32(array[5] as _, array[4] as _, array[1] as _, array[0] as _);

        assert_eq_m128i(r, e);
    }

    // Scalar reference for the SM4 non-linear transform tau: applies the
    // SM4 S-box to each byte of the word.
    fn lower_t(x: u32) -> u32 {
        static SBOX: [u8; 256] = [
            0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB,
            0x2C, 0x05, 0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26,
            0x49, 0x86, 0x06, 0x99, 0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54,
            0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, 0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95,
            0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6, 0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73,
            0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, 0x68, 0x6B, 0x81, 0xB2,
            0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, 0x1E, 0x24,
            0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87,
            0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4,
            0xC8, 0x9E, 0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE,
            0xF9, 0x61, 0x15, 0xA1, 0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93,
            0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3, 0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60,
            0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, 0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD,
            0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51, 0x8D, 0x1B, 0xAF, 0x92,
            0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, 0x0A, 0xC1,
            0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0,
            0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E,
            0xC6, 0x84, 0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E,
            0xD7, 0xCB, 0x39, 0x48,
        ];

        ((SBOX[(x >> 24) as usize] as u32) << 24)
            | ((SBOX[((x >> 16) & 0xff) as usize] as u32) << 16)
            | ((SBOX[((x >> 8) & 0xff) as usize] as u32) << 8)
            | (SBOX[(x & 0xff) as usize] as u32)
    }

    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm_sm4key4_epi32() {
        // Scalar reference for the key-expansion linear transform L' and
        // round function F'.
        fn l_key(x: u32) -> u32 {
            x ^ x.rotate_left(13) ^ x.rotate_left(23)
        }
        fn f_key(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
            x0 ^ l_key(lower_t(x1 ^ x2 ^ x3 ^ rk))
        }

        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());

        let r = _mm_sm4key4_epi32(a, b);

        // Each output feeds into the computation of the next one.
        let e0 = f_key(A[0], A[1], A[2], A[3], B[0]);
        let e1 = f_key(A[1], A[2], A[3], e0, B[1]);
        let e2 = f_key(A[2], A[3], e0, e1, B[2]);
        let e3 = f_key(A[3], e0, e1, e2, B[3]);
        let e = _mm_setr_epi32(e0 as _, e1 as _, e2 as _, e3 as _);

        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm256_sm4key4_epi32() {
        // The 256-bit form must match the 128-bit form applied per lane.
        let a_low = _mm_loadu_si128(DATA_32.as_ptr().cast());
        let a_high = _mm_loadu_si128(DATA_32[4..].as_ptr().cast());
        let b_low = _mm_loadu_si128(DATA_32[8..].as_ptr().cast());
        let b_high = _mm_loadu_si128(DATA_32[12..].as_ptr().cast());

        let a = _mm256_set_m128i(a_high, a_low);
        let b = _mm256_set_m128i(b_high, b_low);

        let r = _mm256_sm4key4_epi32(a, b);

        let e_low = _mm_sm4key4_epi32(a_low, b_low);
        let e_high = _mm_sm4key4_epi32(a_high, b_high);
        let e = _mm256_set_m128i(e_high, e_low);

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm_sm4rnds4_epi32() {
        // Scalar reference for the encryption linear transform L and round
        // function F.
        fn l_rnd(x: u32) -> u32 {
            x ^ x.rotate_left(2) ^ x.rotate_left(10) ^ x.rotate_left(18) ^ x.rotate_left(24)
        }
        fn f_rnd(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
            x0 ^ l_rnd(lower_t(x1 ^ x2 ^ x3 ^ rk))
        }

        let A = &DATA_32[0..4];
        let B = &DATA_32[4..8];

        let a = _mm_loadu_si128(A.as_ptr().cast());
        let b = _mm_loadu_si128(B.as_ptr().cast());

        let r = _mm_sm4rnds4_epi32(a, b);

        // Each output feeds into the computation of the next one.
        let e0 = f_rnd(A[0], A[1], A[2], A[3], B[0]);
        let e1 = f_rnd(A[1], A[2], A[3], e0, B[1]);
        let e2 = f_rnd(A[2], A[3], e0, e1, B[2]);
        let e3 = f_rnd(A[3], e0, e1, e2, B[3]);
        let e = _mm_setr_epi32(e0 as _, e1 as _, e2 as _, e3 as _);

        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sm4,avx")]
    unsafe fn test_mm256_sm4rnds4_epi32() {
        // The 256-bit form must match the 128-bit form applied per lane.
        let a_low = _mm_loadu_si128(DATA_32.as_ptr().cast());
        let a_high = _mm_loadu_si128(DATA_32[4..].as_ptr().cast());
        let b_low = _mm_loadu_si128(DATA_32[8..].as_ptr().cast());
        let b_high = _mm_loadu_si128(DATA_32[12..].as_ptr().cast());

        let a = _mm256_set_m128i(a_high, a_low);
        let b = _mm256_set_m128i(b_high, b_low);

        let r = _mm256_sm4rnds4_epi32(a, b);

        let e_low = _mm_sm4rnds4_epi32(a_low, b_low);
        let e_high = _mm_sm4rnds4_epi32(a_high, b_high);
        let e = _mm256_set_m128i(e_high, e_low);

        assert_eq_m256i(r, e);
    }
}