Skip to content

Commit a129b69

Browse files
committed
Implement AVX512-VP2INTERSECT intrinsics
1 parent bc46000 commit a129b69

3 files changed

Lines changed: 206 additions & 16 deletions

File tree

crates/core_arch/missing-x86.md

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -44,22 +44,6 @@
4444
</p></details>
4545

4646

47-
<details><summary>["AVX512_VP2INTERSECT", "AVX512F"]</summary><p>
48-
49-
* [ ] [`_mm512_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi32)
50-
* [ ] [`_mm512_2intersect_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi64)
51-
</p></details>
52-
53-
54-
<details><summary>["AVX512_VP2INTERSECT", "AVX512VL"]</summary><p>
55-
56-
* [ ] [`_mm256_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_2intersect_epi32)
57-
* [ ] [`_mm256_2intersect_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_2intersect_epi64)
58-
* [ ] [`_mm_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_2intersect_epi32)
59-
* [ ] [`_mm_2intersect_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_2intersect_epi64)
60-
</p></details>
61-
62-
6347
<details><summary>["CET_SS"]</summary><p>
6448

6549
* [ ] [`_clrssbsy`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_clrssbsy)
Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
//! Vector Pair Intersection to a Pair of Mask Registers (VP2INTERSECT)
2+
3+
use crate::core_arch::{simd::*, x86::*};
4+
5+
#[cfg(test)]
6+
use stdarch_test::assert_instr;
7+
8+
/// Compute intersection of packed 32-bit integer vectors a and b,
9+
/// and store indication of match in the corresponding bit of two mask registers
10+
/// specified by k1 and k2. A match in corresponding elements of a and b is
11+
/// indicated by a set bit in the corresponding bit of the mask registers.
12+
///
13+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_2intersect_epi32&expand=0)
14+
#[inline]
15+
#[target_feature(enable = "avx512vp2intersect,avx512vl")]
16+
#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
17+
#[cfg_attr(test, assert_instr(vp2intersectd))]
18+
pub unsafe fn _mm_2intersect_epi32(a: __m128i, b: __m128i, k1: *mut __mmask8, k2: *mut __mmask8) {
19+
(*k1, *k2) = vp2intersectd_128(a.as_i32x4(), b.as_i32x4());
20+
}
21+
22+
/// Compute intersection of packed 64-bit integer vectors a and b,
23+
/// and store indication of match in the corresponding bit of two mask registers
24+
/// specified by k1 and k2. A match in corresponding elements of a and b is
25+
/// indicated by a set bit in the corresponding bit of the mask registers.
26+
///
27+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_2intersect_epi64&expand=0)
28+
#[inline]
29+
#[target_feature(enable = "avx512vp2intersect,avx512vl")]
30+
#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
31+
#[cfg_attr(test, assert_instr(vp2intersectq))]
32+
pub unsafe fn _mm_2intersect_epi64(a: __m128i, b: __m128i, k1: *mut __mmask8, k2: *mut __mmask8) {
33+
(*k1, *k2) = vp2intersectq_128(a.as_i64x2(), b.as_i64x2());
34+
}
35+
36+
/// Compute intersection of packed 32-bit integer vectors a and b,
37+
/// and store indication of match in the corresponding bit of two mask registers
38+
/// specified by k1 and k2. A match in corresponding elements of a and b is
39+
/// indicated by a set bit in the corresponding bit of the mask registers.
40+
///
41+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_2intersect_epi32&expand=0)
42+
#[inline]
43+
#[target_feature(enable = "avx512vp2intersect,avx512vl")]
44+
#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
45+
#[cfg_attr(test, assert_instr(vp2intersectd))]
46+
pub unsafe fn _mm256_2intersect_epi32(
47+
a: __m256i,
48+
b: __m256i,
49+
k1: *mut __mmask8,
50+
k2: *mut __mmask8,
51+
) {
52+
(*k1, *k2) = vp2intersectd_256(a.as_i32x8(), b.as_i32x8());
53+
}
54+
55+
/// Compute intersection of packed 64-bit integer vectors a and b,
56+
/// and store indication of match in the corresponding bit of two mask registers
57+
/// specified by k1 and k2. A match in corresponding elements of a and b is
58+
/// indicated by a set bit in the corresponding bit of the mask registers.
59+
///
60+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_2intersect_epi64&expand=0)
61+
#[inline]
62+
#[target_feature(enable = "avx512vp2intersect,avx512vl")]
63+
#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
64+
#[cfg_attr(test, assert_instr(vp2intersectq))]
65+
pub unsafe fn _mm256_2intersect_epi64(
66+
a: __m256i,
67+
b: __m256i,
68+
k1: *mut __mmask8,
69+
k2: *mut __mmask8,
70+
) {
71+
(*k1, *k2) = vp2intersectq_256(a.as_i64x4(), b.as_i64x4());
72+
}
73+
74+
/// Compute intersection of packed 32-bit integer vectors a and b,
75+
/// and store indication of match in the corresponding bit of two mask registers
76+
/// specified by k1 and k2. A match in corresponding elements of a and b is
77+
/// indicated by a set bit in the corresponding bit of the mask registers.
78+
///
79+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi32&expand=0)
80+
#[inline]
81+
#[target_feature(enable = "avx512vp2intersect,avx512f")]
82+
#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
83+
#[cfg_attr(test, assert_instr(vp2intersectd))]
84+
pub unsafe fn _mm512_2intersect_epi32(
85+
a: __m512i,
86+
b: __m512i,
87+
k1: *mut __mmask16,
88+
k2: *mut __mmask16,
89+
) {
90+
(*k1, *k2) = vp2intersectd_512(a.as_i32x16(), b.as_i32x16());
91+
}
92+
93+
/// Compute intersection of packed 64-bit integer vectors a and b,
94+
/// and store indication of match in the corresponding bit of two mask registers
95+
/// specified by k1 and k2. A match in corresponding elements of a and b is
96+
/// indicated by a set bit in the corresponding bit of the mask registers.
97+
///
98+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi64&expand=0)
99+
#[inline]
100+
#[target_feature(enable = "avx512vp2intersect,avx512f")]
101+
#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
102+
#[cfg_attr(test, assert_instr(vp2intersectq))]
103+
pub unsafe fn _mm512_2intersect_epi64(
104+
a: __m512i,
105+
b: __m512i,
106+
k1: *mut __mmask8,
107+
k2: *mut __mmask8,
108+
) {
109+
(*k1, *k2) = vp2intersectq_512(a.as_i64x8(), b.as_i64x8());
110+
}
111+
112+
#[allow(improper_ctypes)]
113+
unsafe extern "C" {
114+
#[link_name = "llvm.x86.avx512.vp2intersect.d.128"]
115+
fn vp2intersectd_128(a: i32x4, b: i32x4) -> (u8, u8);
116+
#[link_name = "llvm.x86.avx512.vp2intersect.q.128"]
117+
fn vp2intersectq_128(a: i64x2, b: i64x2) -> (u8, u8);
118+
119+
#[link_name = "llvm.x86.avx512.vp2intersect.d.256"]
120+
fn vp2intersectd_256(a: i32x8, b: i32x8) -> (u8, u8);
121+
#[link_name = "llvm.x86.avx512.vp2intersect.q.256"]
122+
fn vp2intersectq_256(a: i64x4, b: i64x4) -> (u8, u8);
123+
124+
#[link_name = "llvm.x86.avx512.vp2intersect.d.512"]
125+
fn vp2intersectd_512(a: i32x16, b: i32x16) -> (u16, u16);
126+
#[link_name = "llvm.x86.avx512.vp2intersect.q.512"]
127+
fn vp2intersectq_512(a: i64x8, b: i64x8) -> (u8, u8);
128+
}
129+
130+
#[cfg(test)]
131+
mod tests {
132+
use crate::core_arch::x86::*;
133+
use stdarch_test::simd_test;
134+
135+
#[simd_test(enable = "avx512vp2intersect,avx512vl")]
136+
unsafe fn test_mm_2intersect_epi32() {
137+
let a = _mm_set_epi32(1, 2, 3, 4);
138+
let b = _mm_set_epi32(3, 4, 5, 6);
139+
let mut k1 = 0;
140+
let mut k2 = 0;
141+
_mm_2intersect_epi32(a, b, &mut k1, &mut k2);
142+
assert_eq!(k1, 0b0011);
143+
assert_eq!(k2, 0b1100);
144+
}
145+
146+
#[simd_test(enable = "avx512vp2intersect,avx512vl")]
147+
unsafe fn test_mm_2intersect_epi64() {
148+
let a = _mm_set_epi64x(1, 2);
149+
let b = _mm_set_epi64x(2, 3);
150+
let mut k1 = 0;
151+
let mut k2 = 0;
152+
_mm_2intersect_epi64(a, b, &mut k1, &mut k2);
153+
assert_eq!(k1, 0b01);
154+
assert_eq!(k2, 0b10);
155+
}
156+
157+
#[simd_test(enable = "avx512vp2intersect,avx512vl")]
158+
unsafe fn test_mm256_2intersect_epi32() {
159+
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
160+
let b = _mm256_set_epi32(5, 6, 7, 8, 9, 10, 11, 12);
161+
let mut k1 = 0;
162+
let mut k2 = 0;
163+
_mm256_2intersect_epi32(a, b, &mut k1, &mut k2);
164+
assert_eq!(k1, 0b00001111);
165+
assert_eq!(k2, 0b11110000);
166+
}
167+
168+
#[simd_test(enable = "avx512vp2intersect,avx512vl")]
169+
unsafe fn test_mm256_2intersect_epi64() {
170+
let a = _mm256_set_epi64x(1, 2, 3, 4);
171+
let b = _mm256_set_epi64x(3, 4, 5, 6);
172+
let mut k1 = 0;
173+
let mut k2 = 0;
174+
_mm256_2intersect_epi64(a, b, &mut k1, &mut k2);
175+
assert_eq!(k1, 0b0011);
176+
assert_eq!(k2, 0b1100);
177+
}
178+
179+
#[simd_test(enable = "avx512vp2intersect,avx512f")]
180+
unsafe fn test_mm512_2intersect_epi32() {
181+
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
182+
let b = _mm512_set_epi32(
183+
9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
184+
);
185+
let mut k1 = 0;
186+
let mut k2 = 0;
187+
_mm512_2intersect_epi32(a, b, &mut k1, &mut k2);
188+
assert_eq!(k1, 0b0000000011111111);
189+
assert_eq!(k2, 0b1111111100000000);
190+
}
191+
192+
#[simd_test(enable = "avx512vp2intersect,avx512f")]
193+
unsafe fn test_mm512_2intersect_epi64() {
194+
let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
195+
let b = _mm512_set_epi64(5, 6, 7, 8, 9, 10, 11, 12);
196+
let mut k1 = 0;
197+
let mut k2 = 0;
198+
_mm512_2intersect_epi64(a, b, &mut k1, &mut k2);
199+
assert_eq!(k1, 0b00001111);
200+
assert_eq!(k2, 0b11110000);
201+
}
202+
}

crates/core_arch/src/x86/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,3 +778,7 @@ pub use self::kl::*;
778778
mod movrs;
779779
#[unstable(feature = "movrs_target_feature", issue = "137976")]
780780
pub use self::movrs::*;
781+
782+
mod avx512vp2intersect;
783+
#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
784+
pub use self::avx512vp2intersect::*;

0 commit comments

Comments
 (0)