Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ jobs:
name: Test
runs-on: ${{ matrix.target.os }}
strategy:
fail-fast: false
matrix:
profile:
- dev
Expand Down
32 changes: 16 additions & 16 deletions crates/core_arch/src/aarch64/sve/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9799,7 +9799,7 @@ pub fn svdupq_n_f32(x0: f32, x1: f32, x2: f32, x3: f32) -> svfloat32_t {
unsafe extern "unadjusted" {
#[cfg_attr(
target_arch = "aarch64",
link_name = "llvm.experimental.vector.insert.nxv4f32.v4f32"
link_name = "llvm.vector.insert.nxv4f32.v4f32"
)]
fn _svdupq_n_f32(op0: svfloat32_t, op1: float32x4_t, idx: i64) -> svfloat32_t;
}
Expand All @@ -9817,7 +9817,7 @@ pub fn svdupq_n_s32(x0: i32, x1: i32, x2: i32, x3: i32) -> svint32_t {
unsafe extern "unadjusted" {
#[cfg_attr(
target_arch = "aarch64",
link_name = "llvm.experimental.vector.insert.nxv4i32.v4i32"
link_name = "llvm.vector.insert.nxv4i32.v4i32"
)]
fn _svdupq_n_s32(op0: svint32_t, op1: int32x4_t, idx: i64) -> svint32_t;
}
Expand Down Expand Up @@ -9851,7 +9851,7 @@ pub fn svdupq_n_f64(x0: f64, x1: f64) -> svfloat64_t {
unsafe extern "unadjusted" {
#[cfg_attr(
target_arch = "aarch64",
link_name = "llvm.experimental.vector.insert.nxv2f64.v2f64"
link_name = "llvm.vector.insert.nxv2f64.v2f64"
)]
fn _svdupq_n_f64(op0: svfloat64_t, op1: float64x2_t, idx: i64) -> svfloat64_t;
}
Expand All @@ -9869,7 +9869,7 @@ pub fn svdupq_n_s64(x0: i64, x1: i64) -> svint64_t {
unsafe extern "unadjusted" {
#[cfg_attr(
target_arch = "aarch64",
link_name = "llvm.experimental.vector.insert.nxv2i64.v2i64"
link_name = "llvm.vector.insert.nxv2i64.v2i64"
)]
fn _svdupq_n_s64(op0: svint64_t, op1: int64x2_t, idx: i64) -> svint64_t;
}
Expand Down Expand Up @@ -9904,7 +9904,7 @@ pub fn svdupq_n_s16(
unsafe extern "unadjusted" {
#[cfg_attr(
target_arch = "aarch64",
link_name = "llvm.experimental.vector.insert.nxv8i16.v8i16"
link_name = "llvm.vector.insert.nxv8i16.v8i16"
)]
fn _svdupq_n_s16(op0: svint16_t, op1: int16x8_t, idx: i64) -> svint16_t;
}
Expand Down Expand Up @@ -9972,7 +9972,7 @@ pub fn svdupq_n_s8(
unsafe extern "unadjusted" {
#[cfg_attr(
target_arch = "aarch64",
link_name = "llvm.experimental.vector.insert.nxv16i8.v16i8"
link_name = "llvm.vector.insert.nxv16i8.v16i8"
)]
fn _svdupq_n_s8(op0: svint8_t, op1: int8x16_t, idx: i64) -> svint8_t;
}
Expand Down Expand Up @@ -35208,7 +35208,7 @@ pub fn svreinterpret_u64_u64(op: svuint64_t) -> svuint64_t {
#[cfg_attr(test, assert_instr(rev))]
pub fn svrev_b8(op: svbool_t) -> svbool_t {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv16i1")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv16i1")]
fn _svrev_b8(op: svbool_t) -> svbool_t;
}
unsafe { _svrev_b8(op) }
Expand All @@ -35221,7 +35221,7 @@ pub fn svrev_b8(op: svbool_t) -> svbool_t {
#[cfg_attr(test, assert_instr(rev))]
pub fn svrev_b16(op: svbool_t) -> svbool_t {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv8i1")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv8i1")]
fn _svrev_b16(op: svbool8_t) -> svbool8_t;
}
unsafe { _svrev_b16(op.sve_into()).sve_into() }
Expand All @@ -35234,7 +35234,7 @@ pub fn svrev_b16(op: svbool_t) -> svbool_t {
#[cfg_attr(test, assert_instr(rev))]
pub fn svrev_b32(op: svbool_t) -> svbool_t {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv4i1")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv4i1")]
fn _svrev_b32(op: svbool4_t) -> svbool4_t;
}
unsafe { _svrev_b32(op.sve_into()).sve_into() }
Expand All @@ -35247,7 +35247,7 @@ pub fn svrev_b32(op: svbool_t) -> svbool_t {
#[cfg_attr(test, assert_instr(rev))]
pub fn svrev_b64(op: svbool_t) -> svbool_t {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv2i1")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv2i1")]
fn _svrev_b64(op: svbool2_t) -> svbool2_t;
}
unsafe { _svrev_b64(op.sve_into()).sve_into() }
Expand All @@ -35260,7 +35260,7 @@ pub fn svrev_b64(op: svbool_t) -> svbool_t {
#[cfg_attr(test, assert_instr(rev))]
pub fn svrev_f32(op: svfloat32_t) -> svfloat32_t {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv4f32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv4f32")]
fn _svrev_f32(op: svfloat32_t) -> svfloat32_t;
}
unsafe { _svrev_f32(op) }
Expand All @@ -35273,7 +35273,7 @@ pub fn svrev_f32(op: svfloat32_t) -> svfloat32_t {
#[cfg_attr(test, assert_instr(rev))]
pub fn svrev_f64(op: svfloat64_t) -> svfloat64_t {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv2f64")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv2f64")]
fn _svrev_f64(op: svfloat64_t) -> svfloat64_t;
}
unsafe { _svrev_f64(op) }
Expand All @@ -35286,7 +35286,7 @@ pub fn svrev_f64(op: svfloat64_t) -> svfloat64_t {
#[cfg_attr(test, assert_instr(rev))]
pub fn svrev_s8(op: svint8_t) -> svint8_t {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv16i8")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv16i8")]
fn _svrev_s8(op: svint8_t) -> svint8_t;
}
unsafe { _svrev_s8(op) }
Expand All @@ -35299,7 +35299,7 @@ pub fn svrev_s8(op: svint8_t) -> svint8_t {
#[cfg_attr(test, assert_instr(rev))]
pub fn svrev_s16(op: svint16_t) -> svint16_t {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv8i16")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv8i16")]
fn _svrev_s16(op: svint16_t) -> svint16_t;
}
unsafe { _svrev_s16(op) }
Expand All @@ -35312,7 +35312,7 @@ pub fn svrev_s16(op: svint16_t) -> svint16_t {
#[cfg_attr(test, assert_instr(rev))]
pub fn svrev_s32(op: svint32_t) -> svint32_t {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv4i32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv4i32")]
fn _svrev_s32(op: svint32_t) -> svint32_t;
}
unsafe { _svrev_s32(op) }
Expand All @@ -35325,7 +35325,7 @@ pub fn svrev_s32(op: svint32_t) -> svint32_t {
#[cfg_attr(test, assert_instr(rev))]
pub fn svrev_s64(op: svint64_t) -> svint64_t {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv2i64")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv2i64")]
fn _svrev_s64(op: svint64_t) -> svint64_t;
}
unsafe { _svrev_s64(op) }
Expand Down
1 change: 1 addition & 0 deletions crates/core_arch/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
stdarch_powerpc_feature_detection,
)
)]
#![warn(deprecated_llvm_intrinsic)]

#[cfg(test)]
#[macro_use]
Expand Down
38 changes: 5 additions & 33 deletions crates/core_arch/src/x86/avx512bf16.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
//!
//! [AVX512BF16 intrinsics]: https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769&avx512techs=AVX512_BF16

use crate::arch::asm;
use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;

Expand All @@ -17,6 +16,8 @@ unsafe extern "C" {
fn cvtne2ps2bf16_256(a: f32x8, b: f32x8) -> i16x16;
#[link_name = "llvm.x86.avx512bf16.cvtne2ps2bf16.512"]
fn cvtne2ps2bf16_512(a: f32x16, b: f32x16) -> i16x32;
#[link_name = "llvm.x86.avx512bf16.mask.cvtneps2bf16.128"]
fn cvtneps2bf16_128(a: f32x4, src: i16x8, k: __mmask8) -> i16x8;
#[link_name = "llvm.x86.avx512bf16.cvtneps2bf16.256"]
fn cvtneps2bf16_256(a: f32x8) -> i16x8;
#[link_name = "llvm.x86.avx512bf16.cvtneps2bf16.512"]
Expand Down Expand Up @@ -519,16 +520,7 @@ pub fn _mm_cvtsbh_ss(a: bf16) -> f32 {
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtneps_pbh(a: __m128) -> __m128bh {
unsafe {
let mut dst: __m128bh;
asm!(
"vcvtneps2bf16 {dst}, {src}",
dst = lateout(xmm_reg) dst,
src = in(xmm_reg) a,
options(pure, nomem, nostack, preserves_flags)
);
dst
}
_mm_mask_cvtneps_pbh(__m128bh::splat(0), !0, a)
}

/// Converts packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
Expand All @@ -541,17 +533,7 @@ pub fn _mm_cvtneps_pbh(a: __m128) -> __m128bh {
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m128bh {
unsafe {
let mut dst = src;
asm!(
"vcvtneps2bf16 {dst}{{{k}}},{src}",
dst = inlateout(xmm_reg) dst,
src = in(xmm_reg) a,
k = in(kreg) k,
options(pure, nomem, nostack, preserves_flags)
);
dst
}
unsafe { cvtneps2bf16_128(a.as_f32x4(), src.as_i16x8(), k).as_m128bh() }
}

/// Converts packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
Expand All @@ -564,17 +546,7 @@ pub fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m128bh {
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtneps_pbh(k: __mmask8, a: __m128) -> __m128bh {
unsafe {
let mut dst: __m128bh;
asm!(
"vcvtneps2bf16 {dst}{{{k}}}{{z}},{src}",
dst = lateout(xmm_reg) dst,
src = in(xmm_reg) a,
k = in(kreg) k,
options(pure, nomem, nostack, preserves_flags)
);
dst
}
_mm_mask_cvtneps_pbh(__m128bh::splat(0), k, a)
}

/// Converts a single-precision (32-bit) floating-point element in a to a BF16 (16-bit) floating-point
Expand Down
24 changes: 12 additions & 12 deletions crates/core_arch/src/x86/avx512bitalg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ use stdarch_test::assert_instr;

#[allow(improper_ctypes)]
unsafe extern "C" {
#[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.512"]
fn bitshuffle_512(data: i8x64, indices: i8x64, mask: __mmask64) -> __mmask64;
#[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.256"]
fn bitshuffle_256(data: i8x32, indices: i8x32, mask: __mmask32) -> __mmask32;
#[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.128"]
fn bitshuffle_128(data: i8x16, indices: i8x16, mask: __mmask16) -> __mmask16;
#[link_name = "llvm.x86.avx512.vpshufbitqmb.512"]
fn bitshuffle_512(data: i8x64, indices: i8x64) -> __mmask64;
#[link_name = "llvm.x86.avx512.vpshufbitqmb.256"]
fn bitshuffle_256(data: i8x32, indices: i8x32) -> __mmask32;
#[link_name = "llvm.x86.avx512.vpshufbitqmb.128"]
fn bitshuffle_128(data: i8x16, indices: i8x16) -> __mmask16;
}

/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
Expand Down Expand Up @@ -370,7 +370,7 @@ pub const fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0) }
unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64()) }
}

/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
Expand All @@ -386,7 +386,7 @@ pub fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k) }
_mm512_bitshuffle_epi64_mask(b, c) & k
}

/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
Expand All @@ -399,7 +399,7 @@ pub fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0) }
unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32()) }
}

/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
Expand All @@ -415,7 +415,7 @@ pub fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k) }
_mm256_bitshuffle_epi64_mask(b, c) & k
}

/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
Expand All @@ -428,7 +428,7 @@ pub fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0) }
unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16()) }
}

/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
Expand All @@ -444,7 +444,7 @@ pub fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k) }
_mm_bitshuffle_epi64_mask(b, c) & k
}

#[cfg(test)]
Expand Down
Loading
Loading