// blob: e636d6c8aaf8decb3bf3eb6b000d52bf0e31f3dd [file] [log] [blame]
/// Program entry point: on x86 / x86_64 it runs the SSE2 intrinsic checks from the
/// `tests` module; on every other architecture the body is compiled out.
fn main() {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        // Fail fast if the running CPU does not report SSE2 support.
        assert!(is_x86_feature_detected!("sse2"));
        // SAFETY: the assertion above confirmed that the CPU supports SSE2, which is the
        // target feature `test_sse2` is declared with.
        unsafe { tests::test_sse2() };
    }
}
/// SSE2 intrinsic checks. The whole module is compiled only on x86 and x86_64.
///
/// The single entry point is `test_sse2`, which runs every check in order and panics on
/// the first mismatch.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod tests {
    #[cfg(target_arch = "x86")]
    use std::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use std::arch::x86_64::*;
    use std::mem::transmute;

    /// Builds a `__m128i` from two `i64` values, with the arguments in the reverse order
    /// of `_mm_set_epi64x`: `_mm_setr_epi64x(a, b)` is `_mm_set_epi64x(b, a)`.
    ///
    /// # Safety
    ///
    /// The CPU must support SSE2.
    #[target_feature(enable = "sse2")]
    unsafe fn _mm_setr_epi64x(a: i64, b: i64) -> __m128i {
        _mm_set_epi64x(b, a)
    }

    /// Runs every SSE2 intrinsic check, panicking on the first mismatch.
    ///
    /// Each check is a nested function that is invoked right after its definition.
    ///
    /// # Safety
    ///
    /// The CPU must support SSE2.
    #[target_feature(enable = "sse2")]
    pub(super) unsafe fn test_sse2() {
        // Mostly copied from library/stdarch/crates/core_arch/src/x86{,_64}/sse2.rs

        // Panics unless all four `f32` lanes of `a` and `b` compare equal.
        #[track_caller]
        #[target_feature(enable = "sse")]
        unsafe fn assert_eq_m128(a: __m128, b: __m128) {
            let r = _mm_cmpeq_ps(a, b);
            if _mm_movemask_ps(r) != 0b1111 {
                panic!("{:?} != {:?}", a, b);
            }
        }

        // Panics unless `a` and `b` are bit-for-bit identical.
        #[track_caller]
        #[target_feature(enable = "sse2")]
        unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
            assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b))
        }

        // Panics unless both `f64` lanes of `a` and `b` compare equal.
        #[track_caller]
        #[target_feature(enable = "sse2")]
        unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) {
            if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 {
                panic!("{:?} != {:?}", a, b);
            }
        }

        // Averaging 3 and 9 yields 6 in every 8-bit lane.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_avg_epu8() {
            let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
            let r = _mm_avg_epu8(a, b);
            assert_eq_m128i(r, _mm_set1_epi8(6));
        }
        test_mm_avg_epu8();

        // Averaging 3 and 9 yields 6 in every 16-bit lane.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_avg_epu16() {
            let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
            let r = _mm_avg_epu16(a, b);
            assert_eq_m128i(r, _mm_set1_epi16(6));
        }
        test_mm_avg_epu16();

        // 16-bit lanes are multiplied and adjacent products summed into 32-bit lanes
        // (e.g. 1 * 9 + 2 * 10 == 29).
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_madd_epi16() {
            let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
            let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
            let r = _mm_madd_epi16(a, b);
            let e = _mm_setr_epi32(29, 81, 149, 233);
            assert_eq_m128i(r, e);

            // Extreme inputs, including MIN * MIN + MIN * MIN, which is expected to come
            // out as `i32::MIN`.
            let a =
                _mm_setr_epi16(i16::MAX, i16::MAX, i16::MIN, i16::MIN, i16::MIN, i16::MAX, 0, 0);
            let b =
                _mm_setr_epi16(i16::MAX, i16::MAX, i16::MIN, i16::MIN, i16::MAX, i16::MIN, 0, 0);
            let r = _mm_madd_epi16(a, b);
            let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
            assert_eq_m128i(r, e);
        }
        test_mm_madd_epi16();

        // High 16 bits of the signed product 1000 * -1001.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_mulhi_epi16() {
            let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
            let r = _mm_mulhi_epi16(a, b);
            assert_eq_m128i(r, _mm_set1_epi16(-16));
        }
        test_mm_mulhi_epi16();

        // High 16 bits of the unsigned product 1000 * 1001.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_mulhi_epu16() {
            let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
            let r = _mm_mulhi_epu16(a, b);
            assert_eq_m128i(r, _mm_set1_epi16(15));
        }
        test_mm_mulhi_epu16();

        // The second 64-bit lane holds 1 << 34 and 1 << 35, whose low 32 bits are zero;
        // the expected product for that lane is 0.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_mul_epu32() {
            let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
            let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
            let r = _mm_mul_epu32(a, b);
            let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
            assert_eq_m128i(r, e);
        }
        test_mm_mul_epu32();

        // Sum of absolute byte differences, one total per 8-byte half (1020 and 614).
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_sad_epu8() {
            #[rustfmt::skip]
            let a = _mm_setr_epi8(
                255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
                1, 2, 3, 4,
                155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
                1, 2, 3, 4,
            );
            let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
            let r = _mm_sad_epu8(a, b);
            let e = _mm_setr_epi64x(1020, 614);
            assert_eq_m128i(r, e);
        }
        test_mm_sad_epu8();

        // Shift tests below share one pattern. `_mm_set_epi64x(hi, lo)` builds the count:
        //   * (0, 4): a regular shift by 4;
        //   * (4, 0): low half is zero, so the input is expected back unchanged;
        //   * (0, lane width) and (0, i64::MAX): counts at or past the lane width.

        // Left shift of 16-bit lanes; oversized counts give all zeros.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_sll_epi16() {
            let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
            let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
            assert_eq_m128i(
                r,
                _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
            );
            let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
            assert_eq_m128i(r, a);
            let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
            assert_eq_m128i(r, _mm_set1_epi16(0));
            let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
            assert_eq_m128i(r, _mm_set1_epi16(0));
        }
        test_mm_sll_epi16();

        // Logical right shift of 16-bit lanes; oversized counts give all zeros.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_srl_epi16() {
            let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
            let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
            assert_eq_m128i(r, _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0));
            let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
            assert_eq_m128i(r, a);
            let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
            assert_eq_m128i(r, _mm_set1_epi16(0));
            let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
            assert_eq_m128i(r, _mm_set1_epi16(0));
        }
        test_mm_srl_epi16();

        // Arithmetic right shift of 16-bit lanes; oversized counts leave 0 for
        // non-negative lanes and -1 for negative lanes.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_sra_epi16() {
            let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
            let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
            assert_eq_m128i(r, _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10));
            let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
            assert_eq_m128i(r, a);
            let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
            assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
            let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
            assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
        }
        test_mm_sra_epi16();

        // Left shift of 32-bit lanes; oversized counts give all zeros.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_sll_epi32() {
            let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
            let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
            assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
            let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
            assert_eq_m128i(r, a);
            let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
            assert_eq_m128i(r, _mm_set1_epi32(0));
            let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
            assert_eq_m128i(r, _mm_set1_epi32(0));
        }
        test_mm_sll_epi32();

        // Logical right shift of 32-bit lanes; oversized counts give all zeros.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_srl_epi32() {
            let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
            let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
            assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
            let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
            assert_eq_m128i(r, a);
            let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
            assert_eq_m128i(r, _mm_set1_epi32(0));
            let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
            assert_eq_m128i(r, _mm_set1_epi32(0));
        }
        test_mm_srl_epi32();

        // Arithmetic right shift of 32-bit lanes; oversized counts leave 0 for
        // non-negative lanes and -1 for negative lanes.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_sra_epi32() {
            let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
            let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
            assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
            let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
            assert_eq_m128i(r, a);
            let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
            assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
            let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
            assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
        }
        test_mm_sra_epi32();

        // Left shift of 64-bit lanes; oversized counts give all zeros.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_sll_epi64() {
            let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
            let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
            assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
            let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
            assert_eq_m128i(r, a);
            let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
            assert_eq_m128i(r, _mm_set1_epi64x(0));
            let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
            assert_eq_m128i(r, _mm_set1_epi64x(0));
        }
        test_mm_sll_epi64();

        // Logical right shift of 64-bit lanes; oversized counts give all zeros.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_srl_epi64() {
            let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
            let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
            assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
            let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
            assert_eq_m128i(r, a);
            let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
            assert_eq_m128i(r, _mm_set1_epi64x(0));
            let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
            assert_eq_m128i(r, _mm_set1_epi64x(0));
        }
        test_mm_srl_epi64();

        // Four `i32` lanes converted to `f32`.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cvtepi32_ps() {
            let a = _mm_setr_epi32(1, 2, 3, 4);
            let r = _mm_cvtepi32_ps(a);
            assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
        }
        test_mm_cvtepi32_ps();

        // Four exactly-representable `f32` lanes converted to `i32`.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cvtps_epi32() {
            let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
            let r = _mm_cvtps_epi32(a);
            assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
        }
        test_mm_cvtps_epi32();

        // Truncating `f32` -> `i32` conversion; infinities and out-of-range values are
        // all expected to produce `i32::MIN`.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cvttps_epi32() {
            let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
            let r = _mm_cvttps_epi32(a);
            assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));

            let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
            let r = _mm_cvttps_epi32(a);
            assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
        }
        test_mm_cvttps_epi32();

        // `i16` -> `i8` packing saturates: 0x80 becomes 0x7F and -0x81 becomes -0x80.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_packs_epi16() {
            let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
            let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
            let r = _mm_packs_epi16(a, b);
            assert_eq_m128i(
                r,
                _mm_setr_epi8(0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F),
            );
        }
        test_mm_packs_epi16();

        // `i16` -> `u8` packing saturates: 0x100 becomes 0xFF and -1 becomes 0.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_packus_epi16() {
            let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
            let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
            let r = _mm_packus_epi16(a, b);
            assert_eq_m128i(r, _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0));
        }
        test_mm_packus_epi16();

        // `i32` -> `i16` packing saturates: 0x8000 becomes 0x7FFF, -0x8001 becomes -0x8000.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_packs_epi32() {
            let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
            let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
            let r = _mm_packs_epi32(a, b);
            assert_eq_m128i(r, _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF));
        }
        test_mm_packs_epi32();

        // Scalar min: the low lane is the minimum, the high lane is taken from `a`.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_min_sd() {
            let a = _mm_setr_pd(1.0, 2.0);
            let b = _mm_setr_pd(5.0, 10.0);
            let r = _mm_min_sd(a, b);
            assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
        }
        test_mm_min_sd();

        // Packed min, including the signed-zero behavior.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_min_pd() {
            let a = _mm_setr_pd(-1.0, 5.0);
            let b = _mm_setr_pd(-100.0, 20.0);
            let r = _mm_min_pd(a, b);
            assert_eq_m128d(r, _mm_setr_pd(-100.0, 5.0));

            // `_mm_min_pd` can **not** be implemented using the `simd_min` rust intrinsic because
            // the semantics of `simd_min` are different to those of `_mm_min_pd` regarding handling
            // of `-0.0`.
            let a = _mm_setr_pd(-0.0, 0.0);
            let b = _mm_setr_pd(0.0, 0.0);
            let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
            let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
            let a: [u8; 16] = transmute(a);
            let b: [u8; 16] = transmute(b);
            // With zeros of either sign, the second operand is what comes back.
            assert_eq!(r1, b);
            assert_eq!(r2, a);
            assert_ne!(a, b); // sanity check that -0.0 is actually present
        }
        test_mm_min_pd();

        // Scalar max: the low lane is the maximum, the high lane is taken from `a`.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_max_sd() {
            let a = _mm_setr_pd(1.0, 2.0);
            let b = _mm_setr_pd(5.0, 10.0);
            let r = _mm_max_sd(a, b);
            assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
        }
        test_mm_max_sd();

        // Packed max, including the signed-zero behavior.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_max_pd() {
            let a = _mm_setr_pd(-1.0, 5.0);
            let b = _mm_setr_pd(-100.0, 20.0);
            let r = _mm_max_pd(a, b);
            assert_eq_m128d(r, _mm_setr_pd(-1.0, 20.0));

            // `_mm_max_pd` can **not** be implemented using the `simd_max` rust intrinsic because
            // the semantics of `simd_max` are different to those of `_mm_max_pd` regarding handling
            // of `-0.0`.
            let a = _mm_setr_pd(-0.0, 0.0);
            let b = _mm_setr_pd(0.0, 0.0);
            let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
            let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
            let a: [u8; 16] = transmute(a);
            let b: [u8; 16] = transmute(b);
            // With zeros of either sign, the second operand is what comes back.
            assert_eq!(r1, b);
            assert_eq!(r2, a);
            assert_ne!(a, b); // sanity check that -0.0 is actually present
        }
        test_mm_max_pd();

        // Scalar sqrt: low lane is sqrt of `b`'s low lane, high lane is taken from `a`.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_sqrt_sd() {
            let a = _mm_setr_pd(1.0, 2.0);
            let b = _mm_setr_pd(5.0, 10.0);
            let r = _mm_sqrt_sd(a, b);
            assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
        }
        test_mm_sqrt_sd();

        // Packed sqrt must agree with `f64::sqrt` in both lanes.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_sqrt_pd() {
            let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
            assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
        }
        test_mm_sqrt_pd();

        // Scalar (`_sd`) comparisons: the low lane is expected to be an all-ones (`!0`) or
        // all-zeros mask, and the high lane is expected to carry `a`'s high lane (2.0).

        // 1.0 == 1.0 -> all ones.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpeq_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            let e = _mm_setr_epi64x(!0, transmute(2.0f64));
            let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpeq_sd();

        // 1.0 < 5.0 -> all ones.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmplt_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
            let e = _mm_setr_epi64x(!0, transmute(2.0f64));
            let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmplt_sd();

        // 1.0 <= 1.0 -> all ones.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmple_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            let e = _mm_setr_epi64x(!0, transmute(2.0f64));
            let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmple_sd();

        // 5.0 > 1.0 -> all ones.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpgt_sd() {
            let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
            let e = _mm_setr_epi64x(!0, transmute(2.0f64));
            let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpgt_sd();

        // 1.0 >= 1.0 -> all ones.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpge_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            let e = _mm_setr_epi64x(!0, transmute(2.0f64));
            let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpge_sd();

        // NaN vs 5.0 is not ordered -> all zeros.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpord_sd() {
            let (a, b) = (_mm_setr_pd(f64::NAN, 2.0), _mm_setr_pd(5.0, 3.0));
            let e = _mm_setr_epi64x(0, transmute(2.0f64));
            let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpord_sd();

        // NaN vs 5.0 is unordered -> all ones.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpunord_sd() {
            let (a, b) = (_mm_setr_pd(f64::NAN, 2.0), _mm_setr_pd(5.0, 3.0));
            let e = _mm_setr_epi64x(!0, transmute(2.0f64));
            let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpunord_sd();

        // 1.0 != 5.0 -> all ones.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpneq_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
            let e = _mm_setr_epi64x(!0, transmute(2.0f64));
            let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpneq_sd();

        // !(1.0 < 5.0) -> all zeros.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpnlt_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
            let e = _mm_setr_epi64x(0, transmute(2.0f64));
            let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpnlt_sd();

        // !(1.0 <= 1.0) -> all zeros.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpnle_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            let e = _mm_setr_epi64x(0, transmute(2.0f64));
            let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpnle_sd();

        // !(5.0 > 1.0) -> all zeros.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpngt_sd() {
            let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
            let e = _mm_setr_epi64x(0, transmute(2.0f64));
            let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpngt_sd();

        // !(1.0 >= 1.0) -> all zeros.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpnge_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            let e = _mm_setr_epi64x(0, transmute(2.0f64));
            let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpnge_sd();

        // Packed (`_pd`) comparisons: each lane is expected to be an independent all-ones
        // or all-zeros mask.

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpeq_pd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            let e = _mm_setr_epi64x(!0, 0);
            let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpeq_pd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmplt_pd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            let e = _mm_setr_epi64x(0, !0);
            let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmplt_pd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmple_pd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            let e = _mm_setr_epi64x(!0, !0);
            let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmple_pd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpgt_pd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            let e = _mm_setr_epi64x(0, 0);
            let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpgt_pd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpge_pd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            let e = _mm_setr_epi64x(!0, 0);
            let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpge_pd();

        // Only the lane without a NaN is ordered.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpord_pd() {
            let (a, b) = (_mm_setr_pd(f64::NAN, 2.0), _mm_setr_pd(5.0, 3.0));
            let e = _mm_setr_epi64x(0, !0);
            let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpord_pd();

        // Only the lane containing a NaN is unordered.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpunord_pd() {
            let (a, b) = (_mm_setr_pd(f64::NAN, 2.0), _mm_setr_pd(5.0, 3.0));
            let e = _mm_setr_epi64x(!0, 0);
            let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpunord_pd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpneq_pd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
            let e = _mm_setr_epi64x(!0, !0);
            let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpneq_pd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpnlt_pd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
            let e = _mm_setr_epi64x(0, 0);
            let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpnlt_pd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpnle_pd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            let e = _mm_setr_epi64x(0, 0);
            let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpnle_pd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpngt_pd() {
            let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
            let e = _mm_setr_epi64x(0, !0);
            let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpngt_pd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cmpnge_pd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            let e = _mm_setr_epi64x(0, !0);
            let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
            assert_eq_m128i(r, e);
        }
        test_mm_cmpnge_pd();

        // `comi*` / `ucomi*` compare the low lanes and return an integer: non-zero when
        // the relation holds, zero otherwise. The low lanes below are 1.0 vs 1.0 unless a
        // NaN is used.

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_comieq_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            assert!(_mm_comieq_sd(a, b) != 0);

            // A NaN operand never compares equal.
            let (a, b) = (_mm_setr_pd(f64::NAN, 2.0), _mm_setr_pd(1.0, 3.0));
            assert!(_mm_comieq_sd(a, b) == 0);
        }
        test_mm_comieq_sd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_comilt_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            assert!(_mm_comilt_sd(a, b) == 0);
        }
        test_mm_comilt_sd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_comile_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            assert!(_mm_comile_sd(a, b) != 0);
        }
        test_mm_comile_sd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_comigt_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            assert!(_mm_comigt_sd(a, b) == 0);
        }
        test_mm_comigt_sd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_comige_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            assert!(_mm_comige_sd(a, b) != 0);
        }
        test_mm_comige_sd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_comineq_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            assert!(_mm_comineq_sd(a, b) == 0);
        }
        test_mm_comineq_sd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_ucomieq_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            assert!(_mm_ucomieq_sd(a, b) != 0);

            // NaN never compares equal, not even to another NaN.
            let (a, b) = (_mm_setr_pd(f64::NAN, 2.0), _mm_setr_pd(f64::NAN, 3.0));
            assert!(_mm_ucomieq_sd(a, b) == 0);
        }
        test_mm_ucomieq_sd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_ucomilt_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            assert!(_mm_ucomilt_sd(a, b) == 0);
        }
        test_mm_ucomilt_sd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_ucomile_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            assert!(_mm_ucomile_sd(a, b) != 0);
        }
        test_mm_ucomile_sd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_ucomigt_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            assert!(_mm_ucomigt_sd(a, b) == 0);
        }
        test_mm_ucomigt_sd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_ucomige_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            assert!(_mm_ucomige_sd(a, b) != 0);
        }
        test_mm_ucomige_sd();

        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_ucomineq_sd() {
            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
            assert!(_mm_ucomineq_sd(a, b) == 0);
        }
        test_mm_ucomineq_sd();

        // Two `f64` lanes narrowed to `f32`; the upper two result lanes are expected to be
        // 0.0, and values beyond the `f32` range are expected to become infinities.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cvtpd_ps() {
            let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
            assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));

            let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
            assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));

            let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
            assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));

            let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
            assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
        }
        test_mm_cvtpd_ps();

        // The two low `f32` lanes are widened to `f64`; the upper two are ignored.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cvtps_pd() {
            let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
            assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));

            let r = _mm_cvtps_pd(_mm_setr_ps(f32::MAX, f32::INFINITY, f32::NEG_INFINITY, f32::MIN));
            assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
        }
        test_mm_cvtps_pd();

        // `f64` -> `i32` conversion; the upper two result lanes are expected to be 0, and
        // out-of-range, infinite and NaN inputs are expected to produce `i32::MIN`.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cvtpd_epi32() {
            let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
            assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));

            let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
            assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));

            let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
            assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

            let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
            assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

            let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
            assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
        }
        test_mm_cvtpd_epi32();

        // Truncating `f64` -> `i32` conversion; -inf and NaN are expected to produce
        // `i32::MIN`.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cvttpd_epi32() {
            let a = _mm_setr_pd(-1.1, 2.2);
            let r = _mm_cvttpd_epi32(a);
            assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));

            let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
            let r = _mm_cvttpd_epi32(a);
            assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
        }
        test_mm_cvttpd_epi32();

        // Low `f64` lane -> `i32`; out-of-range and NaN inputs are expected to produce
        // `i32::MIN`.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cvtsd_si32() {
            let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
            assert_eq!(r, -2);

            let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
            assert_eq!(r, i32::MIN);

            let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
            assert_eq!(r, i32::MIN);
        }
        test_mm_cvtsd_si32();

        // Truncating low `f64` lane -> `i32`; -inf is expected to produce `i32::MIN`.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cvttsd_si32() {
            let a = _mm_setr_pd(-1.1, 2.2);
            let r = _mm_cvttsd_si32(a);
            assert_eq!(r, -1);

            let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
            let r = _mm_cvttsd_si32(a);
            assert_eq!(r, i32::MIN);
        }
        test_mm_cvttsd_si32();

        // Intrinsic only available on x86_64
        #[cfg(target_arch = "x86_64")]
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cvtsd_si64() {
            let r = _mm_cvtsd_si64(_mm_setr_pd(-2.0, 5.0));
            assert_eq!(r, -2_i64);

            let r = _mm_cvtsd_si64(_mm_setr_pd(f64::MAX, f64::MIN));
            assert_eq!(r, i64::MIN);
        }
        #[cfg(target_arch = "x86_64")]
        test_mm_cvtsd_si64();

        // Intrinsic only available on x86_64
        #[cfg(target_arch = "x86_64")]
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cvttsd_si64() {
            let a = _mm_setr_pd(-1.1, 2.2);
            let r = _mm_cvttsd_si64(a);
            assert_eq!(r, -1_i64);
        }
        #[cfg(target_arch = "x86_64")]
        test_mm_cvttsd_si64();

        // Low `f64` lane of `b` narrowed into the low `f32` lane; the other three lanes
        // are expected to come from `a`.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cvtsd_ss() {
            let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
            let b = _mm_setr_pd(2.0, -5.0);
            let r = _mm_cvtsd_ss(a, b);
            assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));

            let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
            let b = _mm_setr_pd(f64::INFINITY, -5.0);
            let r = _mm_cvtsd_ss(a, b);
            assert_eq_m128(
                r,
                _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY),
            );
        }
        test_mm_cvtsd_ss();

        // Low `f32` lane of `b` widened into the low `f64` lane; the high lane is
        // expected to come from `a`.
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_cvtss_sd() {
            let a = _mm_setr_pd(-1.1, 2.2);
            let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
            let r = _mm_cvtss_sd(a, b);
            assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));

            let a = _mm_setr_pd(-1.1, f64::INFINITY);
            let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);
            let r = _mm_cvtss_sd(a, b);
            assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
        }
        test_mm_cvtss_sd();

        // One mask bit per lane, set for the negative lanes (bit 0 is the low lane).
        #[target_feature(enable = "sse2")]
        unsafe fn test_mm_movemask_pd() {
            let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
            assert_eq!(r, 0b01);

            let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
            assert_eq!(r, 0b11);
        }
        test_mm_movemask_pd();
    }
}