Skip to content

Commit

Permalink
Add fast type conversion methods where possible
Browse files Browse the repository at this point in the history
For SIMD types that have intrinsics for quickly
converting to another type, methods were added to
leverage these intrinsics for fast conversion.
  • Loading branch information
shssoichiro committed Oct 27, 2022
1 parent efb05d3 commit 83dfde4
Show file tree
Hide file tree
Showing 18 changed files with 1,070 additions and 60 deletions.
78 changes: 75 additions & 3 deletions src/f32x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ pick! {
if #[cfg(target_feature="sse")] {
#[derive(Default, Clone, Copy, PartialEq)]
#[repr(C, align(16))]
pub struct f32x4 { sse: m128 }
pub struct f32x4 { pub(crate) sse: m128 }
} else if #[cfg(target_feature="simd128")] {
use core::arch::wasm32::*;

#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct f32x4 { simd: v128 }
pub struct f32x4 { pub(crate) simd: v128 }

impl Default for f32x4 {
fn default() -> Self {
Expand All @@ -26,7 +26,7 @@ pick! {
} else {
#[derive(Default, Clone, Copy, PartialEq)]
#[repr(C, align(16))]
pub struct f32x4 { arr: [f32;4] }
pub struct f32x4 { pub(crate) arr: [f32;4] }
}
}

Expand Down Expand Up @@ -1484,4 +1484,76 @@ impl f32x4 {
pub fn as_array_ref(&self) -> &[f32; 4] {
// Borrow this vector as its four `f32` lanes. The result is a reference
// produced by `cast_ref`, so callers get a view of `self` rather than a
// new array (presumably bytemuck's `cast_ref` -- confirm against imports).
cast_ref(self)
}

/// Converts the first two `f32` lanes of this vector to `f64` lanes.
///
/// The remaining two lanes are discarded. Widening `f32` to `f64` is
/// lossless, so both code paths produce the same values.
#[inline]
pub fn to_f64x2(self) -> f64x2 {
  pick! {
    if #[cfg(target_feature="sse2")] {
      f64x2 { sse: convert_to_m128d_from_lower2_m128(self.sse) }
    } else {
      // Without SSE2 this type can still be backed by the `sse` or `simd`
      // field instead of `arr`, so read the lanes through the
      // layout-independent array view rather than a specific field.
      let lanes = self.as_array_ref();
      f64x2::new([
        f64::from(lanes[0]),
        f64::from(lanes[1]),
      ])
    }
  }
}

/// Converts all four `f32` lanes of this vector to `f64` lanes.
///
/// Widening `f32` to `f64` is lossless, so both code paths produce the same
/// values.
#[inline]
pub fn to_f64x4(self) -> f64x4 {
  pick! {
    if #[cfg(target_feature="avx")] {
      f64x4 { avx: convert_to_m256d_from_m128(self.sse) }
    } else {
      // This branch is also taken on SSE-only and wasm builds, where the
      // storage field is `sse` / `simd` and `arr` does not exist. Go through
      // the array view so the fallback compiles for every layout.
      let lanes = self.as_array_ref();
      f64x4::new([
        f64::from(lanes[0]),
        f64::from(lanes[1]),
        f64::from(lanes[2]),
        f64::from(lanes[3]),
      ])
    }
  }
}

/// Converts the `f32` lanes of this vector to `i32` lanes.
///
/// The fractional part of each value is discarded (rounding toward zero).
///
/// NOTE(review): for NaN or values outside the `i32` range the two code
/// paths may disagree -- the scalar `as i32` cast saturates and maps NaN to
/// `0`, while the intrinsic path presumably yields the hardware's
/// "integer indefinite" value. Confirm before relying on either.
#[inline]
pub fn to_i32x4_truncate(self) -> i32x4 {
  pick! {
    if #[cfg(target_feature="sse2")] {
      i32x4 { sse: truncate_m128_to_m128i(self.sse) }
    } else {
      // Without SSE2 the storage may still be `sse` or `simd` rather than
      // `arr`; the array view works for every layout.
      let lanes = self.as_array_ref();
      i32x4::new([
        lanes[0] as i32,
        lanes[1] as i32,
        lanes[2] as i32,
        lanes[3] as i32,
      ])
    }
  }
}

/// Converts the `f32` lanes of this vector to `i32` lanes.
///
/// Each value is rounded to the nearest integer.
///
/// NOTE(review): tie handling may differ between code paths. The scalar
/// fallback uses `f32::round`, which rounds halfway cases away from zero;
/// the intrinsic path presumably follows the current SSE rounding mode
/// (ties-to-even by default). Confirm if exact `x.5` behavior matters.
#[inline]
pub fn to_i32x4_round(self) -> i32x4 {
  pick! {
    if #[cfg(target_feature="sse2")] {
      i32x4 { sse: convert_to_i32_m128i_from_m128(self.sse) }
    } else {
      // Without SSE2 the storage may still be `sse` or `simd` rather than
      // `arr`; the array view works for every layout.
      let lanes = self.as_array_ref();
      i32x4::new([
        lanes[0].round() as i32,
        lanes[1].round() as i32,
        lanes[2].round() as i32,
        lanes[3].round() as i32,
      ])
    }
  }
}
}
54 changes: 50 additions & 4 deletions src/f32x8_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@ pick! {
if #[cfg(target_feature="avx")] {
#[derive(Default, Clone, Copy, PartialEq)]
#[repr(C, align(32))]
pub struct f32x8 { avx: m256 }
pub struct f32x8 { pub(crate) avx: m256 }
} else if #[cfg(target_feature="sse2")] {
#[derive(Default, Clone, Copy, PartialEq)]
#[repr(C, align(32))]
pub struct f32x8 { sse0: m128, sse1: m128 }
pub struct f32x8 { pub(crate) sse0: m128, pub(crate) sse1: m128 }
} else if #[cfg(target_feature="simd128")] {
use core::arch::wasm32::*;

#[derive(Clone, Copy)]
#[repr(C, align(32))]
pub struct f32x8 { simd0: v128, simd1: v128 }
pub struct f32x8 { pub(crate) simd0: v128, pub(crate) simd1: v128 }

impl Default for f32x8 {
fn default() -> Self {
Expand All @@ -31,7 +31,7 @@ pick! {
} else {
#[derive(Default, Clone, Copy, PartialEq)]
#[repr(C, align(32))]
pub struct f32x8 { arr: [f32;8] }
pub struct f32x8 { pub(crate) arr: [f32;8] }
}
}

Expand Down Expand Up @@ -1698,6 +1698,52 @@ impl f32x8 {
pub fn as_array_ref(&self) -> &[f32; 8] {
// Borrow this vector as its eight `f32` lanes. The result is a reference
// produced by `cast_ref`, so callers get a view of `self` rather than a
// new array (presumably bytemuck's `cast_ref` -- confirm against imports).
cast_ref(self)
}

/// Converts the `f32` lanes of this vector to `i32` lanes.
///
/// The fractional part of each value is discarded (rounding toward zero).
///
/// NOTE(review): for NaN or values outside the `i32` range the two code
/// paths may disagree -- the scalar `as i32` cast saturates and maps NaN to
/// `0`, while the intrinsic path presumably yields the hardware's
/// "integer indefinite" value. Confirm before relying on either.
#[inline]
pub fn to_i32x8_truncate(self) -> i32x8 {
  pick! {
    if #[cfg(target_feature="avx2")] {
      i32x8 { avx2: convert_truncate_to_i32_m256i_from_m256(self.avx) }
    } else {
      // Without AVX2 this type may be backed by `avx`, `sse0`/`sse1` or
      // `simd0`/`simd1` instead of `arr`, so the lanes are read through the
      // layout-independent array view.
      let lanes = self.as_array_ref();
      i32x8::new([
        lanes[0] as i32,
        lanes[1] as i32,
        lanes[2] as i32,
        lanes[3] as i32,
        lanes[4] as i32,
        lanes[5] as i32,
        lanes[6] as i32,
        lanes[7] as i32,
      ])
    }
  }
}

/// Converts the `f32` lanes of this vector to `i32` lanes.
///
/// Each value is rounded to the nearest integer.
///
/// NOTE(review): tie handling may differ between code paths. The scalar
/// fallback uses `f32::round`, which rounds halfway cases away from zero;
/// the intrinsic path presumably follows the current hardware rounding mode
/// (ties-to-even by default). Confirm if exact `x.5` behavior matters.
#[inline]
pub fn to_i32x8_round(self) -> i32x8 {
  pick! {
    if #[cfg(target_feature="avx2")] {
      i32x8 { avx2: convert_to_i32_m256i_from_m256(self.avx) }
    } else {
      // Without AVX2 this type may be backed by `avx`, `sse0`/`sse1` or
      // `simd0`/`simd1` instead of `arr`, so the lanes are read through the
      // layout-independent array view.
      let lanes = self.as_array_ref();
      i32x8::new([
        lanes[0].round() as i32,
        lanes[1].round() as i32,
        lanes[2].round() as i32,
        lanes[3].round() as i32,
        lanes[4].round() as i32,
        lanes[5].round() as i32,
        lanes[6].round() as i32,
        lanes[7].round() as i32,
      ])
    }
  }
}
}

impl Not for f32x8 {
Expand Down
70 changes: 67 additions & 3 deletions src/f64x2_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ pick! {
if #[cfg(target_feature="sse2")] {
#[derive(Default, Clone, Copy, PartialEq)]
#[repr(C, align(16))]
pub struct f64x2 { sse: m128d }
pub struct f64x2 { pub(crate) sse: m128d }
} else if #[cfg(target_feature="simd128")] {
use core::arch::wasm32::*;

#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct f64x2 { simd: v128 }
pub struct f64x2 { pub(crate) simd: v128 }

impl Default for f64x2 {
fn default() -> Self {
Expand All @@ -26,7 +26,7 @@ pick! {
} else {
#[derive(Default, Clone, Copy, PartialEq)]
#[repr(C, align(16))]
pub struct f64x2 { arr: [f64;2] }
pub struct f64x2 { pub(crate) arr: [f64;2] }
}
}

Expand Down Expand Up @@ -1527,6 +1527,70 @@ impl f64x2 {
pub fn as_array_ref(&self) -> &[f64; 2] {
// Borrow this vector as its two `f64` lanes. The result is a reference
// produced by `cast_ref`, so callers get a view of `self` rather than a
// new array (presumably bytemuck's `cast_ref` -- confirm against imports).
cast_ref(self)
}

/// Converts the `f64` lanes of this vector to `f32` lanes.
///
/// The first two lanes of the result hold the narrowed values from this
/// vector; the remaining two lanes are zero.
#[inline]
pub fn to_f32x4(self) -> f32x4 {
  pick! {
    if #[cfg(target_feature="sse2")] {
      f32x4 { sse: convert_to_m128_from_m128d(self.sse) }
    } else {
      // Without SSE2 this type may be backed by `simd` (wasm) rather than
      // `arr`; the array view works for every layout.
      let lanes = self.as_array_ref();
      f32x4::new([
        lanes[0] as f32,
        lanes[1] as f32,
        0.0f32,
        0.0f32
      ])
    }
  }
}

/// Converts the `f64` lanes of this vector to `i32` lanes.
///
/// The fractional part of each value is discarded (rounding toward zero).
///
/// The first two lanes of the result hold the converted values from this
/// vector; the remaining two lanes are zero.
///
/// NOTE(review): for NaN or values outside the `i32` range the two code
/// paths may disagree -- the scalar `as i32` cast saturates and maps NaN to
/// `0`, while the intrinsic path presumably yields the hardware's
/// "integer indefinite" value. Confirm before relying on either.
#[inline]
pub fn to_i32x4_truncate(self) -> i32x4 {
  pick! {
    if #[cfg(target_feature="sse2")] {
      i32x4 { sse: truncate_m128d_to_m128i(self.sse) }
    } else {
      // Without SSE2 this type may be backed by `simd` (wasm) rather than
      // `arr`; the array view works for every layout.
      let lanes = self.as_array_ref();
      i32x4::new([
        lanes[0] as i32,
        lanes[1] as i32,
        0i32,
        0i32,
      ])
    }
  }
}

/// Converts the `f64` lanes of this vector to `i32` lanes.
///
/// Each value is rounded to the nearest integer.
///
/// The first two lanes of the result hold the converted values from this
/// vector; the remaining two lanes are zero.
///
/// NOTE(review): tie handling may differ between code paths. The scalar
/// fallback uses `f64::round`, which rounds halfway cases away from zero;
/// the intrinsic path presumably follows the current SSE rounding mode
/// (ties-to-even by default). Confirm if exact `x.5` behavior matters.
#[inline]
pub fn to_i32x4_round(self) -> i32x4 {
  pick! {
    if #[cfg(target_feature="sse2")] {
      i32x4 { sse: convert_to_i32_m128i_from_m128d(self.sse) }
    } else {
      // Without SSE2 this type may be backed by `simd` (wasm) rather than
      // `arr`; the array view works for every layout.
      let lanes = self.as_array_ref();
      i32x4::new([
        lanes[0].round() as i32,
        lanes[1].round() as i32,
        0i32,
        0i32,
      ])
    }
  }
}
}

impl Not for f64x2 {
Expand Down
63 changes: 59 additions & 4 deletions src/f64x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@ pick! {
if #[cfg(target_feature="avx")] {
#[derive(Default, Clone, Copy, PartialEq)]
#[repr(C, align(32))]
pub struct f64x4 { avx: m256d }
pub struct f64x4 { pub(crate) avx: m256d }
} else if #[cfg(target_feature="sse2")] {
#[derive(Default, Clone, Copy, PartialEq)]
#[repr(C, align(32))]
pub struct f64x4 { sse0: m128d, sse1: m128d }
pub struct f64x4 { pub(crate) sse0: m128d, pub(crate) sse1: m128d }
} else if #[cfg(target_feature="simd128")] {
use core::arch::wasm32::*;

#[derive(Clone, Copy)]
#[repr(C, align(32))]
pub struct f64x4 { simd0: v128, simd1: v128 }
pub struct f64x4 { pub(crate) simd0: v128, pub(crate) simd1: v128 }

impl Default for f64x4 {
fn default() -> Self {
Expand All @@ -31,7 +31,7 @@ pick! {
} else {
#[derive(Default, Clone, Copy, PartialEq)]
#[repr(C, align(32))]
pub struct f64x4 { arr: [f64;4] }
pub struct f64x4 { pub(crate) arr: [f64;4] }
}
}

Expand Down Expand Up @@ -1654,6 +1654,61 @@ impl f64x4 {
pub fn as_array_ref(&self) -> &[f64; 4] {
// Borrow this vector as its four `f64` lanes. The result is a reference
// produced by `cast_ref`, so callers get a view of `self` rather than a
// new array (presumably bytemuck's `cast_ref` -- confirm against imports).
cast_ref(self)
}

/// Converts the `f64` lanes of this vector to `f32` lanes.
///
/// Each value is narrowed to `f32`, so precision may be lost.
#[inline]
pub fn to_f32x4(self) -> f32x4 {
  pick! {
    if #[cfg(target_feature="avx2")] {
      f32x4 { sse: convert_to_m128_from_m256d(self.avx) }
    } else {
      // Without AVX2 this type may be backed by `avx`, `sse0`/`sse1` or
      // `simd0`/`simd1` instead of `arr`, so the lanes are read through the
      // layout-independent array view.
      let lanes = self.as_array_ref();
      f32x4::new([
        lanes[0] as f32,
        lanes[1] as f32,
        lanes[2] as f32,
        lanes[3] as f32,
      ])
    }
  }
}

/// Converts the `f64` lanes of this vector to `i32` lanes.
///
/// The fractional part of each value is discarded (rounding toward zero).
///
/// NOTE(review): for NaN or values outside the `i32` range the two code
/// paths may disagree -- the scalar `as i32` cast saturates and maps NaN to
/// `0`, while the intrinsic path presumably yields the hardware's
/// "integer indefinite" value. Confirm before relying on either.
#[inline]
pub fn to_i32x4_truncate(self) -> i32x4 {
  pick! {
    if #[cfg(target_feature="avx2")] {
      i32x4 { sse: convert_truncate_to_i32_m128i_from_m256d(self.avx) }
    } else {
      // Without AVX2 this type may be backed by `avx`, `sse0`/`sse1` or
      // `simd0`/`simd1` instead of `arr`, so the lanes are read through the
      // layout-independent array view.
      let lanes = self.as_array_ref();
      i32x4::new([
        lanes[0] as i32,
        lanes[1] as i32,
        lanes[2] as i32,
        lanes[3] as i32,
      ])
    }
  }
}

/// Converts the `f64` lanes of this vector to `i32` lanes.
///
/// Each value is rounded to the nearest integer.
///
/// NOTE(review): tie handling may differ between code paths. The scalar
/// fallback uses `f64::round`, which rounds halfway cases away from zero;
/// the intrinsic path presumably follows the current hardware rounding mode
/// (ties-to-even by default). Confirm if exact `x.5` behavior matters.
#[inline]
pub fn to_i32x4_round(self) -> i32x4 {
  pick! {
    if #[cfg(target_feature="avx2")] {
      i32x4 { sse: convert_to_i32_m128i_from_m256d(self.avx) }
    } else {
      // Without AVX2 this type may be backed by `avx`, `sse0`/`sse1` or
      // `simd0`/`simd1` instead of `arr`, so the lanes are read through the
      // layout-independent array view.
      let lanes = self.as_array_ref();
      i32x4::new([
        lanes[0].round() as i32,
        lanes[1].round() as i32,
        lanes[2].round() as i32,
        lanes[3].round() as i32,
      ])
    }
  }
}
}

impl Not for f64x4 {
Expand Down
6 changes: 3 additions & 3 deletions src/i16x16_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ pick! {
if #[cfg(target_feature="avx2")] {
#[derive(Default, Clone, Copy, PartialEq, Eq)]
#[repr(C, align(32))]
pub struct i16x16 { avx2: m256i }
pub struct i16x16 { pub(crate) avx2: m256i }
} else if #[cfg(target_feature="sse2")] {
#[derive(Default, Clone, Copy, PartialEq, Eq)]
#[repr(C, align(32))]
Expand All @@ -14,7 +14,7 @@ pick! {

#[derive(Clone, Copy)]
#[repr(C, align(32))]
pub struct i16x16 { simd0: v128, simd1: v128 }
pub struct i16x16 { pub(crate) simd0: v128, pub(crate) simd1: v128 }

impl Default for i16x16 {
fn default() -> Self {
Expand All @@ -32,7 +32,7 @@ pick! {
} else {
#[derive(Default, Clone, Copy, PartialEq, Eq)]
#[repr(C, align(32))]
pub struct i16x16 { arr: [i16;16] }
pub struct i16x16 { pub(crate) arr: [i16;16] }
}
}

Expand Down
Loading

0 comments on commit 83dfde4

Please sign in to comment.