diff --git a/src/f32x4_.rs b/src/f32x4_.rs index 52a13ee9..8a107282 100644 --- a/src/f32x4_.rs +++ b/src/f32x4_.rs @@ -4,13 +4,13 @@ pick! { if #[cfg(target_feature="sse")] { #[derive(Default, Clone, Copy, PartialEq)] #[repr(C, align(16))] - pub struct f32x4 { sse: m128 } + pub struct f32x4 { pub(crate) sse: m128 } } else if #[cfg(target_feature="simd128")] { use core::arch::wasm32::*; #[derive(Clone, Copy)] #[repr(transparent)] - pub struct f32x4 { simd: v128 } + pub struct f32x4 { pub(crate) simd: v128 } impl Default for f32x4 { fn default() -> Self { @@ -26,7 +26,7 @@ pick! { } else { #[derive(Default, Clone, Copy, PartialEq)] #[repr(C, align(16))] - pub struct f32x4 { arr: [f32;4] } + pub struct f32x4 { pub(crate) arr: [f32;4] } } } @@ -664,14 +664,9 @@ impl f32x4 { /// values you get implementation defined behavior. #[inline] #[must_use] + #[deprecated(since = "0.7.6", note = "use `to_i32x4_round_fast` instead")] pub fn fast_round_int(self) -> i32x4 { - pick! { - if #[cfg(target_feature="sse2")] { - cast(convert_to_i32_m128i_from_m128(self.sse)) - } else { - self.round_int() - } - } + self.to_i32x4_round_fast() } /// Rounds each lane into an integer. This saturates out of range values and @@ -679,27 +674,9 @@ impl f32x4 { /// doesn't handle out of range values or NaNs. #[inline] #[must_use] + #[deprecated(since = "0.7.6", note = "use `to_i32x4_round` instead")] pub fn round_int(self) -> i32x4 { - pick! { - if #[cfg(target_feature="sse2")] { - // Based on: https://github.com/v8/v8/blob/210987a552a2bf2a854b0baa9588a5959ff3979d/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.h#L489-L504 - let non_nan_mask = self.cmp_eq(self); - let non_nan = self & non_nan_mask; - let flip_to_max: i32x4 = cast(self.cmp_ge(Self::splat(2147483648.0))); - let cast: i32x4 = cast(convert_to_i32_m128i_from_m128(non_nan.sse)); - flip_to_max ^ cast - } else if #[cfg(target_feature="simd128")] { - cast(Self { simd: i32x4_trunc_sat_f32x4(f32x4_nearest(self.simd)) }) - } else { - let rounded: [f32; 4] = cast(self.round()); - cast([ - rounded[0] as i32, - rounded[1] as i32, - rounded[2] as i32, - rounded[3] as i32, - ]) - } - } + self.to_i32x4_round() } /// Truncates each lane into an integer. This is a faster implementation than @@ -707,14 +684,9 @@ impl f32x4 { /// values you get implementation defined behavior. #[inline] #[must_use] + #[deprecated(since = "0.7.6", note = "use `to_i32x4_truncate_fast` instead")] pub fn fast_trunc_int(self) -> i32x4 { - pick! { - if #[cfg(target_feature="sse2")] { - cast(truncate_m128_to_m128i(self.sse)) - } else { - self.trunc_int() - } - } + self.to_i32x4_truncate_fast() } /// Truncates each lane into an integer. This saturates out of range values @@ -722,27 +694,9 @@ impl f32x4 { /// that doesn't handle out of range values or NaNs. #[inline] #[must_use] + #[deprecated(since = "0.7.6", note = "use `to_i32x4_truncate` instead")] pub fn trunc_int(self) -> i32x4 { - pick! 
{ - if #[cfg(target_feature="sse2")] { - // Based on: https://github.com/v8/v8/blob/210987a552a2bf2a854b0baa9588a5959ff3979d/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.h#L489-L504 - let non_nan_mask = self.cmp_eq(self); - let non_nan = self & non_nan_mask; - let flip_to_max: i32x4 = cast(self.cmp_ge(Self::splat(2147483648.0))); - let cast: i32x4 = cast(truncate_m128_to_m128i(non_nan.sse)); - flip_to_max ^ cast - } else if #[cfg(target_feature="simd128")] { - cast(Self { simd: i32x4_trunc_sat_f32x4(self.simd) }) - } else { - let n: [f32;4] = cast(self); - cast([ - n[0] as i32, - n[1] as i32, - n[2] as i32, - n[3] as i32, - ]) - } - } + self.to_i32x4_truncate() } #[inline] #[must_use] @@ -1037,7 +991,7 @@ impl f32x4 { // Find quadrant let y = (xa * TWO_OVER_PI).round(); - let q: i32x4 = y.round_int(); + let q: i32x4 = y.to_i32x4_round(); let x = y.mul_neg_add(DP3F, y.mul_neg_add(DP2F, y.mul_neg_add(DP1F, xa))); @@ -1417,7 +1371,7 @@ impl f32x4 { ); let ee = e1 + e2 + e3; - let ei = cast::<_, i32x4>(ee.round_int()); + let ei = cast::<_, i32x4>(ee.to_i32x4_round()); let ej = cast::<_, i32x4>(ei + (cast::<_, i32x4>(z) >> 23)); let overflow = cast::<_, f32x4>(ej.cmp_gt(i32x4::splat(0x0FF))) @@ -1451,7 +1405,7 @@ impl f32x4 { // Y into an integer let yi = y.cmp_eq(y.round()); // Is y odd? - let y_odd = cast::<_, i32x4>(y.round_int() << 31).round_float(); + let y_odd = cast::<_, i32x4>(y.to_i32x4_round() << 31).round_float(); let z1 = yi.blend(z | y_odd, self.cmp_eq(Self::ZERO).blend(z, Self::nan_pow())); @@ -1484,4 +1438,144 @@ impl f32x4 { pub fn as_array_ref(&self) -> &[f32; 4] { cast_ref(self) } + + /// Converts the first two f32 elements within this struct to f64 elements. + /// + /// The remaining elements are discarded. + #[inline] + #[must_use] + pub fn to_f64x2(self) -> f64x2 { + pick! { + if #[cfg(target_feature="sse2")] { + f64x2 { sse: convert_to_m128d_from_lower2_m128(self.sse) } + } else { + let arr = self.to_array(); + f64x2::new([ + f64::from(arr[0]), + f64::from(arr[1]), + ]) + } + } + } + + /// Converts the f32 elements within this struct to f64 elements. + #[inline] + #[must_use] + pub fn to_f64x4(self) -> f64x4 { + pick! { + if #[cfg(target_feature="avx")] { + f64x4 { avx: convert_to_m256d_from_m128(self.sse) } + } else { + let arr = self.to_array(); + f64x4::new([ + f64::from(arr[0]), + f64::from(arr[1]), + f64::from(arr[2]), + f64::from(arr[3]), + ]) + } + } + } + + /// Converts the f32 elements within this struct to i32 elements. + /// + /// The decimal portions of the values are truncated. + /// + /// This is a faster implementation than `to_i32x4_truncate`, + /// but it doesn't handle out of range values or NaNs. For those + /// values you get implementation defined behavior. + #[inline] + #[must_use] + pub fn to_i32x4_truncate_fast(self) -> i32x4 { + pick! { + if #[cfg(target_feature="sse2")] { + i32x4 { sse: truncate_m128_to_m128i(self.sse) } + } else { + self.to_i32x4_truncate() + } + } + } + + /// Converts the f32 elements within this struct to i32 elements. + /// + /// The decimal portions of the values are truncated. + /// + /// This saturates out of range values and turns NaNs into 0. + /// Use `to_i32x4_truncate_fast` for a faster implementation + /// that doesn't handle out of range values or NaNs. + #[inline] + #[must_use] + pub fn to_i32x4_truncate(self) -> i32x4 { + pick! 
{ + if #[cfg(target_feature="sse2")] { + // Based on: https://github.com/v8/v8/blob/210987a552a2bf2a854b0baa9588a5959ff3979d/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.h#L489-L504 + let non_nan_mask = self.cmp_eq(self); + let non_nan = self & non_nan_mask; + let flip_to_max: i32x4 = cast(self.cmp_ge(Self::splat(2147483648.0))); + let cast: i32x4 = cast(truncate_m128_to_m128i(non_nan.sse)); + flip_to_max ^ cast + } else if #[cfg(target_feature="simd128")] { + cast(Self { simd: i32x4_trunc_sat_f32x4(self.simd) }) + } else { + let n: [f32;4] = cast(self); + cast([ + n[0] as i32, + n[1] as i32, + n[2] as i32, + n[3] as i32, + ]) + } + } + } + + /// Converts the f32 elements within this struct to i32 elements. + /// + /// The decimal portions of the values are rounded to the nearest integer. + /// + /// This is a faster implementation than `to_i32x4_round`, + /// but it doesn't handle out of range values or NaNs. For those + /// values you get implementation defined behavior. + #[inline] + #[must_use] + pub fn to_i32x4_round_fast(self) -> i32x4 { + pick! { + if #[cfg(target_feature="sse2")] { + cast(convert_to_i32_m128i_from_m128(self.sse)) + } else { + self.to_i32x4_round() + } + } + } + + /// Converts the f32 elements within this struct to i32 elements. + /// + /// The decimal portions of the values are rounded to the nearest integer. + /// + /// This saturates out of range values and + /// turns NaNs into 0. Use `to_i32x4_round_fast` for a faster implementation that + /// doesn't handle out of range values or NaNs. + #[inline] + #[must_use] + pub fn to_i32x4_round(self) -> i32x4 { + pick! { + if #[cfg(target_feature="sse2")] { + // Based on: https://github.com/v8/v8/blob/210987a552a2bf2a854b0baa9588a5959ff3979d/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.h#L489-L504 + let non_nan_mask = self.cmp_eq(self); + let non_nan = self & non_nan_mask; + let flip_to_max: i32x4 = cast(self.cmp_ge(Self::splat(2147483648.0))); + let cast: i32x4 = cast(convert_to_i32_m128i_from_m128(non_nan.sse)); + flip_to_max ^ cast + } else if #[cfg(target_feature="simd128")] { + cast(Self { simd: i32x4_trunc_sat_f32x4(f32x4_nearest(self.simd)) }) + } else { + let rounded: [f32; 4] = cast(self.round()); + cast([ + rounded[0] as i32, + rounded[1] as i32, + rounded[2] as i32, + rounded[3] as i32, + ]) + } + } + } } diff --git a/src/f32x8_.rs b/src/f32x8_.rs index 070d7f3a..9e85a4d7 100644 --- a/src/f32x8_.rs +++ b/src/f32x8_.rs @@ -4,17 +4,17 @@ pick! { if #[cfg(target_feature="avx")] { #[derive(Default, Clone, Copy, PartialEq)] #[repr(C, align(32))] - pub struct f32x8 { avx: m256 } + pub struct f32x8 { pub(crate) avx: m256 } } else if #[cfg(target_feature="sse2")] { #[derive(Default, Clone, Copy, PartialEq)] #[repr(C, align(32))] - pub struct f32x8 { sse0: m128, sse1: m128 } + pub struct f32x8 { pub(crate) sse0: m128, pub(crate) sse1: m128 } } else if #[cfg(target_feature="simd128")] { use core::arch::wasm32::*; #[derive(Clone, Copy)] #[repr(C, align(32))] - pub struct f32x8 { simd0: v128, simd1: v128 } + pub struct f32x8 { pub(crate) simd0: v128, pub(crate) simd1: v128 } impl Default for f32x8 { fn default() -> Self { @@ -31,7 +31,7 @@ pick! { } else { #[derive(Default, Clone, Copy, PartialEq)] #[repr(C, align(32))] - pub struct f32x8 { arr: [f32;8] } + pub struct f32x8 { pub(crate) arr: [f32;8] } } } @@ -786,16 +786,9 @@ impl f32x8 { /// values you get implementation defined behavior. 
#[inline] #[must_use] + #[deprecated(since = "0.7.6", note = "use `to_i32x8_round_fast` instead")] pub fn fast_round_int(self) -> i32x8 { - pick! { - if #[cfg(target_feature="avx")] { - cast(convert_to_i32_m256i_from_m256(self.avx)) - } else if #[cfg(target_feature="sse2")] { - i32x8 { sse0: convert_to_i32_m128i_from_m128(self.sse0), sse1: convert_to_i32_m128i_from_m128(self.sse1) } - } else { - self.round_int() - } - } + self.to_i32x8_round_fast() } /// Rounds each lane into an integer. This saturates out of range values and @@ -803,41 +796,9 @@ impl f32x8 { /// doesn't handle out of range values or NaNs. #[inline] #[must_use] + #[deprecated(since = "0.7.6", note = "use `to_i32x8_round` instead")] pub fn round_int(self) -> i32x8 { - pick! { - if #[cfg(target_feature="avx")] { - // Based on: https://github.com/v8/v8/blob/210987a552a2bf2a854b0baa9588a5959ff3979d/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.h#L489-L504 - let non_nan_mask = self.cmp_eq(self); - let non_nan = self & non_nan_mask; - let flip_to_max: i32x8 = cast(self.cmp_ge(Self::splat(2147483648.0))); - let cast: i32x8 = cast(convert_to_i32_m256i_from_m256(non_nan.avx)); - flip_to_max ^ cast - } else if #[cfg(target_feature="sse2")] { - // Based on: https://github.com/v8/v8/blob/210987a552a2bf2a854b0baa9588a5959ff3979d/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.h#L489-L504 - let non_nan_mask = self.cmp_eq(self); - let non_nan = self & non_nan_mask; - let flip_to_max: i32x8 = cast(self.cmp_ge(Self::splat(2147483648.0))); - let cast: i32x8 = i32x8 { sse0: convert_to_i32_m128i_from_m128(non_nan.sse0), sse1: convert_to_i32_m128i_from_m128(non_nan.sse1) }; - flip_to_max ^ cast - } else if #[cfg(target_feature="simd128")] { - cast(Self { - simd0: i32x4_trunc_sat_f32x4(f32x4_nearest(self.simd0)), - simd1: i32x4_trunc_sat_f32x4(f32x4_nearest(self.simd1)), - }) - } else { - let rounded: [f32; 8] = cast(self.round()); - cast([ - rounded[0] as i32, - rounded[1] as i32, - rounded[2] as i32, - rounded[3] as i32, - rounded[4] as i32, - rounded[5] as i32, - rounded[6] as i32, - rounded[7] as i32, - ]) - } - } + self.to_i32x8_round() } /// Truncates each lane into an integer. This is a faster implementation than @@ -845,16 +806,9 @@ impl f32x8 { /// values you get implementation defined behavior. #[inline] #[must_use] + #[deprecated(since = "0.7.6", note = "use `to_i32x8_truncate_fast` instead")] pub fn fast_trunc_int(self) -> i32x8 { - pick! { - if #[cfg(all(target_feature="avx"))] { - cast(convert_truncate_to_i32_m256i_from_m256(self.avx)) - } else if #[cfg(target_feature="sse2")] { - i32x8 { sse0: truncate_m128_to_m128i(self.sse0), sse1: truncate_m128_to_m128i(self.sse1) } - } else { - self.trunc_int() - } - } + self.to_i32x8_truncate_fast() } /// Truncates each lane into an integer. This saturates out of range values @@ -862,41 +816,9 @@ impl f32x8 { /// that doesn't handle out of range values or NaNs. #[inline] #[must_use] + #[deprecated(since = "0.7.6", note = "use `to_i32x8_truncate` instead")] pub fn trunc_int(self) -> i32x8 { - pick! 
{ - if #[cfg(target_feature="avx")] { - // Based on: https://github.com/v8/v8/blob/210987a552a2bf2a854b0baa9588a5959ff3979d/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.h#L489-L504 - let non_nan_mask = self.cmp_eq(self); - let non_nan = self & non_nan_mask; - let flip_to_max: i32x8 = cast(self.cmp_ge(Self::splat(2147483648.0))); - let cast: i32x8 = cast(convert_truncate_to_i32_m256i_from_m256(non_nan.avx)); - flip_to_max ^ cast - } else if #[cfg(target_feature="sse2")] { - // Based on: https://github.com/v8/v8/blob/210987a552a2bf2a854b0baa9588a5959ff3979d/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.h#L489-L504 - let non_nan_mask = self.cmp_eq(self); - let non_nan = self & non_nan_mask; - let flip_to_max: i32x8 = cast(self.cmp_ge(Self::splat(2147483648.0))); - let cast: i32x8 = i32x8 { sse0: truncate_m128_to_m128i(non_nan.sse0), sse1: truncate_m128_to_m128i(non_nan.sse1) }; - flip_to_max ^ cast - } else if #[cfg(target_feature="simd128")] { - cast(Self { - simd0: i32x4_trunc_sat_f32x4(self.simd0), - simd1: i32x4_trunc_sat_f32x4(self.simd1), - }) - } else { - let n: [f32; 8] = cast(self); - cast([ - n[0] as i32, - n[1] as i32, - n[2] as i32, - n[3] as i32, - n[4] as i32, - n[5] as i32, - n[6] as i32, - n[7] as i32, - ]) - } - } + self.to_i32x8_truncate() } #[inline] #[must_use] @@ -1200,7 +1122,7 @@ impl f32x8 { // Find quadrant let y = (xa * TWO_OVER_PI).round(); - let q: i32x8 = y.round_int(); + let q: i32x8 = y.to_i32x8_round(); let x = y.mul_neg_add(DP3F, y.mul_neg_add(DP2F, y.mul_neg_add(DP1F, xa))); @@ -1635,7 +1557,7 @@ impl f32x8 { ); let ee = e1 + e2 + e3; - let ei = cast::<_, i32x8>(ee.round_int()); + let ei = cast::<_, i32x8>(ee.to_i32x8_round()); let ej = cast::<_, i32x8>(ei + (cast::<_, i32x8>(z) >> 23)); let overflow = cast::<_, f32x8>(ej.cmp_gt(i32x8::splat(0x0FF))) @@ -1665,7 +1587,7 @@ impl f32x8 { let yi = y.cmp_eq(y.round()); // Is y odd? - let y_odd = cast::<_, i32x8>(y.round_int() << 31).round_float(); + let y_odd = cast::<_, i32x8>(y.to_i32x8_round() << 31).round_float(); let z1 = yi.blend(z | y_odd, self.cmp_eq(Self::ZERO).blend(z, Self::nan_pow())); @@ -1698,6 +1620,140 @@ impl f32x8 { pub fn as_array_ref(&self) -> &[f32; 8] { cast_ref(self) } + + /// Converts the f32 elements within this struct to i32 elements. + /// + /// The decimal portions of the values are truncated. + /// + /// This is a faster implementation than + /// `to_i32x8_truncate`, but it doesn't handle out of range values or NaNs. For those + /// values you get implementation defined behavior. + #[inline] + #[must_use] + pub fn to_i32x8_truncate_fast(self) -> i32x8 { + pick! { + if #[cfg(all(target_feature="avx"))] { + cast(convert_truncate_to_i32_m256i_from_m256(self.avx)) + } else if #[cfg(target_feature="sse2")] { + i32x8 { sse0: truncate_m128_to_m128i(self.sse0), sse1: truncate_m128_to_m128i(self.sse1) } + } else { + self.to_i32x8_truncate() + } + } + } + + /// Converts the f32 elements within this struct to i32 elements. + /// + /// The decimal portions of the values are truncated. + /// + /// This saturates out of range values + /// and turns NaNs into 0. Use `to_i32x8_truncate_fast` for a faster implementation + /// that doesn't handle out of range values or NaNs. + #[inline] + #[must_use] + pub fn to_i32x8_truncate(self) -> i32x8 { + pick! 
{ + if #[cfg(target_feature="avx")] { + // Based on: https://github.com/v8/v8/blob/210987a552a2bf2a854b0baa9588a5959ff3979d/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.h#L489-L504 + let non_nan_mask = self.cmp_eq(self); + let non_nan = self & non_nan_mask; + let flip_to_max: i32x8 = cast(self.cmp_ge(Self::splat(2147483648.0))); + let cast: i32x8 = cast(convert_truncate_to_i32_m256i_from_m256(non_nan.avx)); + flip_to_max ^ cast + } else if #[cfg(target_feature="sse2")] { + // Based on: https://github.com/v8/v8/blob/210987a552a2bf2a854b0baa9588a5959ff3979d/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.h#L489-L504 + let non_nan_mask = self.cmp_eq(self); + let non_nan = self & non_nan_mask; + let flip_to_max: i32x8 = cast(self.cmp_ge(Self::splat(2147483648.0))); + let cast: i32x8 = i32x8 { sse0: truncate_m128_to_m128i(non_nan.sse0), sse1: truncate_m128_to_m128i(non_nan.sse1) }; + flip_to_max ^ cast + } else if #[cfg(target_feature="simd128")] { + cast(Self { + simd0: i32x4_trunc_sat_f32x4(self.simd0), + simd1: i32x4_trunc_sat_f32x4(self.simd1), + }) + } else { + let n: [f32; 8] = cast(self); + cast([ + n[0] as i32, + n[1] as i32, + n[2] as i32, + n[3] as i32, + n[4] as i32, + n[5] as i32, + n[6] as i32, + n[7] as i32, + ]) + } + } + } + + /// Converts the f32 elements within this struct to i32 elements. + /// + /// The decimal portions of the values are rounded to the nearest integer. + /// + /// This is a faster implementation than + /// `to_i32x8_round`, but it doesn't handle out of range values or NaNs. For those + /// values you get implementation defined behavior. + #[inline] + #[must_use] + pub fn to_i32x8_round_fast(self) -> i32x8 { + pick! { + if #[cfg(target_feature="avx")] { + cast(convert_to_i32_m256i_from_m256(self.avx)) + } else if #[cfg(target_feature="sse2")] { + i32x8 { sse0: convert_to_i32_m128i_from_m128(self.sse0), sse1: convert_to_i32_m128i_from_m128(self.sse1) } + } else { + self.to_i32x8_round() + } + } + } + + /// Converts the f32 elements within this struct to i32 elements. + /// + /// The decimal portions of the values are rounded to the nearest integer. + /// + /// This saturates out of range values and + /// turns NaNs into 0. Use `to_i32x8_round_fast` for a faster implementation that + /// doesn't handle out of range values or NaNs. + #[inline] + #[must_use] + pub fn to_i32x8_round(self) -> i32x8 { + pick! 
{ + if #[cfg(target_feature="avx")] { + // Based on: https://github.com/v8/v8/blob/210987a552a2bf2a854b0baa9588a5959ff3979d/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.h#L489-L504 + let non_nan_mask = self.cmp_eq(self); + let non_nan = self & non_nan_mask; + let flip_to_max: i32x8 = cast(self.cmp_ge(Self::splat(2147483648.0))); + let cast: i32x8 = cast(convert_to_i32_m256i_from_m256(non_nan.avx)); + flip_to_max ^ cast + } else if #[cfg(target_feature="sse2")] { + // Based on: https://github.com/v8/v8/blob/210987a552a2bf2a854b0baa9588a5959ff3979d/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.h#L489-L504 + let non_nan_mask = self.cmp_eq(self); + let non_nan = self & non_nan_mask; + let flip_to_max: i32x8 = cast(self.cmp_ge(Self::splat(2147483648.0))); + let cast: i32x8 = i32x8 { sse0: convert_to_i32_m128i_from_m128(non_nan.sse0), sse1: convert_to_i32_m128i_from_m128(non_nan.sse1) }; + flip_to_max ^ cast + } else if #[cfg(target_feature="simd128")] { + cast(Self { + simd0: i32x4_trunc_sat_f32x4(f32x4_nearest(self.simd0)), + simd1: i32x4_trunc_sat_f32x4(f32x4_nearest(self.simd1)), + }) + } else { + let rounded: [f32; 8] = cast(self.round()); + cast([ + rounded[0] as i32, + rounded[1] as i32, + rounded[2] as i32, + rounded[3] as i32, + rounded[4] as i32, + rounded[5] as i32, + rounded[6] as i32, + rounded[7] as i32, + ]) + } + } + } } impl Not for f32x8 { diff --git a/src/f64x2_.rs b/src/f64x2_.rs index 356e46c2..a3085592 100644 --- a/src/f64x2_.rs +++ b/src/f64x2_.rs @@ -4,13 +4,13 @@ pick! { if #[cfg(target_feature="sse2")] { #[derive(Default, Clone, Copy, PartialEq)] #[repr(C, align(16))] - pub struct f64x2 { sse: m128d } + pub struct f64x2 { pub(crate) sse: m128d } } else if #[cfg(target_feature="simd128")] { use core::arch::wasm32::*; #[derive(Clone, Copy)] #[repr(transparent)] - pub struct f64x2 { simd: v128 } + pub struct f64x2 { pub(crate) simd: v128 } impl Default for f64x2 { fn default() -> Self { @@ -26,7 +26,7 @@ pick! { } else { #[derive(Default, Clone, Copy, PartialEq)] #[repr(C, align(16))] - pub struct f64x2 { arr: [f64;2] } + pub struct f64x2 { pub(crate) arr: [f64;2] } } } @@ -599,9 +599,9 @@ impl f64x2 { } #[inline] #[must_use] + #[deprecated(since = "0.7.6", note = "use `to_i64x2_round` instead")] pub fn round_int(self) -> i64x2 { - let rounded: [f64; 2] = cast(self.round()); - cast([rounded[0] as i64, rounded[1] as i64]) + self.to_i64x2_round() } #[inline] #[must_use] @@ -1098,7 +1098,7 @@ impl f64x2 { let xa = self.abs(); let y = (xa * TWO_OVER_PI).round(); - let q = y.round_int(); + let q = y.to_i64x2_round(); let x = y.mul_neg_add(DP3, y.mul_neg_add(DP2, y.mul_neg_add(DP1, xa))); @@ -1459,7 +1459,7 @@ impl f64x2 { polynomial_13m!(x, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13) + f64x2::ONE; let ee = e1 + e2 + e3; - let ei = cast::<_, i64x2>(ee.round_int()); + let ei = cast::<_, i64x2>(ee.to_i64x2_round()); let ej = cast::<_, i64x2>(ei + (cast::<_, i64x2>(z) >> 52)); let overflow = cast::<_, f64x2>(!ej.cmp_lt(i64x2::splat(0x07FF))) @@ -1493,7 +1493,7 @@ impl f64x2 { // Y into an integer let yi = y.cmp_eq(y.round()); // Is y odd? - let y_odd = cast::<_, i64x2>(y.round_int() << 63).round_float(); + let y_odd = cast::<_, i64x2>(y.to_i64x2_round() << 63).round_float(); let z1 = yi.blend(z | y_odd, self.cmp_eq(Self::ZERO).blend(z, Self::nan_pow())); @@ -1527,6 +1527,88 @@ impl f64x2 { pub fn as_array_ref(&self) -> &[f64; 2] { cast_ref(self) } + + /// Converts the f64 elements within this struct to f32 elements. 
+ /// + /// The first two elements will be the downcast values from this struct. + /// The remaining elements will be zero. + #[inline] + #[must_use] + pub fn to_f32x4(self) -> f32x4 { + pick! { + if #[cfg(target_feature="sse2")] { + f32x4 { sse: convert_to_m128_from_m128d(self.sse) } + } else { + let arr = self.to_array(); + f32x4::new([ + arr[0] as f32, + arr[1] as f32, + 0.0f32, + 0.0f32 + ]) + } + } + } + + /// Converts the f64 elements within this struct to i32 elements. + /// + /// The decimal portions of the values are truncated. + /// + /// The first two elements will be the downcast values from this struct. + /// The remaining elements will be zero. + #[inline] + #[must_use] + pub fn to_i32x4_truncate(self) -> i32x4 { + pick! { + if #[cfg(target_feature="sse2")] { + i32x4 { sse: truncate_m128d_to_m128i(self.sse) } + } else { + let arr = self.to_array(); + i32x4::new([ + arr[0] as i32, + arr[1] as i32, + 0i32, + 0i32, + ]) + } + } + } + + /// Converts the f64 elements within this struct to i32 elements. + /// + /// The decimal portions of the values are rounded to the nearest integer. + /// + /// The first two elements will be the downcast values from this struct. + /// The remaining elements will be zero. + #[inline] + #[must_use] + pub fn to_i32x4_round(self) -> i32x4 { + pick! { + if #[cfg(target_feature="sse2")] { + i32x4 { sse: convert_to_i32_m128i_from_m128d(self.sse) } + } else { + let rounded = self.round().to_array(); + i32x4::new([ + rounded[0] as i32, + rounded[1] as i32, + 0i32, + 0i32, + ]) + } + } + } + + /// Converts the f64 elements within this struct to i64 elements. + /// + /// The decimal portions of the values are rounded to the nearest integer. + /// + /// There is no direct SIMD instruction for this, so it may be slower than `to_i32x4_round`. + #[inline] + #[must_use] + pub fn to_i64x2_round(self) -> i64x2 { + let rounded: [f64; 2] = cast(self.round()); + cast([rounded[0] as i64, rounded[1] as i64]) + } } impl Not for f64x2 { diff --git a/src/f64x4_.rs b/src/f64x4_.rs index 6e709ff6..72982e41 100644 --- a/src/f64x4_.rs +++ b/src/f64x4_.rs @@ -4,17 +4,17 @@ pick! { if #[cfg(target_feature="avx")] { #[derive(Default, Clone, Copy, PartialEq)] #[repr(C, align(32))] - pub struct f64x4 { avx: m256d } + pub struct f64x4 { pub(crate) avx: m256d } } else if #[cfg(target_feature="sse2")] { #[derive(Default, Clone, Copy, PartialEq)] #[repr(C, align(32))] - pub struct f64x4 { sse0: m128d, sse1: m128d } + pub struct f64x4 { pub(crate) sse0: m128d, pub(crate) sse1: m128d } } else if #[cfg(target_feature="simd128")] { use core::arch::wasm32::*; #[derive(Clone, Copy)] #[repr(C, align(32))] - pub struct f64x4 { simd0: v128, simd1: v128 } + pub struct f64x4 { pub(crate) simd0: v128, pub(crate) simd1: v128 } impl Default for f64x4 { fn default() -> Self { @@ -31,7 +31,7 @@ pick! 
{ } else { #[derive(Default, Clone, Copy, PartialEq)] #[repr(C, align(32))] - pub struct f64x4 { arr: [f64;4] } + pub struct f64x4 { pub(crate) arr: [f64;4] } } } @@ -694,15 +694,9 @@ impl f64x4 { #[inline] #[must_use] + #[deprecated(since = "0.7.6", note = "use `to_i64x4_round` instead")] pub fn round_int(self) -> i64x4 { - // NOTE:No optimization for this currently available so delegate to LLVM - let rounded: [f64; 4] = cast(self.round()); - cast([ - rounded[0] as i64, - rounded[1] as i64, - rounded[2] as i64, - rounded[3] as i64, - ]) + self.to_i64x4_round() } #[inline] @@ -1213,7 +1207,7 @@ impl f64x4 { let xa = self.abs(); let y = (xa * TWO_OVER_PI).round(); - let q = y.round_int(); + let q = y.to_i64x4_round(); let x = y.mul_neg_add(DP3, y.mul_neg_add(DP2, y.mul_neg_add(DP1, xa))); @@ -1587,7 +1581,7 @@ impl f64x4 { polynomial_13m!(x, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13) + f64x4::ONE; let ee = e1 + e2 + e3; - let ei = cast::<_, i64x4>(ee.round_int()); + let ei = cast::<_, i64x4>(ee.to_i64x4_round()); let ej = cast::<_, i64x4>(ei + (cast::<_, i64x4>(z) >> 52)); let overflow = cast::<_, f64x4>(!ej.cmp_lt(i64x4::splat(0x07FF))) @@ -1622,7 +1616,7 @@ impl f64x4 { // Y into an integer let yi = y.cmp_eq(y.round()); // Is y odd? - let y_odd = cast::<_, i64x4>(y.round_int() << 63).round_float(); + let y_odd = cast::<_, i64x4>(y.to_i64x4_round() << 63).round_float(); let z1 = yi.blend(z | y_odd, self.cmp_eq(Self::ZERO).blend(z, Self::nan_pow())); x_sign.blend(z1, z) @@ -1654,6 +1648,84 @@ impl f64x4 { pub fn as_array_ref(&self) -> &[f64; 4] { cast_ref(self) } + + /// Converts the f64 elements within this struct to f32 elements. + #[inline] + #[must_use] + pub fn to_f32x4(self) -> f32x4 { + pick! { + if #[cfg(target_feature="avx2")] { + f32x4 { sse: convert_to_m128_from_m256d(self.avx) } + } else { + let arr = self.to_array(); + f32x4::new([ + arr[0] as f32, + arr[1] as f32, + arr[2] as f32, + arr[3] as f32, + ]) + } + } + } + + /// Converts the f64 elements within this struct to i32 elements. + /// + /// The decimal portions of the values are truncated. + #[inline] + #[must_use] + pub fn to_i32x4_truncate(self) -> i32x4 { + pick! { + if #[cfg(target_feature="avx2")] { + i32x4 { sse: convert_truncate_to_i32_m128i_from_m256d(self.avx) } + } else { + let arr = self.to_array(); + i32x4::new([ + arr[0] as i32, + arr[1] as i32, + arr[2] as i32, + arr[3] as i32, + ]) + } + } + } + + /// Converts the f64 elements within this struct to i32 elements. + /// + /// The decimal portions of the values are rounded to the nearest integer. + #[inline] + #[must_use] + pub fn to_i32x4_round(self) -> i32x4 { + pick! { + if #[cfg(target_feature="avx2")] { + i32x4 { sse: convert_to_i32_m128i_from_m256d(self.avx) } + } else { + let rounded = self.round().to_array(); + i32x4::new([ + rounded[0] as i32, + rounded[1] as i32, + rounded[2] as i32, + rounded[3] as i32, + ]) + } + } + } + + /// Converts the f64 elements within this struct to i64 elements. + /// + /// The decimal portions of the values are rounded to the nearest integer. + /// + /// There is no direct SIMD instruction for this, so it may be slower than `to_i32x4_round`. + #[inline] + #[must_use] + pub fn to_i64x4_round(self) -> i64x4 { + let rounded: [f64; 4] = cast(self.round()); + cast([ + rounded[0] as i64, + rounded[1] as i64, + rounded[2] as i64, + rounded[3] as i64, + ]) + } } impl Not for f64x4 { diff --git a/src/i16x16_.rs b/src/i16x16_.rs index 59654d65..f691fbb1 100644 --- a/src/i16x16_.rs +++ b/src/i16x16_.rs @@ -4,7 +4,7 @@ pick! 
{ if #[cfg(target_feature="avx2")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(32))] - pub struct i16x16 { avx2: m256i } + pub struct i16x16 { pub(crate) avx2: m256i } } else if #[cfg(target_feature="sse2")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(32))] @@ -14,7 +14,7 @@ pick! { #[derive(Clone, Copy)] #[repr(C, align(32))] - pub struct i16x16 { simd0: v128, simd1: v128 } + pub struct i16x16 { pub(crate) simd0: v128, pub(crate) simd1: v128 } impl Default for i16x16 { fn default() -> Self { @@ -32,7 +32,7 @@ pick! { } else { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(32))] - pub struct i16x16 { arr: [i16;16] } + pub struct i16x16 { pub(crate) arr: [i16;16] } } } diff --git a/src/i16x8_.rs b/src/i16x8_.rs index f0a13f84..78720635 100644 --- a/src/i16x8_.rs +++ b/src/i16x8_.rs @@ -4,13 +4,13 @@ pick! { if #[cfg(target_feature="sse2")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(16))] - pub struct i16x8 { sse: m128i } + pub struct i16x8 { pub(crate) sse: m128i } } else if #[cfg(target_feature="simd128")] { use core::arch::wasm32::*; #[derive(Clone, Copy)] #[repr(transparent)] - pub struct i16x8 { simd: v128 } + pub struct i16x8 { pub(crate) simd: v128 } impl Default for i16x8 { fn default() -> Self { @@ -28,7 +28,7 @@ pick! { } else { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(16))] - pub struct i16x8 { arr: [i16;8] } + pub struct i16x8 { pub(crate) arr: [i16;8] } } } @@ -471,4 +471,90 @@ impl i16x8 { pub fn as_array_ref(&self) -> &[i16; 8] { cast_ref(self) } + + /// Converts the first four i16 elements within this struct to i32 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_i32x4(self) -> i32x4 { + pick! { + if #[cfg(target_feature="sse4.1")] { + i32x4 { sse: convert_to_i32_m128i_from_lower4_i16_m128i(self.sse) } + } else { + let arr = self.to_array(); + i32x4::new([ + i32::from(arr[0]), + i32::from(arr[1]), + i32::from(arr[2]), + i32::from(arr[3]), + ]) + } + } + } + + /// Converts the i16 elements within this struct to i32 elements. + #[inline] + #[must_use] + pub fn to_i32x8(self) -> i32x8 { + pick! { + if #[cfg(target_feature="avx2")] { + i32x8 { avx2: convert_to_i32_m256i_from_i16_m128i(self.sse) } + } else { + let arr = self.to_array(); + i32x8::new([ + i32::from(arr[0]), + i32::from(arr[1]), + i32::from(arr[2]), + i32::from(arr[3]), + i32::from(arr[4]), + i32::from(arr[5]), + i32::from(arr[6]), + i32::from(arr[7]), + ]) + } + } + } + + /// Converts the first two i16 elements within this struct to i64 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_i64x2(self) -> i64x2 { + pick! { + if #[cfg(target_feature="sse4.1")] { + // Pretty sure this function is misnamed in the `safe_arch` crate. + // It calls the `_mm_cvtepi16_epi64` intrinsic. + i64x2 { sse: convert_to_i16_m128i_from_lower2_i16_m128i(self.sse) } + } else { + let arr = self.to_array(); + i64x2::new([ + i64::from(arr[0]), + i64::from(arr[1]), + ]) + } + } + } + + /// Converts the first four i16 elements within this struct to i64 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_i64x4(self) -> i64x4 { + pick! 
{ + if #[cfg(target_feature="avx2")] { + i64x4 { avx2: convert_to_i64_m256i_from_lower4_i16_m128i(self.sse) } + } else { + let arr = self.to_array(); + i64x4::new([ + i64::from(arr[0]), + i64::from(arr[1]), + i64::from(arr[2]), + i64::from(arr[3]), + ]) + } + } + } } diff --git a/src/i32x4_.rs b/src/i32x4_.rs index f015a61c..c683df27 100644 --- a/src/i32x4_.rs +++ b/src/i32x4_.rs @@ -4,13 +4,13 @@ pick! { if #[cfg(target_feature="sse2")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(16))] - pub struct i32x4 { sse: m128i } + pub struct i32x4 { pub(crate) sse: m128i } } else if #[cfg(target_feature="simd128")] { use core::arch::wasm32::*; #[derive(Clone, Copy)] #[repr(transparent)] - pub struct i32x4 { simd: v128 } + pub struct i32x4 { pub(crate) simd: v128 } impl Default for i32x4 { fn default() -> Self { @@ -28,7 +28,7 @@ pick! { } else { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(16))] - pub struct i32x4 { arr: [i32;4] } + pub struct i32x4 { pub(crate) arr: [i32;4] } } } @@ -489,4 +489,99 @@ impl i32x4 { pub fn as_array_ref(&self) -> &[i32; 4] { cast_ref(self) } + + /// Converts the first two i32 elements within this struct to i64 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_i64x2(self) -> i64x2 { + pick! { + if #[cfg(target_feature="sse4.1")] { + i64x2 { sse: convert_to_i64_m128i_from_lower2_i32_m128i(self.sse) } + } else { + let arr = self.to_array(); + i64x2::new([ + i64::from(arr[0]), + i64::from(arr[1]), + ]) + } + } + } + + /// Converts the i32 elements within this struct to i64 elements. + #[inline] + #[must_use] + pub fn to_i64x4(self) -> i64x4 { + pick! { + if #[cfg(target_feature="avx2")] { + i64x4 { avx2: convert_to_i64_m256i_from_i32_m128i(self.sse) } + } else { + let arr = self.to_array(); + i64x4::new([ + i64::from(arr[0]), + i64::from(arr[1]), + i64::from(arr[2]), + i64::from(arr[3]), + ]) + } + } + } + + /// Converts the first two i32 elements within this struct to f64 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_f64x2(self) -> f64x2 { + pick! { + if #[cfg(target_feature="sse2")] { + f64x2 { sse: convert_to_m128d_from_lower2_i32_m128i(self.sse) } + } else { + let arr = self.to_array(); + f64x2::new([ + f64::from(arr[0]), + f64::from(arr[1]), + ]) + } + } + } + + /// Converts the i32 elements within this struct to f64 elements. + #[inline] + #[must_use] + pub fn to_f64x4(self) -> f64x4 { + pick! { + if #[cfg(target_feature="avx")] { + f64x4 { avx: convert_to_m256d_from_i32_m128i(self.sse) } + } else { + let arr = self.to_array(); + f64x4::new([ + f64::from(arr[0]), + f64::from(arr[1]), + f64::from(arr[2]), + f64::from(arr[3]), + ]) + } + } + } + + /// Converts the i32 elements within this struct to f32 elements. + #[inline] + #[must_use] + pub fn to_f32x4(self) -> f32x4 { + pick! { + if #[cfg(target_feature="sse2")] { + f32x4 { sse: convert_to_m128_from_i32_m128i(self.sse) } + } else { + let arr = self.to_array(); + f32x4::new([ + arr[0] as f32, + arr[1] as f32, + arr[2] as f32, + arr[3] as f32, + ]) + } + } + } } diff --git a/src/i32x8_.rs b/src/i32x8_.rs index dcab8712..a4f8ae91 100644 --- a/src/i32x8_.rs +++ b/src/i32x8_.rs @@ -4,7 +4,7 @@ pick! 
{ if #[cfg(target_feature="avx2")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(32))] - pub struct i32x8 { avx2: m256i } + pub struct i32x8 { pub(crate) avx2: m256i } } else if #[cfg(target_feature="sse2")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(32))] @@ -14,7 +14,7 @@ pick! { #[derive(Clone, Copy)] #[repr(C, align(32))] - pub struct i32x8 { simd0: v128, simd1: v128 } + pub struct i32x8 { pub(crate) simd0: v128, pub(crate) simd1: v128 } impl Default for i32x8 { fn default() -> Self { @@ -32,7 +32,7 @@ pick! { } else { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(32))] - pub struct i32x8 { arr: [i32;8] } + pub struct i32x8 { pub(crate) arr: [i32;8] } } } @@ -586,6 +586,29 @@ impl i32x8 { pub fn as_array_ref(&self) -> &[i32; 8] { cast_ref(self) } + + /// Converts the i32 elements within this struct to f32 elements. + #[inline] + #[must_use] + pub fn to_f32x8(self) -> f32x8 { + pick! { + if #[cfg(target_feature="avx2")] { + f32x8 { avx: convert_to_m256_from_i32_m256i(self.avx2) } + } else { + let arr = self.to_array(); + f32x8::new([ + arr[0] as f32, + arr[1] as f32, + arr[2] as f32, + arr[3] as f32, + arr[4] as f32, + arr[5] as f32, + arr[6] as f32, + arr[7] as f32, + ]) + } + } + } } impl Not for i32x8 { diff --git a/src/i64x2_.rs b/src/i64x2_.rs index 5a3cc18b..e1e0820e 100644 --- a/src/i64x2_.rs +++ b/src/i64x2_.rs @@ -4,13 +4,13 @@ pick! { if #[cfg(target_feature="sse2")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(16))] - pub struct i64x2 { sse: m128i } + pub struct i64x2 { pub(crate) sse: m128i } } else if #[cfg(target_feature="simd128")] { use core::arch::wasm32::*; #[derive(Clone, Copy)] #[repr(transparent)] - pub struct i64x2 { simd: v128 } + pub struct i64x2 { pub(crate) simd: v128 } impl Default for i64x2 { fn default() -> Self { @@ -28,7 +28,7 @@ pick! { } else { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(16))] - pub struct i64x2 { arr: [i64;2] } + pub struct i64x2 { pub(crate) arr: [i64;2] } } } diff --git a/src/i64x4_.rs b/src/i64x4_.rs index d5aabb13..e7f9cd51 100644 --- a/src/i64x4_.rs +++ b/src/i64x4_.rs @@ -4,17 +4,17 @@ pick! { if #[cfg(target_feature="avx2")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(32))] - pub struct i64x4 { avx2: m256i } + pub struct i64x4 { pub(crate) avx2: m256i } } else if #[cfg(target_feature="sse2")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(32))] - pub struct i64x4 { sse0: m128i, sse1: m128i } + pub struct i64x4 { pub(crate) sse0: m128i, pub(crate) sse1: m128i } } else if #[cfg(target_feature="simd128")] { use core::arch::wasm32::*; #[derive(Clone, Copy)] #[repr(C, align(32))] - pub struct i64x4 { simd0: v128, simd1: v128 } + pub struct i64x4 { pub(crate) simd0: v128, pub(crate) simd1: v128 } impl Default for i64x4 { fn default() -> Self { @@ -32,7 +32,7 @@ pick! { } else { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(32))] - pub struct i64x4 { arr: [i64;4] } + pub struct i64x4 { pub(crate) arr: [i64;4] } } } diff --git a/src/i8x16_.rs b/src/i8x16_.rs index c924a6a4..7240a3b3 100644 --- a/src/i8x16_.rs +++ b/src/i8x16_.rs @@ -4,13 +4,13 @@ pick! 
{ if #[cfg(target_feature="sse2")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(16))] - pub struct i8x16 { sse: m128i } + pub struct i8x16 { pub(crate) sse: m128i } } else if #[cfg(target_feature="simd128")] { use core::arch::wasm32::*; #[derive(Clone, Copy)] #[repr(transparent)] - pub struct i8x16 { simd: v128 } + pub struct i8x16 { pub(crate) simd: v128 } impl Default for i8x16 { fn default() -> Self { @@ -28,7 +28,7 @@ pick! { } else { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(16))] - pub struct i8x16 { arr: [i8;16] } + pub struct i8x16 { pub(crate) arr: [i8;16] } } } @@ -488,4 +488,146 @@ impl i8x16 { pub fn as_array_ref(&self) -> &[i8; 16] { cast_ref(self) } + + /// Converts the first eight i8 elements within this struct to i16 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_i16x8(self) -> i16x8 { + pick! { + if #[cfg(target_feature="sse4.1")] { + i16x8 { sse: convert_to_i16_m128i_from_lower8_i8_m128i(self.sse) } + } else { + let arr = self.to_array(); + i16x8::new([ + i16::from(arr[0]), + i16::from(arr[1]), + i16::from(arr[2]), + i16::from(arr[3]), + i16::from(arr[4]), + i16::from(arr[5]), + i16::from(arr[6]), + i16::from(arr[7]), + ]) + } + } + } + + /// Converts the i8 elements within this struct to i16 elements. + #[inline] + #[must_use] + pub fn to_i16x16(self) -> i16x16 { + pick! { + if #[cfg(target_feature="avx2")] { + i16x16 { avx2: convert_to_i16_m256i_from_i8_m128i(self.sse) } + } else { + let arr = self.to_array(); + i16x16::new([ + i16::from(arr[0]), + i16::from(arr[1]), + i16::from(arr[2]), + i16::from(arr[3]), + i16::from(arr[4]), + i16::from(arr[5]), + i16::from(arr[6]), + i16::from(arr[7]), + i16::from(arr[8]), + i16::from(arr[9]), + i16::from(arr[10]), + i16::from(arr[11]), + i16::from(arr[12]), + i16::from(arr[13]), + i16::from(arr[14]), + i16::from(arr[15]), + ]) + } + } + } + + /// Converts the first four i8 elements within this struct to i32 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_i32x4(self) -> i32x4 { + pick! { + if #[cfg(target_feature="sse4.1")] { + i32x4 { sse: convert_to_i32_m128i_from_lower4_i8_m128i(self.sse) } + } else { + let arr = self.to_array(); + i32x4::new([ + i32::from(arr[0]), + i32::from(arr[1]), + i32::from(arr[2]), + i32::from(arr[3]), + ]) + } + } + } + + /// Converts the first eight i8 elements within this struct to i32 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_i32x8(self) -> i32x8 { + pick! { + if #[cfg(target_feature="avx2")] { + i32x8 { avx2: convert_to_i32_m256i_from_lower8_i8_m128i(self.sse) } + } else { + let arr = self.to_array(); + i32x8::new([ + i32::from(arr[0]), + i32::from(arr[1]), + i32::from(arr[2]), + i32::from(arr[3]), + i32::from(arr[4]), + i32::from(arr[5]), + i32::from(arr[6]), + i32::from(arr[7]), + ]) + } + } + } + + /// Converts the first two i8 elements within this struct to i64 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_i64x2(self) -> i64x2 { + pick! { + if #[cfg(target_feature="sse4.1")] { + i64x2 { sse: convert_to_i64_m128i_from_lower2_i8_m128i(self.sse) } + } else { + let arr = self.to_array(); + i64x2::new([ + i64::from(arr[0]), + i64::from(arr[1]), + ]) + } + } + } + + /// Converts the first four i8 elements within this struct to i64 elements. + /// + /// The remaining elements will be discarded. 
+ #[inline] + #[must_use] + pub fn to_i64x4(self) -> i64x4 { + pick! { + if #[cfg(target_feature="avx2")] { + i64x4 { avx2: convert_to_i64_m256i_from_lower4_i8_m128i(self.sse) } + } else { + let arr = self.to_array(); + i64x4::new([ + i64::from(arr[0]), + i64::from(arr[1]), + i64::from(arr[2]), + i64::from(arr[3]), + ]) + } + } + } } diff --git a/src/i8x32_.rs b/src/i8x32_.rs index fdff9ede..31786fd3 100644 --- a/src/i8x32_.rs +++ b/src/i8x32_.rs @@ -4,17 +4,17 @@ pick! { if #[cfg(target_feature="avx2")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(32))] - pub struct i8x32 { avx: m256i } + pub struct i8x32 { pub(crate) avx: m256i } } else if #[cfg(target_feature="sse2")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(32))] - pub struct i8x32 { sse0: m128i, sse1: m128i } + pub struct i8x32 { pub(crate) sse0: m128i, pub(crate) sse1: m128i } } else if #[cfg(target_feature="simd128")] { use core::arch::wasm32::*; #[derive(Clone, Copy)] #[repr(C, align(32))] - pub struct i8x32 { simd0: v128, simd1: v128 } + pub struct i8x32 { pub(crate) simd0: v128, pub(crate) simd1: v128 } impl Default for i8x32 { fn default() -> Self { @@ -32,7 +32,7 @@ pick! { } else { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(32))] - pub struct i8x32 { arr: [i8;32] } + pub struct i8x32 { pub(crate) arr: [i8;32] } } } diff --git a/src/u16x8_.rs b/src/u16x8_.rs index b800f28e..6d69eba1 100644 --- a/src/u16x8_.rs +++ b/src/u16x8_.rs @@ -4,13 +4,13 @@ pick! { if #[cfg(target_feature="sse2")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(16))] - pub struct u16x8 { sse: m128i } + pub struct u16x8 { pub(crate) sse: m128i } } else if #[cfg(target_feature="simd128")] { use core::arch::wasm32::*; #[derive(Clone, Copy)] #[repr(transparent)] - pub struct u16x8 { simd: v128 } + pub struct u16x8 { pub(crate) simd: v128 } impl Default for u16x8 { fn default() -> Self { @@ -28,7 +28,7 @@ pick! { } else { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(16))] - pub struct u16x8 { arr: [u16;8] } + pub struct u16x8 { pub(crate) arr: [u16;8] } } } @@ -414,4 +414,172 @@ impl u16x8 { pub fn as_array_ref(&self) -> &[u16; 8] { cast_ref(self) } + + /// Converts the first four u16 elements within this struct to u32 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_u32x4(self) -> u32x4 { + pick! { + if #[cfg(target_feature="sse4.1")] { + u32x4 { sse: convert_to_u32_m128i_from_lower4_u16_m128i(self.sse) } + } else { + let arr = self.to_array(); + u32x4::new([ + u32::from(arr[0]), + u32::from(arr[1]), + u32::from(arr[2]), + u32::from(arr[3]), + ]) + } + } + } + + /// Converts the first four u16 elements within this struct to i32 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_i32x4(self) -> i32x4 { + pick! { + if #[cfg(target_feature="sse4.1")] { + i32x4 { sse: convert_to_u32_m128i_from_lower4_u16_m128i(self.sse) } + } else { + let arr = self.to_array(); + i32x4::new([ + i32::from(arr[0]), + i32::from(arr[1]), + i32::from(arr[2]), + i32::from(arr[3]), + ]) + } + } + } + + /// Converts the u16 elements within this struct to u32 elements. + #[inline] + #[must_use] + pub fn to_u32x8(self) -> u32x8 { + pick! 
{ + if #[cfg(target_feature="avx2")] { + u32x8 { avx2: convert_to_i32_m256i_from_u16_m128i(self.sse) } + } else { + let arr = self.to_array(); + u32x8::new([ + u32::from(arr[0]), + u32::from(arr[1]), + u32::from(arr[2]), + u32::from(arr[3]), + u32::from(arr[4]), + u32::from(arr[5]), + u32::from(arr[6]), + u32::from(arr[7]), + ]) + } + } + } + + /// Converts the u16 elements within this struct to i32 elements. + #[inline] + #[must_use] + pub fn to_i32x8(self) -> i32x8 { + pick! { + if #[cfg(target_feature="avx2")] { + i32x8 { avx2: convert_to_i32_m256i_from_u16_m128i(self.sse) } + } else { + let arr = self.to_array(); + i32x8::new([ + i32::from(arr[0]), + i32::from(arr[1]), + i32::from(arr[2]), + i32::from(arr[3]), + i32::from(arr[4]), + i32::from(arr[5]), + i32::from(arr[6]), + i32::from(arr[7]), + ]) + } + } + } + + /// Converts the first two u16 elements within this struct to u64 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_u64x2(self) -> u64x2 { + pick! { + if #[cfg(target_feature="sse4.1")] { + u64x2 { sse: convert_to_u64_m128i_from_lower2_u16_m128i(self.sse) } + } else { + let arr = self.to_array(); + u64x2::new([ + u64::from(arr[0]), + u64::from(arr[1]), + ]) + } + } + } + + /// Converts the first two u16 elements within this struct to i64 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_i64x2(self) -> i64x2 { + pick! { + if #[cfg(target_feature="sse4.1")] { + i64x2 { sse: convert_to_u64_m128i_from_lower2_u16_m128i(self.sse) } + } else { + let arr = self.to_array(); + i64x2::new([ + i64::from(arr[0]), + i64::from(arr[1]), + ]) + } + } + } + + /// Converts the first four u16 elements within this struct to u64 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_u64x4(self) -> u64x4 { + pick! { + if #[cfg(target_feature="avx2")] { + u64x4 { avx2: convert_to_i64_m256i_from_lower4_u16_m128i(self.sse) } + } else { + let arr = self.to_array(); + u64x4::new([ + u64::from(arr[0]), + u64::from(arr[1]), + u64::from(arr[2]), + u64::from(arr[3]), + ]) + } + } + } + + /// Converts the first four u16 elements within this struct to i64 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_i64x4(self) -> i64x4 { + pick! { + if #[cfg(target_feature="avx2")] { + i64x4 { avx2: convert_to_i64_m256i_from_lower4_u16_m128i(self.sse) } + } else { + let arr = self.to_array(); + i64x4::new([ + i64::from(arr[0]), + i64::from(arr[1]), + i64::from(arr[2]), + i64::from(arr[3]), + ]) + } + } + } } diff --git a/src/u32x4_.rs b/src/u32x4_.rs index f7f673ad..0c06dc3c 100644 --- a/src/u32x4_.rs +++ b/src/u32x4_.rs @@ -4,13 +4,13 @@ pick! { if #[cfg(target_feature="sse2")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(16))] - pub struct u32x4 { sse: m128i } + pub struct u32x4 { pub(crate) sse: m128i } } else if #[cfg(target_feature="simd128")] { use core::arch::wasm32::*; #[derive(Clone, Copy)] #[repr(transparent)] - pub struct u32x4 { simd: v128 } + pub struct u32x4 { pub(crate) simd: v128 } impl Default for u32x4 { fn default() -> Self { @@ -28,7 +28,7 @@ pick! { } else { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(16))] - pub struct u32x4 { arr: [u32;4] } + pub struct u32x4 { pub(crate) arr: [u32;4] } } } @@ -372,4 +372,137 @@ impl u32x4 { pub fn as_array_ref(&self) -> &[u32; 4] { cast_ref(self) } + + /// Converts the first two u32 elements within this struct to u64 elements. 
+ /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_u64x2(self) -> u64x2 { + pick! { + if #[cfg(target_feature="sse4.1")] { + u64x2 { sse: convert_to_u64_m128i_from_lower2_u32_m128i(self.sse) } + } else { + let arr = self.to_array(); + u64x2::new([ + u64::from(arr[0]), + u64::from(arr[1]), + ]) + } + } + } + + /// Converts the first two u32 elements within this struct to i64 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_i64x2(self) -> i64x2 { + pick! { + if #[cfg(target_feature="sse4.1")] { + i64x2 { sse: convert_to_u64_m128i_from_lower2_u32_m128i(self.sse) } + } else { + let arr = self.to_array(); + i64x2::new([ + i64::from(arr[0]), + i64::from(arr[1]), + ]) + } + } + } + + /// Converts the u32 elements within this struct to u64 elements. + #[inline] + #[must_use] + pub fn to_u64x4(self) -> u64x4 { + pick! { + if #[cfg(target_feature="avx2")] { + u64x4 { avx2: convert_to_i64_m256i_from_lower4_u16_m128i(self.sse) } + } else { + let arr = self.to_array(); + u64x4::new([ + u64::from(arr[0]), + u64::from(arr[1]), + u64::from(arr[2]), + u64::from(arr[3]), + ]) + } + } + } + + /// Converts the u32 elements within this struct to i64 elements. + #[inline] + #[must_use] + pub fn to_i64x4(self) -> i64x4 { + pick! { + if #[cfg(target_feature="avx2")] { + i64x4 { avx2: convert_to_i64_m256i_from_lower4_u16_m128i(self.sse) } + } else { + let arr = self.to_array(); + i64x4::new([ + i64::from(arr[0]), + i64::from(arr[1]), + i64::from(arr[2]), + i64::from(arr[3]), + ]) + } + } + } + + /// Converts the first two u32 elements within this struct to f64 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_f64x2(self) -> f64x2 { + pick! { + if #[cfg(target_feature="sse2")] { + f64x2 { sse: convert_to_m128d_from_lower2_i32_m128i(self.sse) } + } else { + let arr = self.to_array(); + f64x2::new([ + f64::from(arr[0]), + f64::from(arr[1]), + ]) + } + } + } + + /// Converts the u32 elements within this struct to f64 elements. + #[inline] + #[must_use] + pub fn to_f64x4(self) -> f64x4 { + pick! { + if #[cfg(target_feature="avx")] { + f64x4 { avx: convert_to_m256d_from_i32_m128i(self.sse) } + } else { + let arr = self.to_array(); + f64x4::new([ + f64::from(arr[0]), + f64::from(arr[1]), + f64::from(arr[2]), + f64::from(arr[3]), + ]) + } + } + } + + /// Converts the u32 elements within this struct to f32 elements. + #[inline] + #[must_use] + pub fn to_f32x4(self) -> f32x4 { + pick! { + if #[cfg(target_feature="sse2")] { + f32x4 { sse: convert_to_m128_from_i32_m128i(self.sse) } + } else { + let arr = self.to_array(); + f32x4::new([ + arr[0] as f32, + arr[1] as f32, + arr[2] as f32, + arr[3] as f32, + ]) + } + } + } } diff --git a/src/u32x8_.rs b/src/u32x8_.rs index 4934d8e0..97cd2cf9 100644 --- a/src/u32x8_.rs +++ b/src/u32x8_.rs @@ -4,17 +4,17 @@ pick! 
{ if #[cfg(target_feature="avx2")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(32))] - pub struct u32x8 { avx2: m256i } + pub struct u32x8 { pub(crate) avx2: m256i } } else if #[cfg(target_feature="sse")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(32))] - pub struct u32x8 { sse0: m128i, sse1: m128i } + pub struct u32x8 { pub(crate) sse0: m128i, pub(crate) sse1: m128i } } else if #[cfg(target_feature="simd128")] { use core::arch::wasm32::*; #[derive(Clone, Copy)] #[repr(C, align(32))] - pub struct u32x8 { simd0: v128, simd1: v128 } + pub struct u32x8 { pub(crate) simd0: v128, pub(crate) simd1: v128 } impl Default for u32x8 { fn default() -> Self { @@ -32,7 +32,7 @@ pick! { } else { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(32))] - pub struct u32x8 { arr: [u32;8] } + pub struct u32x8 { pub(crate) arr: [u32;8] } } } @@ -422,6 +422,29 @@ impl u32x8 { pub fn as_array_ref(&self) -> &[u32; 8] { cast_ref(self) } + + /// Converts the u32 elements within this struct to f32 elements. + #[inline] + #[must_use] + pub fn to_f32x8(self) -> f32x8 { + pick! { + if #[cfg(target_feature="avx2")] { + f32x8 { avx: convert_to_m256_from_i32_m256i(self.avx2) } + } else { + let arr = self.to_array(); + f32x8::new([ + arr[0] as f32, + arr[1] as f32, + arr[2] as f32, + arr[3] as f32, + arr[4] as f32, + arr[5] as f32, + arr[6] as f32, + arr[7] as f32, + ]) + } + } + } } impl Not for u32x8 { diff --git a/src/u64x2_.rs b/src/u64x2_.rs index ad706feb..1a77bc97 100644 --- a/src/u64x2_.rs +++ b/src/u64x2_.rs @@ -4,13 +4,13 @@ pick! { if #[cfg(target_feature="sse2")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(16))] - pub struct u64x2 { sse: m128i } + pub struct u64x2 { pub(crate) sse: m128i } } else if #[cfg(target_feature="simd128")] { use core::arch::wasm32::*; #[derive(Clone, Copy)] #[repr(transparent)] - pub struct u64x2 { simd: v128 } + pub struct u64x2 { pub(crate) simd: v128 } impl Default for u64x2 { fn default() -> Self { @@ -28,7 +28,7 @@ pick! { } else { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(16))] - pub struct u64x2 { arr: [u64;2] } + pub struct u64x2 { pub(crate) arr: [u64;2] } } } diff --git a/src/u64x4_.rs b/src/u64x4_.rs index 0171e981..1c4f5fd9 100644 --- a/src/u64x4_.rs +++ b/src/u64x4_.rs @@ -4,17 +4,17 @@ pick! { if #[cfg(target_feature="avx2")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(32))] - pub struct u64x4 { avx2: m256i } + pub struct u64x4 { pub(crate) avx2: m256i } } else if #[cfg(target_feature="sse2")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(32))] - pub struct u64x4 { sse0: m128i, sse1: m128i } + pub struct u64x4 { pub(crate) sse0: m128i, pub(crate) sse1: m128i } } else if #[cfg(target_feature="simd128")] { use core::arch::wasm32::*; #[derive(Clone, Copy)] #[repr(C, align(32))] - pub struct u64x4 { simd0: v128, simd1: v128 } + pub struct u64x4 { pub(crate) simd0: v128, pub(crate) simd1: v128 } impl Default for u64x4 { fn default() -> Self { @@ -32,7 +32,7 @@ pick! { } else { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(32))] - pub struct u64x4 { arr: [u64;4] } + pub struct u64x4 { pub(crate) arr: [u64;4] } } } diff --git a/src/u8x16_.rs b/src/u8x16_.rs index c21ca005..0af3c4c2 100644 --- a/src/u8x16_.rs +++ b/src/u8x16_.rs @@ -4,13 +4,13 @@ pick! 
{ if #[cfg(target_feature="sse2")] { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(16))] - pub struct u8x16 { sse: m128i } + pub struct u8x16 { pub(crate) sse: m128i } } else if #[cfg(target_feature="simd128")] { use core::arch::wasm32::*; #[derive(Clone, Copy)] #[repr(transparent)] - pub struct u8x16 { simd: v128 } + pub struct u8x16 { pub(crate) simd: v128 } impl Default for u8x16 { fn default() -> Self { @@ -28,7 +28,7 @@ pick! { } else { #[derive(Default, Clone, Copy, PartialEq, Eq)] #[repr(C, align(16))] - pub struct u8x16 { arr: [u8;16] } + pub struct u8x16 { pub(crate) arr: [u8;16] } } } @@ -362,4 +362,265 @@ impl u8x16 { pub fn as_array_ref(&self) -> &[u8; 16] { cast_ref(self) } + + /// Converts the first eight u8 elements within this struct to u16 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_u16x8(self) -> u16x8 { + pick! { + if #[cfg(target_feature="sse4.1")] { + u16x8 { sse: convert_to_u16_m128i_from_lower8_u8_m128i(self.sse) } + } else { + let arr = self.to_array(); + u16x8::new([ + u16::from(arr[0]), + u16::from(arr[1]), + u16::from(arr[2]), + u16::from(arr[3]), + u16::from(arr[4]), + u16::from(arr[5]), + u16::from(arr[6]), + u16::from(arr[7]), + ]) + } + } + } + + /// Converts the first eight u8 elements within this struct to i16 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_i16x8(self) -> i16x8 { + pick! { + if #[cfg(target_feature="sse4.1")] { + i16x8 { sse: convert_to_u16_m128i_from_lower8_u8_m128i(self.sse) } + } else { + let arr = self.to_array(); + i16x8::new([ + i16::from(arr[0]), + i16::from(arr[1]), + i16::from(arr[2]), + i16::from(arr[3]), + i16::from(arr[4]), + i16::from(arr[5]), + i16::from(arr[6]), + i16::from(arr[7]), + ]) + } + } + } + + /// Converts the u8 elements within this struct to i16 elements. + #[inline] + #[must_use] + pub fn to_i16x16(self) -> i16x16 { + pick! { + if #[cfg(target_feature="avx2")] { + i16x16 { avx2: convert_to_i16_m256i_from_u8_m128i(self.sse) } + } else { + let arr = self.to_array(); + i16x16::new([ + i16::from(arr[0]), + i16::from(arr[1]), + i16::from(arr[2]), + i16::from(arr[3]), + i16::from(arr[4]), + i16::from(arr[5]), + i16::from(arr[6]), + i16::from(arr[7]), + i16::from(arr[8]), + i16::from(arr[9]), + i16::from(arr[10]), + i16::from(arr[11]), + i16::from(arr[12]), + i16::from(arr[13]), + i16::from(arr[14]), + i16::from(arr[15]), + ]) + } + } + } + + /// Converts the first four u8 elements within this struct to u32 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_u32x4(self) -> u32x4 { + pick! { + if #[cfg(target_feature="sse4.1")] { + u32x4 { sse: convert_to_u32_m128i_from_lower4_u8_m128i(self.sse) } + } else { + let arr = self.to_array(); + u32x4::new([ + u32::from(arr[0]), + u32::from(arr[1]), + u32::from(arr[2]), + u32::from(arr[3]), + ]) + } + } + } + + /// Converts the first four u8 elements within this struct to i32 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_i32x4(self) -> i32x4 { + pick! { + if #[cfg(target_feature="sse4.1")] { + i32x4 { sse: convert_to_u32_m128i_from_lower4_u8_m128i(self.sse) } + } else { + let arr = self.to_array(); + i32x4::new([ + i32::from(arr[0]), + i32::from(arr[1]), + i32::from(arr[2]), + i32::from(arr[3]), + ]) + } + } + } + + /// Converts the first eight u8 elements within this struct to u32 elements. + /// + /// The remaining elements will be discarded. 
+ #[inline] + #[must_use] + pub fn to_u32x8(self) -> u32x8 { + pick! { + if #[cfg(target_feature="avx2")] { + // This function is named wrong in `safe_arch`. + // It calls `_mm256_cvtepu8_epi32`. + u32x8 { avx2: convert_to_i16_m256i_from_lower8_u8_m128i(self.sse) } + } else { + let arr = self.to_array(); + u32x8::new([ + u32::from(arr[0]), + u32::from(arr[1]), + u32::from(arr[2]), + u32::from(arr[3]), + u32::from(arr[4]), + u32::from(arr[5]), + u32::from(arr[6]), + u32::from(arr[7]), + ]) + } + } + } + + /// Converts the first eight u8 elements within this struct to i32 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_i32x8(self) -> i32x8 { + pick! { + if #[cfg(target_feature="avx2")] { + // This function is named wrong in `safe_arch`. + // It calls `_mm256_cvtepu8_epi32`. + i32x8 { avx2: convert_to_i16_m256i_from_lower8_u8_m128i(self.sse) } + } else { + let arr = self.to_array(); + i32x8::new([ + i32::from(arr[0]), + i32::from(arr[1]), + i32::from(arr[2]), + i32::from(arr[3]), + i32::from(arr[4]), + i32::from(arr[5]), + i32::from(arr[6]), + i32::from(arr[7]), + ]) + } + } + } + + /// Converts the first two u8 elements within this struct to u64 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_u64x2(self) -> u64x2 { + pick! { + if #[cfg(target_feature="sse4.1")] { + u64x2 { sse: convert_to_u64_m128i_from_lower2_u8_m128i(self.sse) } + } else { + let arr = self.to_array(); + u64x2::new([ + u64::from(arr[0]), + u64::from(arr[1]), + ]) + } + } + } + + /// Converts the first two u8 elements within this struct to i64 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_i64x2(self) -> i64x2 { + pick! { + if #[cfg(target_feature="sse4.1")] { + i64x2 { sse: convert_to_u64_m128i_from_lower2_u8_m128i(self.sse) } + } else { + let arr = self.to_array(); + i64x2::new([ + i64::from(arr[0]), + i64::from(arr[1]), + ]) + } + } + } + + /// Converts the first four u8 elements within this struct to u64 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_u64x4(self) -> u64x4 { + pick! { + if #[cfg(target_feature="avx2")] { + // This function is named wrong in `safe_arch`. + // It calls `_mm256_cvtepu8_epi64`. + u64x4 { avx2: convert_to_i16_m256i_from_lower4_u8_m128i(self.sse) } + } else { + let arr = self.to_array(); + u64x4::new([ + u64::from(arr[0]), + u64::from(arr[1]), + u64::from(arr[2]), + u64::from(arr[3]), + ]) + } + } + } + + /// Converts the first four u8 elements within this struct to i64 elements. + /// + /// The remaining elements will be discarded. + #[inline] + #[must_use] + pub fn to_i64x4(self) -> i64x4 { + pick! { + if #[cfg(target_feature="avx2")] { + // This function is named wrong in `safe_arch`. + // It calls `_mm256_cvtepu8_epi64`. + i64x4 { avx2: convert_to_i16_m256i_from_lower4_u8_m128i(self.sse) } + } else { + let arr = self.to_array(); + i64x4::new([ + i64::from(arr[0]), + i64::from(arr[1]), + i64::from(arr[2]), + i64::from(arr[3]), + ]) + } + } + } }
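Usage sketch (not part of the patch): a minimal example of the conversion API introduced above, assuming the patched crate is `wide` (suggested by the `0.7.6` deprecation notes, but the crate name is an assumption) and that this diff has been applied. The method names and their saturation/truncation semantics come from the doc comments in the diff; the input values and the `main` scaffolding are purely illustrative.

  // Hypothetical usage example; `wide` as the crate name is an assumption.
  use wide::*;

  fn main() {
    let v = f32x4::new([1.5, -2.5, f32::NAN, 4.0e9]);

    // Checked conversions: out of range lanes saturate, NaN lanes become 0.
    let rounded: i32x4 = v.to_i32x4_round();
    let truncated: i32x4 = v.to_i32x4_truncate();

    // The `_fast` variants skip that handling, so the NaN and 4.0e9 lanes here
    // produce implementation defined results.
    let rounded_fast: i32x4 = v.to_i32x4_round_fast();

    // Widening float conversion keeps every lane...
    let doubles: f64x4 = v.to_f64x4();
    // ...while conversions that shrink the lane count keep only the lower lanes.
    let lower_two: i64x2 = i32x4::new([1, 2, 3, 4]).to_i64x2();

    println!("{:?}", rounded.to_array());
    println!("{:?}", truncated.to_array());
    println!("{:?}", rounded_fast.to_array());
    println!("{:?}", doubles.to_array());
    println!("{:?}", lower_two.to_array());
  }

The deprecated `round_int` / `trunc_int` style methods delegate to the new `to_*` names, so existing callers keep compiling (with a deprecation warning) through the 0.7.x series.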