Skip to content

Commit

Permalink
Fix x86
Browse files (browse the repository at this point in the history)
Signed-off-by: Heinz N. Gies <heinz@licenser.net>
  • Loading branch information
Licenser committed Aug 31, 2024
1 parent e872158 commit 15c04c8
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 32 deletions.
10 changes: 5 additions & 5 deletions src/impls/avx2/stage1.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
#![allow(dead_code)]
use crate::{static_cast_i32, static_cast_i64, static_cast_u32, Stage1Parse};
use crate::{static_cast_i32, static_cast_i64, static_cast_u32, Stage1Parse, SIMDINPUT_LENGTH};
#[cfg(target_arch = "x86")]
use std::arch::x86 as arch;

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64 as arch;

use arch::{
__m256i, _mm256_add_epi32, _mm256_and_si256, _mm256_cmpeq_epi8, _mm256_loadu_si256,
_mm256_max_epu8, _mm256_movemask_epi8, _mm256_set1_epi8, _mm256_set_epi32, _mm256_setr_epi8,
_mm256_setzero_si256, _mm256_shuffle_epi8, _mm256_srli_epi32, _mm256_storeu_si256,
_mm_clmulepi64_si128, _mm_set1_epi8, _mm_set_epi64x,
__m256i, _mm256_add_epi32, _mm256_and_si256, _mm256_cmpeq_epi8, _mm256_load_si256,
_mm256_loadu_si256, _mm256_max_epu8, _mm256_movemask_epi8, _mm256_set1_epi8, _mm256_set_epi32,
_mm256_setr_epi8, _mm256_setzero_si256, _mm256_shuffle_epi8, _mm256_srli_epi32,
_mm256_storeu_si256, _mm_clmulepi64_si128, _mm_set1_epi8, _mm_set_epi64x,
};

macro_rules! low_nibble_mask {
Expand Down
2 changes: 1 addition & 1 deletion src/impls/native/stage1.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#![allow(clippy::cast_lossless, clippy::cast_sign_loss)]

use crate::{static_cast_i32, Stage1Parse};
use crate::{static_cast_i32, Stage1Parse, SIMDINPUT_LENGTH};

type V128 = [u8; 16];

Expand Down
3 changes: 0 additions & 3 deletions src/impls/neon/stage1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,6 @@ pub unsafe fn neon_movemask_bulk(

// /NEON-SPECIFIC

//pub const SIMDJSON_PADDING: usize = mem::size_of::<uint8x16_t>() * 4;
//pub const SIMDINPUT_LENGTH: usize = 64;

#[derive(Debug)]
pub(crate) struct SimdInput {
v0: uint8x16_t,
Expand Down
2 changes: 1 addition & 1 deletion src/impls/portable/stage1.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::simd::{prelude::*, ToBitMask};

use crate::{static_cast_i32, Stage1Parse};
use crate::{static_cast_i32, Stage1Parse, SIMDINPUT_LENGTH};
#[derive(Debug)]
pub(crate) struct SimdInput {
v: u8x64,
Expand Down
2 changes: 1 addition & 1 deletion src/impls/simd128/stage1.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::Stage1Parse;
use crate::{Stage1Parse, SIMDINPUT_LENGTH};
use std::arch::wasm32::{
i8x16_splat, u32x4, u32x4_add, u32x4_splat, u8x16, u8x16_bitmask, u8x16_eq, u8x16_le,
u8x16_shr, u8x16_splat, u8x16_swizzle, v128, v128_and, v128_load, v128_store,
Expand Down
13 changes: 7 additions & 6 deletions src/impls/sse42/stage1.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::{static_cast_i32, static_cast_u32, Stage1Parse};
use crate::{static_cast_i32, static_cast_u32, Stage1Parse, SIMDINPUT_LENGTH};
#[cfg(target_arch = "x86")]
use std::arch::x86 as arch;

Expand All @@ -8,15 +8,16 @@ use std::arch::x86_64 as arch;
#[cfg(target_arch = "x86")]
use arch::{
__m128i, _mm_add_epi32, _mm_and_si128, _mm_cmpeq_epi8, _mm_cmpgt_epi8, _mm_loadu_si128,
_mm_max_epu8, _mm_movemask_epi8, _mm_or_si128, _mm_set1_epi8, _mm_set_epi32, _mm_setr_epi8,
_mm_setzero_si128, _mm_shuffle_epi8, _mm_srli_epi32, _mm_storeu_si128, _mm_testz_si128,
_mm_loadu_si128, _mm_max_epu8, _mm_movemask_epi8, _mm_or_si128, _mm_set1_epi8, _mm_set_epi32,
_mm_setr_epi8, _mm_setzero_si128, _mm_shuffle_epi8, _mm_srli_epi32, _mm_storeu_si128,
_mm_testz_si128,
};

#[cfg(target_arch = "x86_64")]
use arch::{
__m128i, _mm_add_epi32, _mm_and_si128, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_max_epu8,
_mm_movemask_epi8, _mm_set1_epi8, _mm_set_epi32, _mm_setr_epi8, _mm_setzero_si128,
_mm_shuffle_epi8, _mm_srli_epi32, _mm_storeu_si128,
__m128i, _mm_add_epi32, _mm_and_si128, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_loadu_si128,
_mm_max_epu8, _mm_movemask_epi8, _mm_set1_epi8, _mm_set_epi32, _mm_setr_epi8,
_mm_setzero_si128, _mm_shuffle_epi8, _mm_srli_epi32, _mm_storeu_si128,
};

macro_rules! low_nibble_mask {
Expand Down
38 changes: 23 additions & 15 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,8 @@ type ParseStrFn = for<'invoke, 'de> unsafe fn(
any(target_arch = "x86_64", target_arch = "x86"),
))]
type FindStructuralBitsFn = unsafe fn(
input: &[u8],
input: &AlignedBuf,
len: usize,
structural_indexes: &mut Vec<u32>,
) -> std::result::Result<(), ErrorType>;

Expand Down Expand Up @@ -698,7 +699,8 @@ impl<'de> Deserializer<'de> {
any(target_arch = "x86_64", target_arch = "x86"),
))]
pub(crate) unsafe fn find_structural_bits(
input: &[u8],
input: &AlignedBuf,
len: usize,
structural_indexes: &mut Vec<u32>,
) -> std::result::Result<(), ErrorType> {
use std::sync::atomic::{AtomicPtr, Ordering};
Expand All @@ -722,16 +724,17 @@ impl<'de> Deserializer<'de> {

#[cfg_attr(not(feature = "no-inline"), inline)]
unsafe fn get_fastest(
input: &[u8],
input: &AlignedBuf,
len: usize,
structural_indexes: &mut Vec<u32>,
) -> core::result::Result<(), error::ErrorType> {
let fun = get_fastest_available_implementation();
FN.store(fun as FnRaw, Ordering::Relaxed);
(fun)(input, structural_indexes)
(fun)(input, len, structural_indexes)
}

let fun = FN.load(Ordering::Relaxed);
mem::transmute::<FnRaw, FindStructuralBitsFn>(fun)(input, structural_indexes)
mem::transmute::<FnRaw, FindStructuralBitsFn>(fun)(input, len, structural_indexes)
}

#[cfg(not(any(
Expand All @@ -747,7 +750,8 @@ impl<'de> Deserializer<'de> {
)))]
#[cfg_attr(not(feature = "no-inline"), inline)]
pub(crate) unsafe fn find_structural_bits(
input: &[u8],
input: &AlignedBuf,
len: usize,
structural_indexes: &mut Vec<u32>,
) -> std::result::Result<(), ErrorType> {
// This is a nasty hack, we don't have a chunked implementation for native rust
Expand All @@ -757,16 +761,17 @@ impl<'de> Deserializer<'de> {
Err(_) => return Err(ErrorType::InvalidUtf8),
};
#[cfg(not(feature = "portable"))]
Self::_find_structural_bits::<impls::native::SimdInput>(input, structural_indexes)
Self::_find_structural_bits::<impls::native::SimdInput>(input, len, structural_indexes)
}

#[cfg(all(feature = "portable", not(feature = "runtime-detection")))]
#[cfg_attr(not(feature = "no-inline"), inline)]
pub(crate) unsafe fn find_structural_bits(
input: &[u8],
input: &AlignedBuf,
len: usize,
structural_indexes: &mut Vec<u32>,
) -> std::result::Result<(), ErrorType> {
Self::_find_structural_bits::<impls::portable::SimdInput>(input, structural_indexes)
Self::_find_structural_bits::<impls::portable::SimdInput>(input, len, structural_indexes)
}

#[cfg(all(
Expand All @@ -776,10 +781,11 @@ impl<'de> Deserializer<'de> {
))]
#[cfg_attr(not(feature = "no-inline"), inline)]
pub(crate) unsafe fn find_structural_bits(
input: &[u8],
input: &AlignedBuf,
len: usize,
structural_indexes: &mut Vec<u32>,
) -> std::result::Result<(), ErrorType> {
Self::_find_structural_bits::<impls::avx2::SimdInput>(input, structural_indexes)
Self::_find_structural_bits::<impls::avx2::SimdInput>(input, len, structural_indexes)
}

#[cfg(all(
Expand All @@ -790,10 +796,11 @@ impl<'de> Deserializer<'de> {
))]
#[cfg_attr(not(feature = "no-inline"), inline)]
pub(crate) unsafe fn find_structural_bits(
input: &[u8],
input: &AlignedBuf,
len: usize,
structural_indexes: &mut Vec<u32>,
) -> std::result::Result<(), ErrorType> {
Self::_find_structural_bits::<impls::sse42::SimdInput>(input, structural_indexes)
Self::_find_structural_bits::<impls::sse42::SimdInput>(input, len, structural_indexes)
}

#[cfg(all(target_arch = "aarch64", not(feature = "portable")))]
Expand All @@ -809,10 +816,11 @@ impl<'de> Deserializer<'de> {
#[cfg(all(target_feature = "simd128", not(feature = "portable")))]
#[cfg_attr(not(feature = "no-inline"), inline)]
pub(crate) unsafe fn find_structural_bits(
input: &[u8],
input: &AlignedBuf,
len: usize,
structural_indexes: &mut Vec<u32>,
) -> std::result::Result<(), ErrorType> {
Self::_find_structural_bits::<impls::simd128::SimdInput>(input, structural_indexes)
Self::_find_structural_bits::<impls::simd128::SimdInput>(input, len, structural_indexes)
}
}

Expand Down

0 comments on commit 15c04c8

Please sign in to comment.