5#if defined(_ARCH_X86) || defined(_ARCH_X64)
12typedef __m128i simd128;
16_meta_inline simd128 simd128Set1_u8(uint8 val)
18 return _mm_set1_epi8((
char)val);
21_meta_inline simd128 simd128Load(
const void *ptr)
23 return _mm_loadu_si128((
const __m128i*)ptr);
26_meta_inline uint32 simd128CmpEq_u8_mask(simd128 a, simd128 b)
28 return (uint32)_mm_movemask_epi8(_mm_cmpeq_epi8(a, b));
31#elif defined(_ARCH_ARM64)
37typedef uint8x16_t simd128;
39_meta_inline simd128 simd128Set1_u8(uint8 val)
41 return vdupq_n_u8(val);
44_meta_inline simd128 simd128Load(
const void *ptr)
46 return vld1q_u8((
const uint8_t*)ptr);
49_meta_inline uint32 simd128CmpEq_u8_mask(simd128 a, simd128 b)
52 uint8x16_t cmp = vceqq_u8(a, b);
55 static const uint8_t bit_positions[16] = {
56 1<<0, 1<<1, 1<<2, 1<<3, 1<<4, 1<<5, 1<<6, 1<<7,
57 1<<0, 1<<1, 1<<2, 1<<3, 1<<4, 1<<5, 1<<6, 1<<7
60 uint8x16_t bits = vandq_u8(cmp, vld1q_u8(bit_positions));
61 uint8x8_t low = vget_low_u8(bits);
62 uint8x8_t high = vget_high_u8(bits);
64 return vaddv_u8(low) | ((uint32)vaddv_u8(high) << 8);
/* Compiler and platform detection macros. */