From 0118bef2a2fd7e9c8ff3b6d36a27ab540bdebee7 Mon Sep 17 00:00:00 2001 From: zhagnlu <1542303831@qq.com> Date: Wed, 21 Feb 2024 10:04:54 +0800 Subject: [PATCH] fix: replace sse2 simd interface with older version (#30668) #30667 Signed-off-by: luzhang Co-authored-by: luzhang --- internal/core/src/simd/avx512.cpp | 8 ++++---- internal/core/src/simd/sse2.cpp | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/core/src/simd/avx512.cpp b/internal/core/src/simd/avx512.cpp index e1bc4da3ff..2fb8f7f539 100644 --- a/internal/core/src/simd/avx512.cpp +++ b/internal/core/src/simd/avx512.cpp @@ -381,7 +381,7 @@ struct CompareValAVX512Impl { target, (CompareOperator::ComparePredicate)); __m128i cmp_res = _mm_maskz_set1_epi8(mask, 0x01); - _mm_storeu_si64((__m128i*)(res + i), cmp_res); + _mm_storel_epi64((__m128i_u*)(res + i), cmp_res); } for (size_t i = middle; i < size; ++i) { @@ -429,7 +429,7 @@ struct CompareValAVX512Impl { target, (CompareOperator::ComparePredicate)); __m128i cmp_res = _mm_maskz_set1_epi8(cmp_res_mask, 0x01); - _mm_storeu_si64((res + i), cmp_res); + _mm_storel_epi64((__m128i_u*)(res + i), cmp_res); } for (size_t i = middle; i < size; ++i) { @@ -611,7 +611,7 @@ struct CompareColumnAVX512Impl { (CompareOperator::ComparePredicate)); __m128i cmp_res = _mm_maskz_set1_epi8(mask, 0x01); - _mm_storeu_si64((__m128i*)(res + i), cmp_res); + _mm_storel_epi64((__m128i_u*)(res + i), cmp_res); } } @@ -668,7 +668,7 @@ struct CompareColumnAVX512Impl { (CompareOperator::ComparePredicate)); __m128i cmp_res = _mm_maskz_set1_epi8(cmp_res_mask, 0x01); - _mm_storeu_si64((res + i), cmp_res); + _mm_storel_epi64((__m128i_u*)(res + i), cmp_res); } for (size_t i = middle; i < size; ++i) { diff --git a/internal/core/src/simd/sse2.cpp b/internal/core/src/simd/sse2.cpp index 9726aec946..c0060ef856 100644 --- a/internal/core/src/simd/sse2.cpp +++ b/internal/core/src/simd/sse2.cpp @@ -34,9 +34,9 @@ GetBitsetBlockSSE2(const bool* src) { tmp[i] = _mm_movemask_epi8(highbits); } - __m128i tmpvec = _mm_loadu_si64(tmp); + __m128i tmpvec = _mm_loadl_epi64((__m128i_u*)tmp); BitsetBlockType res; - _mm_storeu_si64(&res, tmpvec); + _mm_storel_epi64((__m128i_u*)&res, tmpvec); return res; } else { // Others has 32 bits