From 382281994295309e2cd8a86bca15f92c99b8cd60 Mon Sep 17 00:00:00 2001
From: Bingyi Sun
Date: Tue, 31 Dec 2024 10:42:52 +0800
Subject: [PATCH] enhance: Remove an undefined behavior in index writer (#38657)

Signed-off-by: sunby
---
 .../tantivy-binding/src/index_writer_c.rs | 31 +++++++++++++------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs
index 9f8a1f3a61..1c5fe2642a 100644
--- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs
+++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs
@@ -1,5 +1,8 @@
 use core::slice;
-use std::ffi::{c_char, c_void, CStr};
+use std::{
+    ffi::{c_char, c_void, CStr},
+    ptr::null,
+};
 
 use tantivy::Index;
 
@@ -12,6 +15,16 @@ use crate::{
     util::{create_binding, free_binding},
 };
 
+macro_rules! convert_to_rust_slice {
+    ($arr: expr, $len: expr) => {
+        match $arr {
+            // there is a UB in slice::from_raw_parts if the pointer is null
+            x if x.is_null() => &[],
+            _ => slice::from_raw_parts($arr, $len),
+        }
+    };
+}
+
 #[no_mangle]
 pub extern "C" fn tantivy_create_index(
     field_name: *const c_char,
@@ -195,7 +208,7 @@ pub extern "C" fn tantivy_index_add_multi_int8s(
 ) -> RustResult {
     let real = ptr as *mut IndexWriterWrapper;
     unsafe {
-        let arr = slice::from_raw_parts(array, len);
+        let arr = convert_to_rust_slice!(array, len);
         (*real).add_multi_i8s(arr, offset).into()
     }
 }
@@ -209,7 +222,7 @@ pub extern "C" fn tantivy_index_add_multi_int16s(
 ) -> RustResult {
     let real = ptr as *mut IndexWriterWrapper;
     unsafe {
-        let arr = slice::from_raw_parts(array, len);
+        let arr = convert_to_rust_slice!(array, len);
         (*real).add_multi_i16s(arr, offset).into()
     }
 }
@@ -223,7 +236,7 @@ pub extern "C" fn tantivy_index_add_multi_int32s(
 ) -> RustResult {
     let real = ptr as *mut IndexWriterWrapper;
     unsafe {
-        let arr = slice::from_raw_parts(array, len);
+        let arr = convert_to_rust_slice!(array, len);
         (*real).add_multi_i32s(arr, offset).into()
     }
 }
@@ -237,7 +250,7 @@ pub extern "C" fn tantivy_index_add_multi_int64s(
 ) -> RustResult {
     let real = ptr as *mut IndexWriterWrapper;
     unsafe {
-        let arr = slice::from_raw_parts(array, len);
+        let arr = convert_to_rust_slice!(array, len);
         (*real).add_multi_i64s(arr, offset).into()
     }
 }
@@ -251,7 +264,7 @@ pub extern "C" fn tantivy_index_add_multi_f32s(
 ) -> RustResult {
     let real = ptr as *mut IndexWriterWrapper;
     unsafe {
-        let arr = slice::from_raw_parts(array, len);
+        let arr = convert_to_rust_slice!(array, len);
         (*real).add_multi_f32s(arr, offset).into()
     }
 }
@@ -265,7 +278,7 @@ pub extern "C" fn tantivy_index_add_multi_f64s(
 ) -> RustResult {
     let real = ptr as *mut IndexWriterWrapper;
     unsafe {
-        let arr = slice::from_raw_parts(array, len);
+        let arr = convert_to_rust_slice!(array, len);
         (*real).add_multi_f64s(arr, offset).into()
     }
 }
@@ -279,7 +292,7 @@ pub extern "C" fn tantivy_index_add_multi_bools(
 ) -> RustResult {
     let real = ptr as *mut IndexWriterWrapper;
     unsafe {
-        let arr = slice::from_raw_parts(array, len);
+        let arr = convert_to_rust_slice!(array, len);
         (*real).add_multi_bools(arr, offset).into()
     }
 }
@@ -293,7 +306,7 @@ pub extern "C" fn tantivy_index_add_multi_keywords(
 ) -> RustResult {
     let real = ptr as *mut IndexWriterWrapper;
     unsafe {
-        let arr = slice::from_raw_parts(array, len);
+        let arr = convert_to_rust_slice!(array, len);
         (*real).add_multi_keywords(arr, offset).into()
     }
 }