enhance: [2.5]Make GeometryCache an optional configuration (#45197)

issue: #45187 
master pr: #45192

---------

Signed-off-by: Cai Zhang <cai.zhang@zilliz.com>
This commit is contained in:
cai.zhang 2025-11-03 20:31:34 +08:00 committed by GitHub
parent dfcef7d14d
commit cc9735ff4f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 187 additions and 95 deletions

View File

@ -425,6 +425,7 @@ queryNode:
memExpansionRate: 1.15 # extra memory needed by building interim index memExpansionRate: 1.15 # extra memory needed by building interim index
buildParallelRate: 0.5 # the ratio of building interim index parallel matched with cpu num buildParallelRate: 0.5 # the ratio of building interim index parallel matched with cpu num
multipleChunkedEnable: true # Enable multiple chunked search multipleChunkedEnable: true # Enable multiple chunked search
enableGeometryCache: false # Enable geometry cache for geometry data
deleteDumpBatchSize: 10000 # Batch size for delete snapshot dump in segcore. deleteDumpBatchSize: 10000 # Batch size for delete snapshot dump in segcore.
knowhereScoreConsistency: false # Enable knowhere strong consistency score computation logic knowhereScoreConsistency: false # Enable knowhere strong consistency score computation logic
jsonKeyStatsCommitInterval: 200 # the commit interval for the JSON key Stats to commit jsonKeyStatsCommitInterval: 200 # the commit interval for the JSON key Stats to commit

View File

@ -114,7 +114,7 @@ class SimpleGeometryCacheManager {
SimpleGeometryCacheManager() = default; SimpleGeometryCacheManager() = default;
SimpleGeometryCache& SimpleGeometryCache&
GetCache(int64_t segment_id, FieldId field_id) { GetOrCreateCache(int64_t segment_id, FieldId field_id) {
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);
auto key = MakeCacheKey(segment_id, field_id); auto key = MakeCacheKey(segment_id, field_id);
auto it = caches_.find(key); auto it = caches_.find(key);
@ -128,6 +128,17 @@ class SimpleGeometryCacheManager {
return *cache_ptr; return *cache_ptr;
} }
// Looks up the cache registered under (segment_id, field_id) without
// creating one. Returns nullptr when no entry exists for that key, which
// lets callers fall back to a path that does not use the cache.
//
// mutex_ is held only for the duration of the map lookup.
// NOTE(review): the returned raw pointer is no longer guarded by mutex_ once
// this function returns; presumably callers rely on the entry not being
// removed while they use it -- confirm against RemoveCache's callers.
SimpleGeometryCache*
GetCache(int64_t segment_id, FieldId field_id) {
    std::lock_guard<std::mutex> guard(mutex_);
    const auto found = caches_.find(MakeCacheKey(segment_id, field_id));
    return found == caches_.end() ? nullptr : found->second.get();
}
void void
RemoveCache(GEOSContextHandle_t ctx, int64_t segment_id, FieldId field_id) { RemoveCache(GEOSContextHandle_t ctx, int64_t segment_id, FieldId field_id) {
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);
@ -184,26 +195,4 @@ class SimpleGeometryCacheManager {
} // namespace exec } // namespace exec
// Convenient global functions for direct access to geometry cache
inline const Geometry*
GetGeometryByOffset(int64_t segment_id, FieldId field_id, size_t offset) {
auto& cache = exec::SimpleGeometryCacheManager::Instance().GetCache(
segment_id, field_id);
return cache.GetByOffset(offset);
}
inline void
RemoveGeometryCache(GEOSContextHandle_t ctx,
int64_t segment_id,
FieldId field_id) {
exec::SimpleGeometryCacheManager::Instance().RemoveCache(
ctx, segment_id, field_id);
}
inline void
RemoveSegmentGeometryCaches(GEOSContextHandle_t ctx, int64_t segment_id) {
exec::SimpleGeometryCacheManager::Instance().RemoveSegmentCaches(
ctx, segment_id);
}
} // namespace milvus } // namespace milvus

View File

@ -20,44 +20,55 @@
namespace milvus { namespace milvus {
namespace exec { namespace exec {
#define GEOMETRY_EXECUTE_SUB_BATCH_WITH_COMPARISON(_DataType, method) \ #define GEOMETRY_EXECUTE_SUB_BATCH_WITH_COMPARISON(_DataType, method) \
auto execute_sub_batch = [this](const _DataType* data, \ auto execute_sub_batch = [this](const _DataType* data, \
const bool* valid_data, \ const bool* valid_data, \
const int32_t* offsets, \ const int32_t* offsets, \
const int32_t* segment_offsets, \ const int32_t* segment_offsets, \
const int size, \ const int size, \
TargetBitmapView res, \ TargetBitmapView res, \
TargetBitmapView valid_res, \ TargetBitmapView valid_res, \
const Geometry& right_source) { \ const Geometry& right_source) { \
AssertInfo(segment_offsets != nullptr, \ AssertInfo(segment_offsets != nullptr, \
"segment_offsets should not be nullptr"); \ "segment_offsets should not be nullptr"); \
/* Unified path using simple WKB-content-based cache for both sealed and growing segments. */ \ auto* geometry_cache = \
auto& geometry_cache = \ SimpleGeometryCacheManager::Instance().GetCache( \
SimpleGeometryCacheManager::Instance().GetCache( \ this->segment_->get_segment_id(), field_id_); \
this->segment_->get_segment_id(), field_id_); \ if (geometry_cache) { \
auto cache_lock = geometry_cache.AcquireReadLock(); \ auto cache_lock = geometry_cache->AcquireReadLock(); \
for (int i = 0; i < size; ++i) { \ for (int i = 0; i < size; ++i) { \
if (valid_data != nullptr && !valid_data[i]) { \ if (valid_data != nullptr && !valid_data[i]) { \
res[i] = valid_res[i] = false; \ res[i] = valid_res[i] = false; \
continue; \ continue; \
} \ } \
auto absolute_offset = segment_offsets[i]; \ auto absolute_offset = segment_offsets[i]; \
auto cached_geometry = \ auto cached_geometry = \
geometry_cache.GetByOffsetUnsafe(absolute_offset); \ geometry_cache->GetByOffsetUnsafe(absolute_offset); \
AssertInfo(cached_geometry != nullptr, \ AssertInfo(cached_geometry != nullptr, \
"cached geometry is nullptr"); \ "cached geometry is nullptr"); \
res[i] = cached_geometry->method(right_source); \ res[i] = cached_geometry->method(right_source); \
} \ } \
}; \ } else { \
int64_t processed_size = ProcessDataChunks<_DataType, true>( \ GEOSContextHandle_t ctx_ = GEOS_init_r(); \
execute_sub_batch, std::nullptr_t{}, res, valid_res, right_source); \ for (int i = 0; i < size; ++i) { \
AssertInfo(processed_size == real_batch_size, \ if (valid_data != nullptr && !valid_data[i]) { \
"internal error: expr processed rows {} not equal " \ res[i] = valid_res[i] = false; \
"expect batch size {}", \ continue; \
processed_size, \ } \
real_batch_size); \ res[i] = Geometry(ctx_, data[i].data(), data[i].size()) \
.method(right_source); \
} \
GEOS_finish_r(ctx_); \
} \
}; \
int64_t processed_size = ProcessDataChunks<_DataType, true>( \
execute_sub_batch, std::nullptr_t{}, res, valid_res, right_source); \
AssertInfo(processed_size == real_batch_size, \
"internal error: expr processed rows {} not equal " \
"expect batch size {}", \
processed_size, \
real_batch_size); \
return res_vec; return res_vec;
// Specialized macro for distance-based operations (ST_DWITHIN) // Specialized macro for distance-based operations (ST_DWITHIN)
#define GEOMETRY_EXECUTE_SUB_BATCH_WITH_COMPARISON_DISTANCE(_DataType, method) \ #define GEOMETRY_EXECUTE_SUB_BATCH_WITH_COMPARISON_DISTANCE(_DataType, method) \
auto execute_sub_batch = [this](const _DataType* data, \ auto execute_sub_batch = [this](const _DataType* data, \
@ -70,21 +81,35 @@ namespace exec {
const Geometry& right_source) { \ const Geometry& right_source) { \
AssertInfo(segment_offsets != nullptr, \ AssertInfo(segment_offsets != nullptr, \
"segment_offsets should not be nullptr"); \ "segment_offsets should not be nullptr"); \
auto& geometry_cache = \ auto* geometry_cache = \
SimpleGeometryCacheManager::Instance().GetCache( \ SimpleGeometryCacheManager::Instance().GetCache( \
this->segment_->get_segment_id(), field_id_); \ this->segment_->get_segment_id(), field_id_); \
auto cache_lock = geometry_cache.AcquireReadLock(); \ if (geometry_cache) { \
for (int i = 0; i < size; ++i) { \ auto cache_lock = geometry_cache->AcquireReadLock(); \
if (valid_data != nullptr && !valid_data[i]) { \ for (int i = 0; i < size; ++i) { \
res[i] = valid_res[i] = false; \ if (valid_data != nullptr && !valid_data[i]) { \
continue; \ res[i] = valid_res[i] = false; \
continue; \
} \
auto absolute_offset = segment_offsets[i]; \
auto cached_geometry = \
geometry_cache->GetByOffsetUnsafe(absolute_offset); \
AssertInfo(cached_geometry != nullptr, \
"cached geometry is nullptr"); \
res[i] = \
cached_geometry->method(right_source, expr_->distance_); \
} \ } \
auto absolute_offset = segment_offsets[i]; \ } else { \
auto cached_geometry = \ GEOSContextHandle_t ctx_ = GEOS_init_r(); \
geometry_cache.GetByOffsetUnsafe(absolute_offset); \ for (int i = 0; i < size; ++i) { \
AssertInfo(cached_geometry != nullptr, \ if (valid_data != nullptr && !valid_data[i]) { \
"cached geometry is nullptr"); \ res[i] = valid_res[i] = false; \
res[i] = cached_geometry->method(right_source, expr_->distance_); \ continue; \
} \
res[i] = Geometry(ctx_, data[i].data(), data[i].size()) \
.method(right_source, expr_->distance_); \
} \
GEOS_finish_r(ctx_); \
} \ } \
}; \ }; \
int64_t processed_size = ProcessDataChunks<_DataType, true>( \ int64_t processed_size = ProcessDataChunks<_DataType, true>( \
@ -358,21 +383,22 @@ PhyGISFunctionFilterExpr::EvalForIndexSegment() {
}; };
// Lambda: Process sealed segment data using bulk_subscript with SimpleGeometryCache // Lambda: Process sealed segment data using bulk_subscript with SimpleGeometryCache
auto process_sealed_data = auto process_sealed_data = [&](const std::vector<int64_t>&
[&](const std::vector<int64_t>& hit_offsets) { hit_offsets) {
if (hit_offsets.empty()) if (hit_offsets.empty())
return; return;
// Get simple geometry cache for this segment+field // Get simple geometry cache for this segment+field
auto& geometry_cache = auto* geometry_cache =
SimpleGeometryCacheManager::Instance().GetCache( SimpleGeometryCacheManager::Instance().GetCache(
segment_->get_segment_id(), field_id_); segment_->get_segment_id(), field_id_);
auto cache_lock = geometry_cache.AcquireReadLock(); if (geometry_cache) {
auto cache_lock = geometry_cache->AcquireReadLock();
for (size_t i = 0; i < hit_offsets.size(); ++i) { for (size_t i = 0; i < hit_offsets.size(); ++i) {
const auto pos = hit_offsets[i]; const auto pos = hit_offsets[i];
auto cached_geometry = auto cached_geometry =
geometry_cache.GetByOffsetUnsafe(pos); geometry_cache->GetByOffsetUnsafe(pos);
// skip invalid geometry // skip invalid geometry
if (cached_geometry == nullptr) { if (cached_geometry == nullptr) {
continue; continue;
@ -384,7 +410,35 @@ PhyGISFunctionFilterExpr::EvalForIndexSegment() {
refined.set(pos); refined.set(pos);
} }
} }
}; } else {
auto data_array = segment_->bulk_subscript(
field_id_, hit_offsets.data(), hit_offsets.size());
auto geometry_array =
static_cast<const milvus::proto::schema::GeometryArray*>(
&data_array->scalars().geometry_data());
const auto& valid_data = data_array->valid_data();
GEOSContextHandle_t ctx = GEOS_init_r();
for (size_t i = 0; i < hit_offsets.size(); ++i) {
const auto pos = hit_offsets[i];
// Skip invalid data
if (!valid_data.empty() && !valid_data[i]) {
continue;
}
const auto& wkb_data = geometry_array->data(i);
Geometry left(ctx, wkb_data.data(), wkb_data.size());
bool result = evaluate_geometry(left, query_geometry);
if (result) {
refined.set(pos);
}
}
GEOS_finish_r(ctx);
}
};
auto hit_offsets = collect_hits(); auto hit_offsets = collect_hits();
process_sealed_data(hit_offsets); process_sealed_data(hit_offsets);

View File

@ -2238,8 +2238,8 @@ ChunkedSegmentSealedImpl::LoadGeometryCache(
try { try {
// Get geometry cache for this segment+field // Get geometry cache for this segment+field
auto& geometry_cache = auto& geometry_cache =
milvus::exec::SimpleGeometryCacheManager::Instance().GetCache( milvus::exec::SimpleGeometryCacheManager::Instance()
get_segment_id(), field_id); .GetOrCreateCache(get_segment_id(), field_id);
// Iterate through all chunks and collect WKB data // Iterate through all chunks and collect WKB data
auto num_chunks = var_column.num_chunks(); auto num_chunks = var_column.num_chunks();

View File

@ -147,6 +147,16 @@ class SegcoreConfig {
return refine_with_quant_flag_; return refine_with_quant_flag_;
} }
// Sets the geometry-cache switch. The backing field is declared
// `inline static`, so the value written here is the one observed through
// every SegcoreConfig object, not only this instance.
void
set_enable_geometry_cache(bool enable_geometry_cache) {
enable_geometry_cache_ = enable_geometry_cache;
}
// Returns the current geometry-cache switch. The backing field is
// initialised to false, so the cache is off unless the setter was called.
bool
get_enable_geometry_cache() const {
return enable_geometry_cache_;
}
private: private:
inline static const std::unordered_set<std::string> inline static const std::unordered_set<std::string>
valid_dense_vector_index_type = { valid_dense_vector_index_type = {
@ -165,6 +175,7 @@ class SegcoreConfig {
inline static knowhere::RefineType refine_type_ = inline static knowhere::RefineType refine_type_ =
knowhere::RefineType::DATA_VIEW; knowhere::RefineType::DATA_VIEW;
inline static bool refine_with_quant_flag_ = false; inline static bool refine_with_quant_flag_ = false;
inline static bool enable_geometry_cache_ = false;
}; };
} // namespace milvus::segcore } // namespace milvus::segcore

View File

@ -179,7 +179,8 @@ SegmentGrowingImpl::Insert(int64_t reserved_offset,
} }
// Build geometry cache for GEOMETRY fields // Build geometry cache for GEOMETRY fields
if (field_meta.get_data_type() == DataType::GEOMETRY) { if (field_meta.get_data_type() == DataType::GEOMETRY &&
segcore_config_.get_enable_geometry_cache()) {
BuildGeometryCacheForInsert( BuildGeometryCacheForInsert(
field_id, field_id,
&insert_record_proto->fields_data(data_offset), &insert_record_proto->fields_data(data_offset),
@ -324,7 +325,8 @@ SegmentGrowingImpl::LoadFieldData(const LoadFieldDataInfo& infos) {
} }
// Build geometry cache for GEOMETRY fields // Build geometry cache for GEOMETRY fields
if (field_meta.get_data_type() == DataType::GEOMETRY) { if (field_meta.get_data_type() == DataType::GEOMETRY &&
segcore_config_.get_enable_geometry_cache()) {
BuildGeometryCacheForLoad(field_id, field_data); BuildGeometryCacheForLoad(field_id, field_data);
} }
@ -1028,8 +1030,8 @@ SegmentGrowingImpl::BuildGeometryCacheForInsert(FieldId field_id,
try { try {
// Get geometry cache for this segment+field // Get geometry cache for this segment+field
auto& geometry_cache = auto& geometry_cache =
milvus::exec::SimpleGeometryCacheManager::Instance().GetCache( milvus::exec::SimpleGeometryCacheManager::Instance()
get_segment_id(), field_id); .GetOrCreateCache(get_segment_id(), field_id);
// Process geometry data from DataArray // Process geometry data from DataArray
const auto& geometry_data = data_array->scalars().geometry_data(); const auto& geometry_data = data_array->scalars().geometry_data();
@ -1071,8 +1073,8 @@ SegmentGrowingImpl::BuildGeometryCacheForLoad(
try { try {
// Get geometry cache for this segment+field // Get geometry cache for this segment+field
auto& geometry_cache = auto& geometry_cache =
milvus::exec::SimpleGeometryCacheManager::Instance().GetCache( milvus::exec::SimpleGeometryCacheManager::Instance()
get_segment_id(), field_id); .GetOrCreateCache(get_segment_id(), field_id);
// Process each field data chunk // Process each field data chunk
for (const auto& data : field_data) { for (const auto& data : field_data) {

View File

@ -447,7 +447,9 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) {
field_data_size = var_column->DataByteSize(); field_data_size = var_column->DataByteSize();
// Construct GeometryCache for the entire field // Construct GeometryCache for the entire field
LoadGeometryCache(field_id, *var_column); if (segcore_config_.get_enable_geometry_cache()) {
LoadGeometryCache(field_id, *var_column);
}
column = std::move(var_column); column = std::move(var_column);
break; break;
@ -623,7 +625,9 @@ SegmentSealedImpl::MapFieldData(const FieldId field_id, FieldDataInfo& data) {
var_column->Seal(std::move(indices)); var_column->Seal(std::move(indices));
// Construct GeometryCache for the entire field (mmap mode) // Construct GeometryCache for the entire field (mmap mode)
LoadGeometryCache(field_id, *var_column); if (segcore_config_.get_enable_geometry_cache()) {
LoadGeometryCache(field_id, *var_column);
}
column = std::move(var_column); column = std::move(var_column);
break; break;
@ -2275,8 +2279,8 @@ SegmentSealedImpl::LoadGeometryCache(
try { try {
// Get geometry cache for this segment+field // Get geometry cache for this segment+field
auto& geometry_cache = auto& geometry_cache =
milvus::exec::SimpleGeometryCacheManager::Instance().GetCache( milvus::exec::SimpleGeometryCacheManager::Instance()
get_segment_id(), field_id); .GetOrCreateCache(get_segment_id(), field_id);
// Get all string views from the single chunk // Get all string views from the single chunk
auto [string_views, valid_data] = var_column.StringViews(); auto [string_views, valid_data] = var_column.StringViews();

View File

@ -40,6 +40,13 @@ SegcoreSetEnableInterminSegmentIndex(const bool value) {
config.set_enable_interim_segment_index(value); config.set_enable_interim_segment_index(value);
} }
// C entry point that forwards the geometry-cache switch to the default
// SegcoreConfig. `value` is stored as-is via set_enable_geometry_cache.
extern "C" void
SegcoreSetEnableGeometryCache(const bool value) {
    milvus::segcore::SegcoreConfig::default_config()
        .set_enable_geometry_cache(value);
}
extern "C" void extern "C" void
SegcoreSetNlist(const int64_t value) { SegcoreSetNlist(const int64_t value) {
milvus::segcore::SegcoreConfig& config = milvus::segcore::SegcoreConfig& config =

View File

@ -28,6 +28,9 @@ SegcoreSetChunkRows(const int64_t);
void void
SegcoreSetEnableInterminSegmentIndex(const bool); SegcoreSetEnableInterminSegmentIndex(const bool);
void
SegcoreSetEnableGeometryCache(const bool);
void void
SegcoreSetNlist(const int64_t); SegcoreSetNlist(const int64_t);

View File

@ -351,6 +351,11 @@ func (node *QueryNode) InitSegcore() error {
return err return err
} }
err = initcore.InitGeometryCache(paramtable.Get())
if err != nil {
return err
}
initcore.InitTraceConfig(paramtable.Get()) initcore.InitTraceConfig(paramtable.Get())
C.InitExecExpressionFunctionFactory() C.InitExecExpressionFunctionFactory()
return nil return nil

View File

@ -266,6 +266,12 @@ func InitInterminIndexConfig(params *paramtable.ComponentParam) error {
return HandleCStatus(&status, "InitInterminIndexConfig failed") return HandleCStatus(&status, "InitInterminIndexConfig failed")
} }
// InitGeometryCache reads QueryNodeCfg.EnableGeometryCache from params and
// passes it to segcore through C.SegcoreSetEnableGeometryCache.
// The C call reports no status, so this function always returns nil.
func InitGeometryCache(params *paramtable.ComponentParam) error {
	enabled := params.QueryNodeCfg.EnableGeometryCache.GetAsBool()
	C.SegcoreSetEnableGeometryCache(C.bool(enabled))
	return nil
}
func CleanRemoteChunkManager() { func CleanRemoteChunkManager() {
C.CleanRemoteChunkManagerSingleton() C.CleanRemoteChunkManagerSingleton()
} }

View File

@ -2790,6 +2790,7 @@ type queryNodeConfig struct {
InterimIndexMemExpandRate ParamItem `refreshable:"false"` InterimIndexMemExpandRate ParamItem `refreshable:"false"`
InterimIndexBuildParallelRate ParamItem `refreshable:"false"` InterimIndexBuildParallelRate ParamItem `refreshable:"false"`
MultipleChunkedEnable ParamItem `refreshable:"false"` MultipleChunkedEnable ParamItem `refreshable:"false"`
EnableGeometryCache ParamItem `refreshable:"false"`
// delete snapshot dump // delete snapshot dump
DeleteDumpBatchSize ParamItem `refreshable:"false"` DeleteDumpBatchSize ParamItem `refreshable:"false"`
@ -3075,6 +3076,15 @@ This defaults to true, indicating that Milvus creates temporary index for growin
} }
p.MultipleChunkedEnable.Init(base.mgr) p.MultipleChunkedEnable.Init(base.mgr)
p.EnableGeometryCache = ParamItem{
Key: "queryNode.segcore.enableGeometryCache",
Version: "2.5.21",
DefaultValue: "false",
Doc: "Enable geometry cache for geometry data",
Export: true,
}
p.EnableGeometryCache.Init(base.mgr)
p.InterimIndexNProbe = ParamItem{ p.InterimIndexNProbe = ParamItem{
Key: "queryNode.segcore.interimIndex.nprobe", Key: "queryNode.segcore.interimIndex.nprobe",
Version: "2.0.0", Version: "2.0.0",