diff --git a/internal/core/src/exec/expression/BinaryRangeExpr.cpp b/internal/core/src/exec/expression/BinaryRangeExpr.cpp index 17fa00ae92..057fed18cc 100644 --- a/internal/core/src/exec/expression/BinaryRangeExpr.cpp +++ b/internal/core/src/exec/expression/BinaryRangeExpr.cpp @@ -722,9 +722,7 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJsonStats() { } } }; - if (!index->CanSkipShared(pointer)) { - index->ExecuteForSharedData(op_ctx_, pointer, shared_executor); - } + index->ExecuteForSharedData(op_ctx_, pointer, shared_executor); cached_index_chunk_id_ = 0; } diff --git a/internal/core/src/exec/expression/ExistsExpr.cpp b/internal/core/src/exec/expression/ExistsExpr.cpp index a1c8bfb297..d475bd4659 100644 --- a/internal/core/src/exec/expression/ExistsExpr.cpp +++ b/internal/core/src/exec/expression/ExistsExpr.cpp @@ -216,16 +216,14 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegmentByStats() { res_view |= temp_valid_view; } - if (!index->CanSkipShared(pointer)) { - // process shared data, need to check if the value is empty - // which match the semantics of exists in Json.h - index->ExecuteForSharedData( - op_ctx_, - pointer, - [&](BsonView bson, uint32_t row_id, uint32_t offset) { - res_view[row_id] = !bson.IsBsonValueEmpty(offset); - }); - } + // process shared data, need to check if the value is empty + // which match the semantics of exists in Json.h + index->ExecuteForSharedData( + op_ctx_, + pointer, + [&](BsonView bson, uint32_t row_id, uint32_t offset) { + res_view[row_id] = !bson.IsBsonValueEmpty(offset); + }); } TargetBitmap result; diff --git a/internal/core/src/exec/expression/JsonContainsExpr.cpp b/internal/core/src/exec/expression/JsonContainsExpr.cpp index f9a03ab06d..94b6de2755 100644 --- a/internal/core/src/exec/expression/JsonContainsExpr.cpp +++ b/internal/core/src/exec/expression/JsonContainsExpr.cpp @@ -508,9 +508,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsByStats() { } } }; - if (!index->CanSkipShared(pointer)) { - index->ExecuteForSharedData(op_ctx_, pointer, shared_executor); - } + index->ExecuteForSharedData(op_ctx_, pointer, shared_executor); cached_index_chunk_id_ = 0; } @@ -709,9 +707,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArrayByStats() { } return false; }; - if (!index->CanSkipShared(pointer)) { - index->ExecuteForSharedData(op_ctx_, pointer, shared_executor); - } + index->ExecuteForSharedData(op_ctx_, pointer, shared_executor); cached_index_chunk_id_ = 0; } @@ -1046,9 +1042,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllByStats() { } res_view[row_offset] = tmp_elements.empty(); }; - if (!index->CanSkipShared(pointer)) { - index->ExecuteForSharedData(op_ctx_, pointer, shared_executor); - } + index->ExecuteForSharedData(op_ctx_, pointer, shared_executor); cached_index_chunk_id_ = 0; } @@ -1377,9 +1371,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffTypeByStats() { } res_view[row_offset] = tmp_elements_index.size() == 0; }; - if (!index->CanSkipShared(pointer)) { - index->ExecuteForSharedData(op_ctx_, pointer, shared_executor); - } + index->ExecuteForSharedData(op_ctx_, pointer, shared_executor); cached_index_chunk_id_ = 0; } @@ -1584,9 +1576,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArrayByStats() { res_view[row_offset] = exist_elements_index.size() == elements.size(); }; - if (!index->CanSkipShared(pointer)) { - index->ExecuteForSharedData(op_ctx_, pointer, shared_executor); - } + index->ExecuteForSharedData(op_ctx_, pointer, shared_executor); cached_index_chunk_id_ = 0; } @@ -1890,9 +1880,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffTypeByStats() { } } }; - if (!index->CanSkipShared(pointer)) { - index->ExecuteForSharedData(op_ctx_, pointer, shared_executor); - } + index->ExecuteForSharedData(op_ctx_, pointer, shared_executor); cached_index_chunk_id_ = 0; } diff --git a/internal/core/src/exec/expression/TermExpr.cpp b/internal/core/src/exec/expression/TermExpr.cpp index ece522624f..5544cd47db 100644 --- a/internal/core/src/exec/expression/TermExpr.cpp +++ b/internal/core/src/exec/expression/TermExpr.cpp @@ -687,9 +687,7 @@ PhyTermFilterExpr::ExecJsonInVariableByStats() { return; } }; - if (!index->CanSkipShared(pointer)) { - index->ExecuteForSharedData(op_ctx_, pointer, shared_executor); - } + index->ExecuteForSharedData(op_ctx_, pointer, shared_executor); cached_index_chunk_id_ = 0; } diff --git a/internal/core/src/exec/expression/UnaryExpr.cpp b/internal/core/src/exec/expression/UnaryExpr.cpp index a62ae71cb0..3cbfd24d5e 100644 --- a/internal/core/src/exec/expression/UnaryExpr.cpp +++ b/internal/core/src/exec/expression/UnaryExpr.cpp @@ -1265,9 +1265,7 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJsonByStats() { ms); }); - if (!index->CanSkipShared(pointer, target_types)) { - index->ExecuteForSharedData(op_ctx_, pointer, shared_executor); - } + index->ExecuteForSharedData(op_ctx_, pointer, shared_executor); } // for NotEqual: flip the result diff --git a/internal/core/src/index/json_stats/JsonKeyStats.cpp b/internal/core/src/index/json_stats/JsonKeyStats.cpp index 2471cbf111..57d57370ad 100644 --- a/internal/core/src/index/json_stats/JsonKeyStats.cpp +++ b/internal/core/src/index/json_stats/JsonKeyStats.cpp @@ -809,8 +809,12 @@ JsonKeyStats::GetCommonMetaFromParquet(const std::string& file) { try { auto layout_type_json = nlohmann::json::parse(value); for (const auto& [k, v] : layout_type_json.items()) { - field_layout_type_map_[k] = JsonKeyLayoutTypeFromString(v); - key_data_type_map_[k] = GetJsonTypeFromKeyName(k); + auto layout_type = JsonKeyLayoutTypeFromString(v); + // Only store metadata for shredding columns (TYPED/DYNAMIC), + // skip SHARED keys to save memory + if (layout_type == JsonKeyLayoutType::SHARED) { + continue; + } key_field_map_[GetKeyFromColumnName(k)].insert(k); } } catch (const std::exception& e) { @@ -1062,4 +1066,4 @@ JsonKeyStats::Upload(const Config& config) { std::move(index_files)); } -} // namespace milvus::index \ No newline at end of file +} // namespace milvus::index diff --git a/internal/core/src/index/json_stats/JsonKeyStats.h b/internal/core/src/index/json_stats/JsonKeyStats.h index 4615ace0d3..49af1a33ab 100644 --- a/internal/core/src/index/json_stats/JsonKeyStats.h +++ b/internal/core/src/index/json_stats/JsonKeyStats.h @@ -168,6 +168,9 @@ class JsonKeyStats : public ScalarIndex { const std::string& path, std::function func) { + if (shared_column_ == nullptr || bson_inverted_index_ == nullptr) { + return; + } bson_inverted_index_->TermQuery( path, [this, &func, op_ctx](const uint32_t* row_id_array, @@ -181,6 +184,9 @@ class JsonKeyStats : public ScalarIndex { void ExecuteExistsPathForSharedData(const std::string& path, TargetBitmapView bitset) { + if (bson_inverted_index_ == nullptr) { + return; + } bson_inverted_index_->TermQueryEach( path, [&bitset](uint32_t row_id, uint32_t offset) { bitset[row_id] = true; @@ -302,53 +308,6 @@ class JsonKeyStats : public ScalarIndex { return processed_size; } - // Whether shared columns can be skipped for this path (type-agnostic) - bool - CanSkipShared(const std::string& path) { - auto it = key_field_map_.find(path); - if (it == key_field_map_.end()) { - return true; - } - - const auto& field_names = it->second; - for (const auto& field_name : field_names) { - if (field_layout_type_map_[field_name] == - JsonKeyLayoutType::SHARED) { - return false; - } - } - - return true; - } - - // Whether shared columns can be skipped for this path with type filter - bool - CanSkipShared(const std::string& path, - const std::set& target_types) { - auto it = key_field_map_.find(path); - if (it == key_field_map_.end()) { - return true; - } - - const auto& field_names = it->second; - for (const auto& field_name : field_names) { - if (field_layout_type_map_[field_name] != - JsonKeyLayoutType::SHARED) { - continue; - } - - if (!target_types.empty() && - target_types.find(key_data_type_map_[field_name]) == - target_types.end()) { - continue; - } - - return false; - } - - return true; - } - std::set GetShreddingFields(const std::string& pointer) { std::set fields; @@ -679,14 +638,10 @@ class JsonKeyStats : public ScalarIndex { milvus::proto::common::LoadPriority load_priority_; // some meta cache for searching - // json_path -> [json_path_int, json_path_array, json_path_object, ...], only for all keys + // json_path -> [json_path_int, json_path_string, ...], only for shredding columns std::unordered_map> key_field_map_; // field_name -> data_type, such as json_path_int -> JSONType::INT64, only for real shredding columns std::unordered_map shred_field_data_type_map_; - // key_name -> data_type, such as json_path_int -> JSONType::INT64, for all keys - std::unordered_map key_data_type_map_; - // field_name -> key_type, such as json_path_int -> JsonKeyLayoutType::TYPED, for all keys - std::unordered_map field_layout_type_map_; // field_name -> field_id, such as json_path_int -> 1001 std::unordered_map field_name_to_id_map_; // field_id -> field_name, such as 1001 -> json_path_int diff --git a/internal/core/unittest/test_json_stats/test_json_key_stats.cpp b/internal/core/unittest/test_json_stats/test_json_key_stats.cpp index eaae69fedc..aef4c74717 100644 --- a/internal/core/unittest/test_json_stats/test_json_key_stats.cpp +++ b/internal/core/unittest/test_json_stats/test_json_key_stats.cpp @@ -267,10 +267,7 @@ TEST_P(JsonKeyStatsTest, TestExecutorForGettingValid) { index_->ExecutorForGettingValid(nullptr, field, valid_res_view); EXPECT_EQ(processed_size, size_); } - if (!index_->CanSkipShared(path)) { - std::cout << "can not skip shared" << std::endl; - index_->ExecuteExistsPathForSharedData(path, valid_res_view); - } + index_->ExecuteExistsPathForSharedData(path, valid_res_view); std::cout << "valid_res.count(): " << valid_res.count() << std::endl; if (nullable_) { EXPECT_EQ(valid_res.count(), 400); @@ -323,15 +320,6 @@ TEST_P(JsonKeyStatsTest, TestGetShreddingFields) { EXPECT_FALSE(typed_fields.empty()); } -TEST_P(JsonKeyStatsTest, TestCanSkipShared) { - std::string path = "/int"; - std::set target_types = {JSONType::INT64}; - EXPECT_TRUE(index_->CanSkipShared(path, target_types)); - - target_types = {JSONType::STRING}; - EXPECT_TRUE(index_->CanSkipShared(path, target_types)); -} - class JsonKeyStatsUploadLoadTest : public ::testing::Test { protected: void