From 982dd2834b6a58aa3669250b53d1bc4ffc234082 Mon Sep 17 00:00:00 2001 From: Patrick Weizhi Xu Date: Thu, 21 Mar 2024 11:19:07 +0800 Subject: [PATCH] enhance: add materialized view search info (#30888) issue: #29892 This PR 1. Pass Materialized View (MV) search information obtained from the expression parsing planning procedure to Knowhere. It only performs when MV is enabled and the partition key is involved in the expression. The search information includes: 1. Touched field_id and the count of related categories in the expression. E.g., `color == red && color == blue` yields `field_id -> 2`. 2. Whether the expression only includes AND (&&) logical operator, default `true`. 3. Whether the expression has NOT (!) operator, default `false`. 4. Store if turning on MV on the proxy to eliminate reading from paramtable for every search request. 5. Renames to MV. ## Rebuttals 1. Did not write in `ExtractInfoPlanNodeVisitor` since the new scalar framework was introduced and this part might be removed in the future. 2. Currently only interested in `==` and `in` expression, `string` data type, anything else is a bonus. 3. Leave handling expressions like `F == A || F == A` for future works of the optimizer. ## Detailed MV Info ![image](https://github.com/milvus-io/milvus/assets/6563846/b27c08a0-9fd3-4474-8897-30a3d6d6b36f) Signed-off-by: Patrick Weizhi Xu --- internal/core/src/common/QueryInfo.h | 1 + internal/core/src/expr/ITypeExpr.h | 144 +++ internal/core/src/plan/PlanNode.h | 12 + internal/core/src/query/PlanProto.cpp | 2 + .../query/visitors/ExecPlanNodeVisitor.cpp | 25 +- internal/core/unittest/CMakeLists.txt | 1 + .../unittest/test_expr_materialized_view.cpp | 975 ++++++++++++++++++ internal/datacoord/index_builder.go | 2 +- internal/datacoord/index_builder_test.go | 10 +- internal/proto/plan.proto | 1 + internal/proxy/impl.go | 11 +- internal/proxy/proxy.go | 5 + internal/proxy/search_util.go | 6 + internal/proxy/task_search.go | 11 +- pkg/util/paramtable/component_param.go | 8 +- 15 files changed, 1193 insertions(+), 21 deletions(-) create mode 100644 internal/core/unittest/test_expr_materialized_view.cpp diff --git a/internal/core/src/common/QueryInfo.h b/internal/core/src/common/QueryInfo.h index 9fd7534976..03d86f9965 100644 --- a/internal/core/src/common/QueryInfo.h +++ b/internal/core/src/common/QueryInfo.h @@ -32,6 +32,7 @@ struct SearchInfo { knowhere::Json search_params_; std::optional group_by_field_id_; tracer::TraceContext trace_ctx_; + bool materialized_view_involved = false; }; using SearchInfoPtr = std::shared_ptr; diff --git a/internal/core/src/expr/ITypeExpr.h b/internal/core/src/expr/ITypeExpr.h index cc66f54947..102709aa16 100644 --- a/internal/core/src/expr/ITypeExpr.h +++ b/internal/core/src/expr/ITypeExpr.h @@ -21,6 +21,7 @@ #include #include +#include "common/Exception.h" #include "common/Schema.h" #include "common/Types.h" #include "common/Utils.h" @@ -29,6 +30,86 @@ namespace milvus { namespace expr { +// Collect information from expressions +struct ExprInfo { + struct GenericValueEqual { + using GenericValue = proto::plan::GenericValue; + bool + operator()(const GenericValue& lhs, const GenericValue& rhs) const { + if (lhs.val_case() != rhs.val_case()) + return false; + switch (lhs.val_case()) { + case GenericValue::kBoolVal: + return lhs.bool_val() == rhs.bool_val(); + case GenericValue::kInt64Val: + return lhs.int64_val() == rhs.int64_val(); + case GenericValue::kFloatVal: + return lhs.float_val() == rhs.float_val(); + case GenericValue::kStringVal: + return lhs.string_val() == rhs.string_val(); + case GenericValue::VAL_NOT_SET: + return true; + default: + throw NotImplementedException( + "Not supported GenericValue type"); + } + } + }; + + struct GenericValueHasher { + using GenericValue = proto::plan::GenericValue; + std::size_t + operator()(const GenericValue& value) const { + std::size_t h = 0; + switch (value.val_case()) { + case GenericValue::kBoolVal: + h = std::hash()(value.bool_val()); + break; + case GenericValue::kInt64Val: + h = std::hash()(value.int64_val()); + break; + case GenericValue::kFloatVal: + h = std::hash()(value.float_val()); + break; + case GenericValue::kStringVal: + h = std::hash()(value.string_val()); + break; + case GenericValue::VAL_NOT_SET: + break; + default: + throw NotImplementedException( + "Not supported GenericValue type"); + } + return h; + } + }; + + /* For Materialized View (vectors and scalars), that is when performing filtered search. */ + // The map describes which scalar field is involved during search, + // and the set of category values + // for example, if we have scalar field `color` with field id `111` and it has three categories: red, green, blue + // expression `color == "red"`, yields `111 -> (red)` + // expression `color == "red" && color == "green"`, yields `111 -> (red, green)` + std::unordered_map> + field_id_to_values; + // whether the search exression has AND (&&) logical operator only + bool is_pure_and = true; + // whether the search expression has NOT (!) logical unary operator + bool has_not = false; +}; + +inline bool +IsMaterializedViewSupported(const DataType& data_type) { + return data_type == DataType::BOOL || data_type == DataType::INT8 || + data_type == DataType::INT16 || data_type == DataType::INT32 || + data_type == DataType::INT64 || data_type == DataType::FLOAT || + data_type == DataType::DOUBLE || data_type == DataType::VARCHAR || + data_type == DataType::STRING; +} + struct ColumnInfo { FieldId field_id_; DataType data_type_; @@ -111,6 +192,9 @@ class ITypeExpr { return inputs_; } + virtual void + GatherInfo(ExprInfo& info) const {}; + protected: DataType type_; std::vector> inputs_; @@ -235,6 +319,31 @@ class UnaryRangeFilterExpr : public ITypeFilterExpr { return ss.str(); } + void + GatherInfo(ExprInfo& info) const override { + if (IsMaterializedViewSupported(column_.data_type_)) { + info.field_id_to_values[column_.field_id_.get()].insert(val_); + + // for expression `Field == Value`, we do nothing else + if (op_type_ == proto::plan::OpType::Equal) { + return; + } + + // for expression `Field != Value`, we consider it equivalent + // as `not (Field == Value)`, so we set `has_not` to true + if (op_type_ == proto::plan::OpType::NotEqual) { + info.has_not = true; + return; + } + + // for other unary range filter <, >, <=, >= + // we add a dummy value to indicate multiple values + // this double insertion is intentional and the default GenericValue + // will be considered as equal in the unordered_set + info.field_id_to_values[column_.field_id_.get()].emplace(); + } + } + public: const ColumnInfo column_; const proto::plan::OpType op_type_; @@ -293,6 +402,15 @@ class LogicalUnaryExpr : public ITypeFilterExpr { inputs_[0]->ToString()); } + void + GatherInfo(ExprInfo& info) const override { + if (op_type_ == OpType::LogicalNot) { + info.has_not = true; + } + assert(inputs_.size() == 1); + inputs_[0]->GatherInfo(info); + } + const OpType op_type_; }; @@ -323,6 +441,14 @@ class TermFilterExpr : public ITypeFilterExpr { return ss.str(); } + void + GatherInfo(ExprInfo& info) const override { + if (IsMaterializedViewSupported(column_.data_type_)) { + info.field_id_to_values[column_.field_id_.get()].insert( + vals_.begin(), vals_.end()); + } + } + public: const ColumnInfo column_; const std::vector vals_; @@ -368,6 +494,16 @@ class LogicalBinaryExpr : public ITypeFilterExpr { return GetOpTypeString(); } + void + GatherInfo(ExprInfo& info) const override { + if (op_type_ == OpType::Or) { + info.is_pure_and = false; + } + assert(inputs_.size() == 2); + inputs_[0]->GatherInfo(info); + inputs_[1]->GatherInfo(info); + } + public: const OpType op_type_; }; @@ -400,6 +536,14 @@ class BinaryRangeFilterExpr : public ITypeFilterExpr { return ss.str(); } + void + GatherInfo(ExprInfo& info) const override { + if (IsMaterializedViewSupported(column_.data_type_)) { + info.field_id_to_values[column_.field_id_.get()].insert(lower_val_); + info.field_id_to_values[column_.field_id_.get()].insert(upper_val_); + } + } + const ColumnInfo column_; const proto::plan::GenericValue lower_val_; const proto::plan::GenericValue upper_val_; diff --git a/internal/core/src/plan/PlanNode.h b/internal/core/src/plan/PlanNode.h index f149b834bd..04cfe5f219 100644 --- a/internal/core/src/plan/PlanNode.h +++ b/internal/core/src/plan/PlanNode.h @@ -63,6 +63,11 @@ class PlanNode { virtual std::string_view name() const = 0; + virtual expr::ExprInfo + GatherInfo() const { + return {}; + }; + private: PlanNodeId id_; }; @@ -243,6 +248,13 @@ class FilterBitsNode : public PlanNode { filter_->ToString()); } + expr::ExprInfo + GatherInfo() const override { + expr::ExprInfo info; + filter_->GatherInfo(info); + return info; + } + private: const std::vector sources_; const expr::TypedExprPtr filter_; diff --git a/internal/core/src/query/PlanProto.cpp b/internal/core/src/query/PlanProto.cpp index 66702469a1..72d788b0db 100644 --- a/internal/core/src/query/PlanProto.cpp +++ b/internal/core/src/query/PlanProto.cpp @@ -202,6 +202,8 @@ ProtoParser::PlanNodeFromProto(const planpb::PlanNode& plan_node_proto) { search_info.round_decimal_ = query_info_proto.round_decimal(); search_info.search_params_ = nlohmann::json::parse(query_info_proto.search_params()); + search_info.materialized_view_involved = + query_info_proto.materialized_view_involved(); if (query_info_proto.group_by_field_id() > 0) { auto group_by_field_id = FieldId(query_info_proto.group_by_field_id()); diff --git a/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp b/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp index 23e03d1c0e..72efef10f2 100644 --- a/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp +++ b/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp @@ -11,8 +11,10 @@ #include "query/generated/ExecPlanNodeVisitor.h" +#include #include +#include "expr/ITypeExpr.h" #include "query/PlanImpl.h" #include "query/SubSearchResult.h" #include "query/generated/ExecExprVisitor.h" @@ -24,7 +26,7 @@ #include "exec/Task.h" #include "segcore/SegmentInterface.h" #include "query/GroupByOperator.h" - +#include "knowhere/comp/materialized_view.h" namespace milvus::query { namespace impl { @@ -139,6 +141,11 @@ ExecPlanNodeVisitor::ExecuteExprNodeInternal( // std::cout << bitset_holder->size() << " . " << s << std::endl; } +expr::ExprInfo +GatherInfoBasedOnExpr(const std::shared_ptr& node) { + return node->GatherInfo(); +} + template void ExecPlanNodeVisitor::VectorVisitorImpl(VectorPlanNode& node) { @@ -165,6 +172,22 @@ ExecPlanNodeVisitor::VectorVisitorImpl(VectorPlanNode& node) { std::unique_ptr bitset_holder; if (node.filter_plannode_.has_value()) { + if (node.search_info_.materialized_view_involved) { + knowhere::MaterializedViewSearchInfo materialized_view_search_info; + const auto expr_info = + GatherInfoBasedOnExpr(node.filter_plannode_.value()); + for (const auto& [field_id, vals] : expr_info.field_id_to_values) { + materialized_view_search_info + .field_id_to_touched_categories_cnt[field_id] = vals.size(); + } + materialized_view_search_info.is_pure_and = expr_info.is_pure_and; + materialized_view_search_info.has_not = expr_info.has_not; + + node.search_info_ + .search_params_[knowhere::meta::MATERIALIZED_VIEW_SEARCH_INFO] = + materialized_view_search_info; + } + BitsetType expr_res; ExecuteExprNode( node.filter_plannode_.value(), segment, active_count, expr_res); diff --git a/internal/core/unittest/CMakeLists.txt b/internal/core/unittest/CMakeLists.txt index 3318141200..373a6eb478 100644 --- a/internal/core/unittest/CMakeLists.txt +++ b/internal/core/unittest/CMakeLists.txt @@ -25,6 +25,7 @@ set(MILVUS_TEST_FILES test_common.cpp test_concurrent_vector.cpp test_c_api.cpp + test_expr_materialized_view.cpp test_expr.cpp test_float16.cpp test_growing.cpp diff --git a/internal/core/unittest/test_expr_materialized_view.cpp b/internal/core/unittest/test_expr_materialized_view.cpp new file mode 100644 index 0000000000..e1262841df --- /dev/null +++ b/internal/core/unittest/test_expr_materialized_view.cpp @@ -0,0 +1,975 @@ +// Copyright (C) 2019-2024 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/FieldDataInterface.h" +#include "common/Schema.h" +#include "common/Types.h" +#include "expr/ITypeExpr.h" +#include "knowhere/comp/index_param.h" +#include "knowhere/comp/materialized_view.h" +#include "knowhere/config.h" +#include "query/Plan.h" +#include "query/PlanImpl.h" +#include "query/generated/ExecPlanNodeVisitor.h" +#include "plan/PlanNode.h" +#include "segcore/SegmentSealed.h" +#include "segcore/SegmentSealedImpl.h" +#include "test_utils/DataGen.h" + +using DataType = milvus::DataType; +using Schema = milvus::Schema; +using FieldName = milvus::FieldName; +using FieldId = milvus::FieldId; + +namespace { + +// DataType::String is not supported in this test +const std::unordered_map kDataTypeFieldName = { + {DataType::VECTOR_FLOAT, "VectorFloatField"}, + {DataType::BOOL, "BoolField"}, + {DataType::INT8, "Int8Field"}, + {DataType::INT16, "Int16Field"}, + {DataType::INT32, "Int32Field"}, + {DataType::INT64, "Int64Field"}, + {DataType::FLOAT, "FloatField"}, + {DataType::DOUBLE, "DoubleField"}, + {DataType::VARCHAR, "VarCharField"}, + {DataType::JSON, "JSONField"}, +}; + +// use field name to get schema pb string +std::string +GetDataTypeSchemapbStr(const DataType& data_type) { + if (kDataTypeFieldName.find(data_type) == kDataTypeFieldName.end()) { + throw std::runtime_error("GetDataTypeSchemapbStr: Invalid data type " + + std::to_string(static_cast(data_type))); + } + + std::string str = kDataTypeFieldName.at(data_type); + str.erase(str.find("Field"), 5); + return str; +} + +constexpr size_t kFieldIdToTouchedCategoriesCntDefault = 0; +constexpr bool kIsPureAndDefault = true; +constexpr bool kHasNotDefault = false; + +const std::string kFieldIdPlaceholder = "FID"; +const std::string kVecFieldIdPlaceholder = "VEC_FID"; +const std::string kDataTypePlaceholder = "DT"; +const std::string kValPlaceholder = "VAL"; +const std::string kPredicatePlaceholder = "PREDICATE_PLACEHOLDER"; +} // namespace + +class ExprMaterializedViewTest : public testing::Test { + public: + // NOTE: If your test fixture defines SetUpTestSuite() or TearDownTestSuite() + // they must be declared public rather than protected in order to use TEST_P. + // https://google.github.io/googletest/advanced.html#value-parameterized-tests + static void + SetUpTestSuite() { + // create schema and assign field_id + schema = std::make_shared(); + for (const auto& [data_type, field_name] : kDataTypeFieldName) { + if (data_type == DataType::VECTOR_FLOAT) { + schema->AddDebugField( + field_name, data_type, kDim, knowhere::metric::L2); + } else { + schema->AddDebugField(field_name, data_type); + } + data_field_info[data_type].field_id = + schema->get_field_id(FieldName(field_name)).get(); + std::cout << field_name << " with id " + << data_field_info[data_type].field_id << std::endl; + } + + // generate data and prepare for search + gen_data = std::make_unique( + milvus::segcore::DataGen(schema, N)); + segment = milvus::segcore::CreateSealedSegment(schema); + auto fields = schema->get_fields(); + milvus::segcore::SealedLoadFieldData(*gen_data, *segment); + exec_plan_node_visitor = + std::make_unique( + *segment, milvus::MAX_TIMESTAMP); + + // prepare plan template + plan_template = R"(vector_anns: < + field_id: VEC_FID + predicates: < + PREDICATE_PLACEHOLDER + > + query_info: < + topk: 1 + round_decimal: 3 + metric_type: "L2" + search_params: "{\"nprobe\": 1}" + > + placeholder_tag: "$0">)"; + const int64_t vec_field_id = + data_field_info[DataType::VECTOR_FLOAT].field_id; + ReplaceAllOccurrence(plan_template, + kVecFieldIdPlaceholder, + std::to_string(vec_field_id)); + + // collect mv supported data type + numeric_str_scalar_data_types.clear(); + for (const auto& e : kDataTypeFieldName) { + if (e.first != DataType::VECTOR_FLOAT && + e.first != DataType::JSON) { + numeric_str_scalar_data_types.insert(e.first); + } + } + } + + static void + TearDownTestSuite() { + } + + protected: + // this function takes an predicate string in schemapb format + // and return a vector search plan + std::unique_ptr + CreatePlan(const std::string& predicate_str) { + auto plan_str = InterpolateTemplate(predicate_str); + auto binary_plan = milvus::segcore::translate_text_plan_to_binary_plan( + plan_str.c_str()); + return milvus::query::CreateSearchPlanByExpr( + *schema, binary_plan.data(), binary_plan.size()); + } + + knowhere::MaterializedViewSearchInfo + ExecutePlan(const std::unique_ptr& plan) { + auto ph_group_raw = milvus::segcore::CreatePlaceholderGroup(1, kDim); + auto ph_group = + ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); + segment->Search(plan.get(), ph_group.get(), milvus::MAX_TIMESTAMP); + return plan->plan_node_->search_info_ + .search_params_[knowhere::meta::MATERIALIZED_VIEW_SEARCH_INFO]; + } + + // replace field id, data type and scalar value in a single expr schemapb plan + std::string + InterpolateSingleExpr(const std::string& expr_in, + const DataType& data_type) { + std::string expr = expr_in; + const int64_t field_id = data_field_info[data_type].field_id; + ReplaceAllOccurrence( + expr, kFieldIdPlaceholder, std::to_string(field_id)); + ReplaceAllOccurrence( + expr, kDataTypePlaceholder, GetDataTypeSchemapbStr(data_type)); + + // The user can use value placeholder and numeric values after it to distinguish different values + // eg. VAL1, VAL2, VAL3 should be replaced with different values of the same data type + std::regex pattern("VAL(\\d+)"); + std::string replacement = ""; + while (std::regex_search(expr, pattern)) { + switch (data_type) { + case DataType::BOOL: + ReplaceAllOccurrence(expr, "VAL0", "bool_val:false"); + ReplaceAllOccurrence(expr, "VAL1", "bool_val:true"); + break; + case DataType::INT8: + case DataType::INT16: + case DataType::INT32: + case DataType::INT64: + replacement = "int64_val:$1"; + expr = std::regex_replace(expr, pattern, replacement); + break; + case DataType::FLOAT: + case DataType::DOUBLE: + replacement = "float_val:$1"; + expr = std::regex_replace(expr, pattern, replacement); + break; + case DataType::VARCHAR: + replacement = "string_val:\"str$1\""; + expr = std::regex_replace(expr, pattern, replacement); + break; + case DataType::JSON: + break; + default: + throw std::runtime_error( + "InterpolateSingleExpr: Invalid data type " + + fmt::format("{}", data_type)); + } + + // fmt::print("expr {} data_type {}\n", expr, data_type); + } + return expr; + } + + knowhere::MaterializedViewSearchInfo + TranslateThenExecuteWhenMvInolved(const std::string& predicate_str) { + auto plan = CreatePlan(predicate_str); + plan->plan_node_->search_info_.materialized_view_involved = true; + return ExecutePlan(plan); + } + + knowhere::MaterializedViewSearchInfo + TranslateThenExecuteWhenMvNotInolved(const std::string& predicate_str) { + auto plan = CreatePlan(predicate_str); + plan->plan_node_->search_info_.materialized_view_involved = false; + return ExecutePlan(plan); + } + + static const std::unordered_set& + GetNumericAndVarcharScalarDataTypes() { + return numeric_str_scalar_data_types; + } + + int64_t + GetFieldID(const DataType& data_type) { + if (data_field_info.find(data_type) == data_field_info.end()) { + throw std::runtime_error("Invalid data type " + + fmt::format("{}", data_type)); + } + + return data_field_info[data_type].field_id; + } + + void + TestMvExpectDefault(knowhere::MaterializedViewSearchInfo& mv) { + EXPECT_EQ(mv.field_id_to_touched_categories_cnt.size(), + kFieldIdToTouchedCategoriesCntDefault); + EXPECT_EQ(mv.is_pure_and, kIsPureAndDefault); + EXPECT_EQ(mv.has_not, kHasNotDefault); + } + + static void + ReplaceAllOccurrence(std::string& str, + const std::string& occ, + const std::string& replace) { + str = std::regex_replace(str, std::regex(occ), replace); + } + + private: + std::string + InterpolateTemplate(const std::string& predicate_str) { + std::string plan_str = plan_template; + ReplaceAllOccurrence(plan_str, kPredicatePlaceholder, predicate_str); + return plan_str; + } + + protected: + struct DataFieldInfo { + std::string field_name; + int64_t field_id; + }; + + static std::shared_ptr schema; + static std::unordered_map data_field_info; + + private: + static std::unique_ptr gen_data; + static milvus::segcore::SegmentSealedUPtr segment; + static std::unique_ptr + exec_plan_node_visitor; + static std::unordered_set numeric_str_scalar_data_types; + static std::string plan_template; + + constexpr static size_t N = 1000; + constexpr static size_t kDim = 16; +}; + +std::unordered_map + ExprMaterializedViewTest::data_field_info = {}; +std::shared_ptr ExprMaterializedViewTest::schema = nullptr; +std::unique_ptr + ExprMaterializedViewTest::gen_data = nullptr; +milvus::segcore::SegmentSealedUPtr ExprMaterializedViewTest::segment = nullptr; +std::unique_ptr + ExprMaterializedViewTest::exec_plan_node_visitor = nullptr; +std::unordered_set + ExprMaterializedViewTest::numeric_str_scalar_data_types = {}; +std::string ExprMaterializedViewTest::plan_template = ""; + +/*************** Test Cases Start ***************/ + +// Test plan without expr +// Should return default values +TEST_F(ExprMaterializedViewTest, TestMvNoExpr) { + for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) { + for (const auto& mv_involved : {true, false}) { + std::string plan_str = R"(vector_anns: < + field_id: VEC_FID + query_info: < + topk: 1 + round_decimal: 3 + metric_type: "L2" + search_params: "{\"nprobe\": 1}" + > + placeholder_tag: "$0">)"; + const int64_t vec_field_id = + data_field_info[DataType::VECTOR_FLOAT].field_id; + ReplaceAllOccurrence( + plan_str, kVecFieldIdPlaceholder, std::to_string(vec_field_id)); + auto binary_plan = + milvus::segcore::translate_text_plan_to_binary_plan( + plan_str.c_str()); + auto plan = milvus::query::CreateSearchPlanByExpr( + *schema, binary_plan.data(), binary_plan.size()); + plan->plan_node_->search_info_.materialized_view_involved = + mv_involved; + auto mv = ExecutePlan(plan); + TestMvExpectDefault(mv); + } + } +} + +TEST_F(ExprMaterializedViewTest, TestMvNotInvolvedExpr) { + for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) { + std::string predicate = R"( + term_expr: < + column_info: < + field_id: FID + data_type: DT + > + values: < VAL1 > + > + )"; + predicate = InterpolateSingleExpr(predicate, data_type); + auto plan = CreatePlan(predicate); + plan->plan_node_->search_info_.materialized_view_involved = false; + auto mv = ExecutePlan(plan); + TestMvExpectDefault(mv); + } +} + +TEST_F(ExprMaterializedViewTest, TestMvNotInvolvedJsonExpr) { + std::string predicate = + InterpolateSingleExpr( + R"( json_contains_expr: )", + DataType::JSON) + + InterpolateSingleExpr( + R"( elements: op:Contains elements_same_type:true>)", + DataType::INT64); + auto plan = CreatePlan(predicate); + plan->plan_node_->search_info_.materialized_view_involved = false; + auto mv = ExecutePlan(plan); + TestMvExpectDefault(mv); +} + +// Test json_contains +TEST_F(ExprMaterializedViewTest, TestJsonContainsExpr) { + std::string predicate = + InterpolateSingleExpr( + R"( json_contains_expr: )", + DataType::JSON) + + InterpolateSingleExpr( + R"( elements: op:Contains elements_same_type:true>)", + DataType::INT64); + auto mv = TranslateThenExecuteWhenMvInolved(predicate); + TestMvExpectDefault(mv); +} + +// Test numeric and varchar expr: F0 in [A] +TEST_F(ExprMaterializedViewTest, TestInExpr) { + for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) { + std::string predicate = R"( + term_expr:< + column_info:< + field_id: FID + data_type: DT + > + values:< VAL1 > + > + )"; + predicate = InterpolateSingleExpr(predicate, data_type); + auto mv = TranslateThenExecuteWhenMvInolved(predicate); + // fmt::print("Predicate: {}\n", predicate); + + ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 1); + auto field_id = GetFieldID(data_type); + ASSERT_TRUE(mv.field_id_to_touched_categories_cnt.find(field_id) != + mv.field_id_to_touched_categories_cnt.end()); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[field_id], 1); + EXPECT_EQ(mv.has_not, false); + EXPECT_EQ(mv.is_pure_and, true); + } +} + +// Test numeric and varchar expr: F0 in [A, A, A] +TEST_F(ExprMaterializedViewTest, TestInDuplicatesExpr) { + for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) { + std::string predicate = R"( + term_expr:< + column_info:< + field_id: FID + data_type: DT + > + values:< VAL1 > + values:< VAL1 > + values:< VAL1 > + > + )"; + predicate = InterpolateSingleExpr(predicate, data_type); + auto mv = TranslateThenExecuteWhenMvInolved(predicate); + // fmt::print("Predicate: {}\n", predicate); + + ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 1); + auto field_id = GetFieldID(data_type); + ASSERT_TRUE(mv.field_id_to_touched_categories_cnt.find(field_id) != + mv.field_id_to_touched_categories_cnt.end()); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[field_id], 1); + EXPECT_EQ(mv.has_not, false); + EXPECT_EQ(mv.is_pure_and, true); + } +} + +// Test numeric and varchar expr: F0 not in [A] +TEST_F(ExprMaterializedViewTest, TestUnaryLogicalNotInExpr) { + for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) { + std::string predicate = R"( + unary_expr:< + op:Not + child: < + term_expr:< + column_info:< + field_id: FID + data_type: DT + > + values:< VAL1 > + > + > + > + )"; + predicate = InterpolateSingleExpr(predicate, data_type); + auto mv = TranslateThenExecuteWhenMvInolved(predicate); + + ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 1); + auto field_id = GetFieldID(data_type); + ASSERT_TRUE(mv.field_id_to_touched_categories_cnt.find(field_id) != + mv.field_id_to_touched_categories_cnt.end()); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[field_id], 1); + EXPECT_EQ(mv.has_not, true); + EXPECT_EQ(mv.is_pure_and, true); + } +} + +// Test numeric and varchar expr: F0 == A +TEST_F(ExprMaterializedViewTest, TestUnaryRangeEqualExpr) { + for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) { + std::string predicate = R"( + unary_range_expr:< + column_info:< + field_id:FID + data_type: DT + > + op:Equal + value: < VAL1 > + > + )"; + predicate = InterpolateSingleExpr(predicate, data_type); + auto mv = TranslateThenExecuteWhenMvInolved(predicate); + + ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 1); + auto field_id = GetFieldID(data_type); + ASSERT_TRUE(mv.field_id_to_touched_categories_cnt.find(field_id) != + mv.field_id_to_touched_categories_cnt.end()); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[field_id], 1); + EXPECT_EQ(mv.has_not, false); + EXPECT_EQ(mv.is_pure_and, true); + } +} + +// Test numeric and varchar expr: F0 != A +TEST_F(ExprMaterializedViewTest, TestUnaryRangeNotEqualExpr) { + for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) { + std::string predicate = R"( + unary_range_expr:< + column_info:< + field_id:FID + data_type: DT + > + op: NotEqual + value: < VAL1 > + > + )"; + predicate = InterpolateSingleExpr(predicate, data_type); + auto mv = TranslateThenExecuteWhenMvInolved(predicate); + + ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 1); + auto field_id = GetFieldID(data_type); + ASSERT_TRUE(mv.field_id_to_touched_categories_cnt.find(field_id) != + mv.field_id_to_touched_categories_cnt.end()); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[field_id], 1); + EXPECT_EQ(mv.has_not, true); + EXPECT_EQ(mv.is_pure_and, true); + } +} + +// Test numeric and varchar expr: F0 < A, F0 <= A, F0 > A, F0 >= A +TEST_F(ExprMaterializedViewTest, TestUnaryRangeCompareExpr) { + const std::vector ops = { + "LessThan", "LessEqual", "GreaterThan", "GreaterEqual"}; + for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) { + for (const auto& ops_str : ops) { + std::string predicate = R"( + unary_range_expr:< + column_info:< + field_id:FID + data_type: DT + > + op: )" + ops_str + + R"( + value: < VAL1 > + > + )"; + predicate = InterpolateSingleExpr(predicate, data_type); + auto mv = TranslateThenExecuteWhenMvInolved(predicate); + + ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 1); + auto field_id = GetFieldID(data_type); + ASSERT_TRUE(mv.field_id_to_touched_categories_cnt.find(field_id) != + mv.field_id_to_touched_categories_cnt.end()); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[field_id], 2); + EXPECT_EQ(mv.has_not, false); + EXPECT_EQ(mv.is_pure_and, true); + } + } +} + +// Test numeric and varchar expr: F in [A, B, C] +TEST_F(ExprMaterializedViewTest, TestInMultipleExpr) { + for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) { + std::string predicate = R"( + term_expr:< + column_info:< + field_id: FID + data_type: DT + > + values:< VAL0 > + values:< VAL1 > + > + )"; + predicate = InterpolateSingleExpr(predicate, data_type); + auto mv = TranslateThenExecuteWhenMvInolved(predicate); + + ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 1); + auto field_id = GetFieldID(data_type); + ASSERT_TRUE(mv.field_id_to_touched_categories_cnt.find(field_id) != + mv.field_id_to_touched_categories_cnt.end()); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[field_id], 2); + EXPECT_EQ(mv.has_not, false); + EXPECT_EQ(mv.is_pure_and, true); + } +} + +// Test numeric and varchar expr: F0 not in [A] +TEST_F(ExprMaterializedViewTest, TestUnaryLogicalNotInMultipleExpr) { + for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) { + std::string predicate = R"( + unary_expr:< + op:Not + child: < + term_expr:< + column_info:< + field_id: FID + data_type: DT + > + values:< VAL0 > + values:< VAL1 > + > + > + > + )"; + predicate = InterpolateSingleExpr(predicate, data_type); + auto mv = TranslateThenExecuteWhenMvInolved(predicate); + + ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 1); + auto field_id = GetFieldID(data_type); + ASSERT_TRUE(mv.field_id_to_touched_categories_cnt.find(field_id) != + mv.field_id_to_touched_categories_cnt.end()); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[field_id], 2); + EXPECT_EQ(mv.has_not, true); + EXPECT_EQ(mv.is_pure_and, true); + } +} + +// Test expr: F0 == A && F1 == B +TEST_F(ExprMaterializedViewTest, TestEqualAndEqualExpr) { + const DataType c0_data_type = DataType::VARCHAR; + const DataType c1_data_type = DataType::INT32; + std::string c0 = R"( + unary_range_expr:< + column_info:< + field_id:FID + data_type: DT + > + op:Equal + value: < VAL1 > + > + )"; + c0 = InterpolateSingleExpr(c0, c0_data_type); + std::string c1 = R"( + unary_range_expr:< + column_info:< + field_id:FID + data_type: DT + > + op:Equal + value: < VAL2 > + > + )"; + c1 = InterpolateSingleExpr(c1, c1_data_type); + std::string predicate = R"( + binary_expr:< + op:LogicalAnd + left: <)" + c0 + + R"(> + right: <)" + c1 + + R"(> + > + )"; + + auto mv = TranslateThenExecuteWhenMvInolved(predicate); + + ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 2); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c0_data_type)], + 1); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c1_data_type)], + 1); + EXPECT_EQ(mv.has_not, false); + EXPECT_EQ(mv.is_pure_and, true); +} + +// Test expr: F0 == A && F1 in [A, B] +TEST_F(ExprMaterializedViewTest, TestEqualAndInExpr) { + const DataType c0_data_type = DataType::VARCHAR; + const DataType c1_data_type = DataType::INT32; + + std::string c0 = R"( + unary_range_expr:< + column_info:< + field_id:FID + data_type: DT + > + op:Equal + value: < VAL1 > + > + )"; + c0 = InterpolateSingleExpr(c0, c0_data_type); + std::string c1 = R"( + term_expr:< + column_info:< + field_id: FID + data_type: DT + > + values:< VAL1 > + values:< VAL2 > + > + )"; + c1 = InterpolateSingleExpr(c1, c1_data_type); + std::string predicate = R"( + binary_expr:< + op:LogicalAnd + left: <)" + c0 + + R"(> + right: <)" + c1 + + R"(> + > + )"; + + auto mv = TranslateThenExecuteWhenMvInolved(predicate); + + ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 2); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c0_data_type)], + 1); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c1_data_type)], + 2); + EXPECT_EQ(mv.has_not, false); + EXPECT_EQ(mv.is_pure_and, true); +} + +// Test expr: F0 == A && F1 not in [A, B] +TEST_F(ExprMaterializedViewTest, TestEqualAndNotInExpr) { + const DataType c0_data_type = DataType::VARCHAR; + const DataType c1_data_type = DataType::INT32; + + std::string c0 = R"( + unary_range_expr:< + column_info:< + field_id:FID + data_type: DT + > + op:Equal + value: < VAL1 > + > + )"; + c0 = InterpolateSingleExpr(c0, c0_data_type); + std::string c1 = R"( + unary_expr:< + op:Not + child: < + term_expr:< + column_info:< + field_id: FID + data_type: DT + > + values:< VAL1 > + values:< VAL2 > + > + > + > + )"; + c1 = InterpolateSingleExpr(c1, c1_data_type); + std::string predicate = R"( + binary_expr:< + op:LogicalAnd + left: <)" + c0 + + R"(> + right: <)" + c1 + + R"(> + > + )"; + + auto mv = TranslateThenExecuteWhenMvInolved(predicate); + + ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 2); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c0_data_type)], + 1); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c1_data_type)], + 2); + EXPECT_EQ(mv.has_not, true); + EXPECT_EQ(mv.is_pure_and, true); +} + +// Test expr: F0 == A || F1 == B +TEST_F(ExprMaterializedViewTest, TestEqualOrEqualExpr) { + const DataType c0_data_type = DataType::VARCHAR; + const DataType c1_data_type = DataType::INT32; + + std::string c0 = R"( + unary_range_expr:< + column_info:< + field_id:FID + data_type: DT + > + op:Equal + value: < VAL1 > + > + )"; + c0 = InterpolateSingleExpr(c0, c0_data_type); + std::string c1 = R"( + unary_range_expr:< + column_info:< + field_id:FID + data_type: DT + > + op:Equal + value: < VAL2 > + > + )"; + c1 = InterpolateSingleExpr(c1, c1_data_type); + std::string predicate = R"( + binary_expr:< + op:LogicalOr + left: <)" + c0 + + R"(> + right: <)" + c1 + + R"(> + > + )"; + + auto mv = TranslateThenExecuteWhenMvInolved(predicate); + + ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 2); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c0_data_type)], + 1); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c1_data_type)], + 1); + EXPECT_EQ(mv.has_not, false); + EXPECT_EQ(mv.is_pure_and, false); +} + +// Test expr: F0 == A && F1 in [A, B] || F2 == A +TEST_F(ExprMaterializedViewTest, TestEqualAndInOrEqualExpr) { + const DataType c0_data_type = DataType::VARCHAR; + const DataType c1_data_type = DataType::INT32; + const DataType c2_data_type = DataType::INT16; + + std::string c0 = R"( + unary_range_expr:< + column_info:< + field_id:FID + data_type: DT + > + op:Equal + value: < VAL1 > + > + )"; + c0 = InterpolateSingleExpr(c0, c0_data_type); + std::string c1 = R"( + term_expr:< + column_info:< + field_id: FID + data_type: DT + > + values:< VAL1 > + values:< VAL2 > + > + )"; + c1 = InterpolateSingleExpr(c1, c1_data_type); + std::string c2 = R"( + unary_range_expr:< + column_info:< + field_id:FID + data_type: DT + > + op:Equal + value: < VAL3 > + > + )"; + c2 = InterpolateSingleExpr(c2, c2_data_type); + + std::string predicate = R"( + binary_expr:< + op:LogicalAnd + left: <)" + c0 + + R"(> + right: < + binary_expr:< + op:LogicalOr + left: <)" + + c1 + + R"(> + right: <)" + + c2 + + R"(> + > + > + > + )"; + + auto mv = TranslateThenExecuteWhenMvInolved(predicate); + + ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 3); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c0_data_type)], + 1); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c1_data_type)], + 2); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c2_data_type)], + 1); + EXPECT_EQ(mv.has_not, false); + EXPECT_EQ(mv.is_pure_and, false); +} + +// Test expr: F0 == A && not (F1 == B) || F2 == A +TEST_F(ExprMaterializedViewTest, TestEqualAndNotEqualOrEqualExpr) { + const DataType c0_data_type = DataType::VARCHAR; + const DataType c1_data_type = DataType::INT32; + const DataType c2_data_type = DataType::INT16; + + std::string c0 = R"( + unary_range_expr:< + column_info:< + field_id:FID + data_type: DT + > + op:Equal + value: < VAL1 > + > + )"; + c0 = InterpolateSingleExpr(c0, c0_data_type); + std::string c1 = R"( + unary_expr:< + op:Not + child: < + unary_range_expr:< + column_info:< + field_id:FID + data_type: DT + > + op:Equal + value: < VAL2 > + > + > + > + )"; + c1 = InterpolateSingleExpr(c1, c1_data_type); + std::string c2 = R"( + unary_range_expr:< + column_info:< + field_id:FID + data_type: DT + > + op:Equal + value: < VAL1 > + > + )"; + c2 = InterpolateSingleExpr(c2, c2_data_type); + + std::string predicate = R"( + binary_expr:< + op:LogicalAnd + left: <)" + c0 + + R"(> + right: < + binary_expr:< + op:LogicalOr + left: <)" + + c1 + + R"(> + right: <)" + + c2 + + R"(> + > + > + > + )"; + + auto mv = TranslateThenExecuteWhenMvInolved(predicate); + + ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 3); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c0_data_type)], + 1); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c1_data_type)], + 1); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[GetFieldID(c2_data_type)], + 1); + EXPECT_EQ(mv.has_not, true); + EXPECT_EQ(mv.is_pure_and, false); +} + +// Test expr: A < F0 < B +TEST_F(ExprMaterializedViewTest, TestBinaryRangeExpr) { + for (const auto& data_type : GetNumericAndVarcharScalarDataTypes()) { + std::string predicate = R"( + binary_range_expr: < + column_info:< + field_id:FID + data_type: DT + > + lower_value: < VAL0 > + upper_value: < VAL1 > + > + )"; + predicate = InterpolateSingleExpr(predicate, data_type); + auto mv = TranslateThenExecuteWhenMvInolved(predicate); + + ASSERT_EQ(mv.field_id_to_touched_categories_cnt.size(), 1); + auto field_id = GetFieldID(data_type); + ASSERT_TRUE(mv.field_id_to_touched_categories_cnt.find(field_id) != + mv.field_id_to_touched_categories_cnt.end()); + EXPECT_EQ(mv.field_id_to_touched_categories_cnt[field_id], 2); + EXPECT_EQ(mv.has_not, false); + EXPECT_EQ(mv.is_pure_and, true); + } +} diff --git a/internal/datacoord/index_builder.go b/internal/datacoord/index_builder.go index cdc60875f4..70fd1a98c8 100644 --- a/internal/datacoord/index_builder.go +++ b/internal/datacoord/index_builder.go @@ -287,7 +287,7 @@ func (ib *indexBuilder) process(buildID UniqueID) bool { // vector index build needs information of optional scalar fields data optionalFields := make([]*indexpb.OptionalFieldInfo, 0) - if Params.CommonCfg.EnableNodeFilteringOnPartitionKey.GetAsBool() && isOptionalScalarFieldSupported(indexType) { + if Params.CommonCfg.EnableMaterializedView.GetAsBool() && isOptionalScalarFieldSupported(indexType) { colSchema := ib.meta.GetCollection(meta.CollectionID).Schema hasPartitionKey := typeutil.HasPartitionKey(colSchema) if hasPartitionKey { diff --git a/internal/datacoord/index_builder_test.go b/internal/datacoord/index_builder_test.go index 801ec5639f..b45c4e5e93 100644 --- a/internal/datacoord/index_builder_test.go +++ b/internal/datacoord/index_builder_test.go @@ -1413,8 +1413,8 @@ func TestVecIndexWithOptionalScalarField(t *testing.T) { mt.collections[collID].Schema.Fields[1].IsPartitionKey = true } - paramtable.Get().CommonCfg.EnableNodeFilteringOnPartitionKey.SwapTempValue("true") - defer paramtable.Get().CommonCfg.EnableNodeFilteringOnPartitionKey.SwapTempValue("false") + paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("true") + defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false") ib := newIndexBuilder(ctx, &mt, nodeManager, cm, newIndexEngineVersionManager(), nil) t.Run("success to get opt field on startup", func(t *testing.T) { @@ -1464,7 +1464,7 @@ func TestVecIndexWithOptionalScalarField(t *testing.T) { // should still be able to build vec index when opt field is not set t.Run("enqueue returns empty optional field when cfg disable", func(t *testing.T) { - paramtable.Get().CommonCfg.EnableNodeFilteringOnPartitionKey.SwapTempValue("false") + paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false") ic.EXPECT().CreateJob(mock.Anything, mock.Anything, mock.Anything, mock.Anything).RunAndReturn( func(ctx context.Context, in *indexpb.CreateJobRequest, opts ...grpc.CallOption) (*commonpb.Status, error) { assert.Zero(t, len(in.OptionalScalarFields), "optional scalar field should be set") @@ -1478,7 +1478,7 @@ func TestVecIndexWithOptionalScalarField(t *testing.T) { }) t.Run("enqueue returns empty optional field when index is not HNSW", func(t *testing.T) { - paramtable.Get().CommonCfg.EnableNodeFilteringOnPartitionKey.SwapTempValue("true") + paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("true") mt.indexMeta.indexes[collID][indexID].IndexParams[1].Value = indexparamcheck.IndexDISKANN ic.EXPECT().CreateJob(mock.Anything, mock.Anything, mock.Anything, mock.Anything).RunAndReturn( func(ctx context.Context, in *indexpb.CreateJobRequest, opts ...grpc.CallOption) (*commonpb.Status, error) { @@ -1493,7 +1493,7 @@ func TestVecIndexWithOptionalScalarField(t *testing.T) { }) t.Run("enqueue returns empty optional field when no partition key", func(t *testing.T) { - paramtable.Get().CommonCfg.EnableNodeFilteringOnPartitionKey.SwapTempValue("true") + paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("true") mt.collections[collID].Schema.Fields[1].IsPartitionKey = false ic.EXPECT().CreateJob(mock.Anything, mock.Anything, mock.Anything, mock.Anything).RunAndReturn( func(ctx context.Context, in *indexpb.CreateJobRequest, opts ...grpc.CallOption) (*commonpb.Status, error) { diff --git a/internal/proto/plan.proto b/internal/proto/plan.proto index ca19f76ba2..fd6710d1f5 100644 --- a/internal/proto/plan.proto +++ b/internal/proto/plan.proto @@ -60,6 +60,7 @@ message QueryInfo { string search_params = 4; int64 round_decimal = 5; int64 group_by_field_id = 6; + bool materialized_view_involved = 7; } message ColumnInfo { diff --git a/internal/proxy/impl.go b/internal/proxy/impl.go index 00badea4c6..d3af770284 100644 --- a/internal/proxy/impl.go +++ b/internal/proxy/impl.go @@ -2653,11 +2653,12 @@ func (node *Proxy) Search(ctx context.Context, request *milvuspb.SearchRequest) ), ReqID: paramtable.GetNodeID(), }, - request: request, - tr: timerecord.NewTimeRecorder("search"), - qc: node.queryCoord, - node: node, - lb: node.lbPolicy, + request: request, + tr: timerecord.NewTimeRecorder("search"), + qc: node.queryCoord, + node: node, + lb: node.lbPolicy, + enableMaterializedView: node.enableMaterializedView, } guaranteeTs := request.GuaranteeTimestamp diff --git a/internal/proxy/proxy.go b/internal/proxy/proxy.go index 28676d2cb7..9c240c731f 100644 --- a/internal/proxy/proxy.go +++ b/internal/proxy/proxy.go @@ -123,6 +123,9 @@ type Proxy struct { // resource manager resourceManager resource.Manager replicateStreamManager *ReplicateStreamManager + + // materialized view + enableMaterializedView bool } // NewProxy returns a Proxy struct. @@ -290,6 +293,8 @@ func (node *Proxy) Init() error { } log.Debug("init meta cache done", zap.String("role", typeutil.ProxyRole)) + node.enableMaterializedView = Params.CommonCfg.EnableMaterializedView.GetAsBool() + log.Info("init proxy done", zap.Int64("nodeID", paramtable.GetNodeID()), zap.String("Address", node.address)) return nil } diff --git a/internal/proxy/search_util.go b/internal/proxy/search_util.go index c6bb7cc64a..6b1f466778 100644 --- a/internal/proxy/search_util.go +++ b/internal/proxy/search_util.go @@ -111,6 +111,12 @@ func initSearchRequest(ctx context.Context, t *searchTask) error { log.Warn("failed to get partition ids", zap.Error(err)) return err } + + if t.enableMaterializedView { + if planPtr := plan.GetVectorAnns(); planPtr != nil { + planPtr.QueryInfo.MaterializedViewInvolved = true + } + } } } diff --git a/internal/proxy/task_search.go b/internal/proxy/task_search.go index da06fb3117..b08a057943 100644 --- a/internal/proxy/task_search.go +++ b/internal/proxy/task_search.go @@ -56,11 +56,12 @@ type searchTask struct { result *milvuspb.SearchResults request *milvuspb.SearchRequest - tr *timerecord.TimeRecorder - collectionName string - schema *schemaInfo - requery bool - partitionKeyMode bool + tr *timerecord.TimeRecorder + collectionName string + schema *schemaInfo + requery bool + partitionKeyMode bool + enableMaterializedView bool userOutputFields []string diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index 23f8c49537..ac5a6c3a82 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -201,7 +201,7 @@ type commonConfig struct { HighPriorityThreadCoreCoefficient ParamItem `refreshable:"false"` MiddlePriorityThreadCoreCoefficient ParamItem `refreshable:"false"` LowPriorityThreadCoreCoefficient ParamItem `refreshable:"false"` - EnableNodeFilteringOnPartitionKey ParamItem `refreshable:"false"` + EnableMaterializedView ParamItem `refreshable:"false"` BuildIndexThreadPoolRatio ParamItem `refreshable:"false"` MaxDegree ParamItem `refreshable:"true"` SearchListSize ParamItem `refreshable:"true"` @@ -435,12 +435,12 @@ This configuration is only used by querynode and indexnode, it selects CPU instr } p.IndexSliceSize.Init(base.mgr) - p.EnableNodeFilteringOnPartitionKey = ParamItem{ - Key: "common.nodeFiltering.enableOnPartitionKey", + p.EnableMaterializedView = ParamItem{ + Key: "common.materializedView.enabled", Version: "2.5.0", DefaultValue: "false", } - p.EnableNodeFilteringOnPartitionKey.Init(base.mgr) + p.EnableMaterializedView.Init(base.mgr) p.MaxDegree = ParamItem{ Key: "common.DiskIndex.MaxDegree",