mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
feat: use namespace skip index when search (#44888)
issue: #44011 --------- Signed-off-by: sunby <sunbingyi1992@gmail.com>
This commit is contained in:
parent
6077178553
commit
52270701ce
@ -32,7 +32,7 @@
|
||||
namespace milvus {
|
||||
|
||||
using std::string;
|
||||
|
||||
// Name of the field that carries the namespace id. Schema::ParseFrom compares
// each field's name against this value and, on a match, records that field
// through set_namespace_field_id().
const std::string namespace_field_name = "$namespace_id";
|
||||
std::shared_ptr<Schema>
|
||||
Schema::ParseFrom(const milvus::proto::schema::CollectionSchema& schema_proto) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
@ -58,6 +58,9 @@ Schema::ParseFrom(const milvus::proto::schema::CollectionSchema& schema_proto) {
|
||||
"repetitive dynamic field");
|
||||
schema->set_dynamic_field_id(field_id);
|
||||
}
|
||||
if (child.name() == namespace_field_name) {
|
||||
schema->set_namespace_field_id(field_id);
|
||||
}
|
||||
};
|
||||
|
||||
for (const milvus::proto::schema::FieldSchema& child :
|
||||
|
||||
@ -213,11 +213,21 @@ class Schema {
|
||||
this->dynamic_field_id_opt_ = field_id;
|
||||
}
|
||||
|
||||
void
|
||||
set_namespace_field_id(FieldId field_id) {
|
||||
this->namespace_field_id_opt_ = field_id;
|
||||
}
|
||||
|
||||
void
|
||||
set_schema_version(uint64_t version) {
|
||||
this->schema_version_ = version;
|
||||
}
|
||||
|
||||
std::optional<FieldId>
|
||||
get_namespace_field_id() const {
|
||||
return this->namespace_field_id_opt_;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
get_schema_version() const {
|
||||
return this->schema_version_;
|
||||
@ -348,6 +358,7 @@ class Schema {
|
||||
|
||||
std::optional<FieldId> primary_field_id_opt_;
|
||||
std::optional<FieldId> dynamic_field_id_opt_;
|
||||
// Id of the namespace field; written by set_namespace_field_id() and read by
// get_namespace_field_id().
std::optional<FieldId> namespace_field_id_opt_;
|
||||
|
||||
// field partial load list
|
||||
// work as hint now
|
||||
|
||||
@ -107,6 +107,13 @@ class PhyConjunctFilterExpr : public Expr {
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
SetNamespaceSkipFunc(SkipNamespaceFunc skip_namespace_func) override {
|
||||
for (auto& input : inputs_) {
|
||||
input->SetNamespaceSkipFunc(skip_namespace_func);
|
||||
}
|
||||
}
|
||||
|
||||
std::string
|
||||
ToString() const {
|
||||
if (!input_order_.empty()) {
|
||||
|
||||
@ -667,8 +667,55 @@ RewriteConjunctExpr(std::shared_ptr<milvus::exec::PhyConjunctFilterExpr>& expr,
|
||||
}
|
||||
}
|
||||
|
||||
inline void
|
||||
SetNamespaceSkipIndex(std::shared_ptr<PhyConjunctFilterExpr> conjunct_expr,
|
||||
ExecContext* context) {
|
||||
auto schema = context->get_query_context()->get_segment()->get_schema();
|
||||
auto namespace_field_id = schema.get_namespace_field_id();
|
||||
auto inputs = conjunct_expr->GetInputsRef();
|
||||
std::shared_ptr<PhyUnaryRangeFilterExpr> namespace_expr = nullptr;
|
||||
for (const auto& input : inputs) {
|
||||
auto unary = std::dynamic_pointer_cast<PhyUnaryRangeFilterExpr>(input);
|
||||
if (!unary) {
|
||||
continue;
|
||||
}
|
||||
if (unary->GetColumnInfo().value().field_id_ ==
|
||||
namespace_field_id.value() &&
|
||||
unary->GetOpType() == proto::plan::OpType::Equal) {
|
||||
namespace_expr = unary;
|
||||
}
|
||||
}
|
||||
if (!namespace_expr) {
|
||||
return;
|
||||
}
|
||||
auto namespace_field_meta = schema[namespace_field_id.value()];
|
||||
auto& skip_index =
|
||||
context->get_query_context()->get_segment()->GetSkipIndex();
|
||||
if (namespace_field_meta.get_data_type() == DataType::INT64) {
|
||||
auto skip_namespace_func = [&](int64_t chunk_id) -> bool {
|
||||
return skip_index.CanSkipUnaryRange<int64_t>(
|
||||
namespace_field_id.value(),
|
||||
chunk_id,
|
||||
proto::plan::OpType::Equal,
|
||||
namespace_expr->GetLogicalExpr()->GetValue().int64_val());
|
||||
};
|
||||
namespace_expr->SetNamespaceSkipFunc(skip_namespace_func);
|
||||
} else {
|
||||
auto skip_namespace_func = [&](int64_t chunk_id) -> bool {
|
||||
return skip_index.CanSkipUnaryRange<std::string>(
|
||||
namespace_field_id.value(),
|
||||
chunk_id,
|
||||
proto::plan::OpType::Equal,
|
||||
namespace_expr->GetLogicalExpr()->GetValue().string_val());
|
||||
};
|
||||
namespace_expr->SetNamespaceSkipFunc(skip_namespace_func);
|
||||
}
|
||||
}
|
||||
|
||||
inline void
|
||||
OptimizeCompiledExprs(ExecContext* context, const std::vector<ExprPtr>& exprs) {
|
||||
auto schema = context->get_query_context()->get_segment()->get_schema();
|
||||
auto namespace_field_id = schema.get_namespace_field_id();
|
||||
std::chrono::high_resolution_clock::time_point start =
|
||||
std::chrono::high_resolution_clock::now();
|
||||
for (const auto& expr : exprs) {
|
||||
@ -680,6 +727,9 @@ OptimizeCompiledExprs(ExecContext* context, const std::vector<ExprPtr>& exprs) {
|
||||
bool has_heavy_operation = false;
|
||||
ReorderConjunctExpr(conjunct_expr, context, has_heavy_operation);
|
||||
LOG_DEBUG("after reorder filter expression: {}", expr->ToString());
|
||||
if (namespace_field_id.has_value()) {
|
||||
SetNamespaceSkipIndex(conjunct_expr, context);
|
||||
}
|
||||
}
|
||||
}
|
||||
std::chrono::high_resolution_clock::time_point end =
|
||||
|
||||
@ -126,6 +126,12 @@ class Expr {
|
||||
return inputs_;
|
||||
}
|
||||
|
||||
// Predicate over a chunk id. The SegmentExpr evaluation paths do not process
// a chunk when the stored predicate returns true for it.
using SkipNamespaceFunc = std::function<bool(int64_t chunk_id)>;
|
||||
virtual void
|
||||
SetNamespaceSkipFunc(SkipNamespaceFunc skip_namespace_func) {
|
||||
namespace_skip_func_ = std::move(skip_namespace_func);
|
||||
}
|
||||
|
||||
protected:
|
||||
DataType type_;
|
||||
std::vector<std::shared_ptr<Expr>> inputs_;
|
||||
@ -135,6 +141,10 @@ class Expr {
|
||||
// whether we have offset input and do expr filtering on these data
|
||||
// default is false which means we will do expr filtering on the total segment data
|
||||
bool has_offset_input_ = false;
|
||||
// check if we can skip a chunk for namespace field.
|
||||
// if there's no namespace field, this is std::nullopt.
|
||||
// TODO: for expression like f1 > 1 and f2 > 2, we can use skip function of f1 when evaluating f2.
|
||||
std::optional<SkipNamespaceFunc> namespace_skip_func_;
|
||||
};
|
||||
|
||||
// Shared-ownership handle to an expression node.
using ExprPtr = std::shared_ptr<milvus::exec::Expr>;
|
||||
@ -364,7 +374,9 @@ class SegmentExpr : public Expr {
|
||||
auto pw = segment_->get_batch_views<T>(
|
||||
op_ctx_, field_id_, 0, current_data_chunk_pos_, need_size);
|
||||
auto views_info = pw.get();
|
||||
if (!skip_func || !skip_func(skip_index, field_id_, 0)) {
|
||||
if ((!skip_func || !skip_func(skip_index, field_id_, 0)) &&
|
||||
(!namespace_skip_func_.has_value() ||
|
||||
!namespace_skip_func_.value()(0))) {
|
||||
// first is the raw data, second is valid_data
|
||||
// use valid_data to see if raw data is null
|
||||
if constexpr (NeedSegmentOffsets) {
|
||||
@ -419,7 +431,9 @@ class SegmentExpr : public Expr {
|
||||
auto pw =
|
||||
segment_->get_views_by_offsets<T>(op_ctx_, field_id_, 0, *input);
|
||||
auto [data_vec, valid_data] = pw.get();
|
||||
if (!skip_func || !skip_func(skip_index, field_id_, 0)) {
|
||||
if ((!skip_func || !skip_func(skip_index, field_id_, 0)) &&
|
||||
(!namespace_skip_func_.has_value() ||
|
||||
!namespace_skip_func_.value()(0))) {
|
||||
func(data_vec.data(),
|
||||
valid_data.data(),
|
||||
nullptr,
|
||||
@ -478,7 +492,9 @@ class SegmentExpr : public Expr {
|
||||
auto valid_result = index_ptr->IsNotNull();
|
||||
auto batch_size = input->size();
|
||||
|
||||
if (!skip_func || !skip_func(skip_index, field_id_, 0)) {
|
||||
if ((!skip_func || !skip_func(skip_index, field_id_, 0)) &&
|
||||
(!namespace_skip_func_.has_value() ||
|
||||
!namespace_skip_func_.value()(0))) {
|
||||
for (auto i = 0; i < batch_size; ++i) {
|
||||
auto offset = (*input)[i];
|
||||
auto raw = index_ptr->Reverse_Lookup(offset);
|
||||
@ -544,8 +560,10 @@ class SegmentExpr : public Expr {
|
||||
chunk_id,
|
||||
{int32_t(chunk_offset)});
|
||||
auto [data_vec, valid_data] = pw.get();
|
||||
if (!skip_func ||
|
||||
!skip_func(skip_index, field_id_, chunk_id)) {
|
||||
if ((!skip_func ||
|
||||
!skip_func(skip_index, field_id_, chunk_id)) &&
|
||||
(!namespace_skip_func_.has_value() ||
|
||||
!namespace_skip_func_.value()(chunk_id))) {
|
||||
func.template operator()<FilterType::random>(
|
||||
data_vec.data(),
|
||||
valid_data.data(),
|
||||
@ -577,8 +595,10 @@ class SegmentExpr : public Expr {
|
||||
if (valid_data != nullptr) {
|
||||
valid_data += chunk_offset;
|
||||
}
|
||||
if (!skip_func ||
|
||||
!skip_func(skip_index, field_id_, chunk_id)) {
|
||||
if ((!skip_func ||
|
||||
!skip_func(skip_index, field_id_, chunk_id)) &&
|
||||
(!namespace_skip_func_.has_value() ||
|
||||
!namespace_skip_func_.value()(chunk_id))) {
|
||||
func.template operator()<FilterType::random>(
|
||||
data,
|
||||
valid_data,
|
||||
@ -607,7 +627,9 @@ class SegmentExpr : public Expr {
|
||||
auto chunk = pw.get();
|
||||
const T* data = chunk.data();
|
||||
const bool* valid_data = chunk.valid_data();
|
||||
if (!skip_func || !skip_func(skip_index, field_id_, 0)) {
|
||||
if ((!skip_func || !skip_func(skip_index, field_id_, 0)) &&
|
||||
(!namespace_skip_func_.has_value() ||
|
||||
!namespace_skip_func_.value()(0))) {
|
||||
func.template operator()<FilterType::random>(data,
|
||||
valid_data,
|
||||
input->data(),
|
||||
@ -633,7 +655,10 @@ class SegmentExpr : public Expr {
|
||||
if (valid_data != nullptr) {
|
||||
valid_data += chunk_offset;
|
||||
}
|
||||
if (!skip_func || !skip_func(skip_index, field_id_, chunk_id)) {
|
||||
if ((!skip_func ||
|
||||
!skip_func(skip_index, field_id_, chunk_id)) &&
|
||||
(!namespace_skip_func_.has_value() ||
|
||||
!namespace_skip_func_.value()(chunk_id))) {
|
||||
func.template operator()<FilterType::random>(
|
||||
data,
|
||||
valid_data,
|
||||
@ -698,8 +723,9 @@ class SegmentExpr : public Expr {
|
||||
if (valid_data != nullptr) {
|
||||
valid_data += data_pos;
|
||||
}
|
||||
|
||||
if (!skip_func || !skip_func(skip_index, field_id_, i)) {
|
||||
if ((!skip_func || !skip_func(skip_index, field_id_, i)) &&
|
||||
(!namespace_skip_func_.has_value() ||
|
||||
!namespace_skip_func_.value()(i))) {
|
||||
const T* data = chunk.data() + data_pos;
|
||||
|
||||
if constexpr (NeedSegmentOffsets) {
|
||||
@ -784,7 +810,9 @@ class SegmentExpr : public Expr {
|
||||
segment_offsets_array[j] = static_cast<int32_t>(offset);
|
||||
}
|
||||
auto& skip_index = segment_->GetSkipIndex();
|
||||
if (!skip_func || !skip_func(skip_index, field_id_, i)) {
|
||||
if ((!skip_func || !skip_func(skip_index, field_id_, i)) &&
|
||||
(!namespace_skip_func_.has_value() ||
|
||||
!namespace_skip_func_.value()(i))) {
|
||||
bool is_seal = false;
|
||||
if constexpr (std::is_same_v<T, std::string_view> ||
|
||||
std::is_same_v<T, Json> ||
|
||||
|
||||
@ -431,6 +431,11 @@ class UnaryRangeFilterExpr : public ITypeFilterExpr {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a copy of the value stored in `val_`.
const proto::plan::GenericValue
GetValue() const {
    return this->val_;
}
|
||||
|
||||
public:
|
||||
const ColumnInfo column_;
|
||||
const proto::plan::OpType op_type_;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user