mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 09:38:39 +08:00
feat: use namespace skip index when search (#44888)
issue: #44011 --------- Signed-off-by: sunby <sunbingyi1992@gmail.com>
This commit is contained in:
parent
6077178553
commit
52270701ce
@ -32,7 +32,7 @@
|
|||||||
namespace milvus {
|
namespace milvus {
|
||||||
|
|
||||||
using std::string;
|
using std::string;
|
||||||
|
// Field name that Schema::ParseFrom compares each field's name against; the
// matching field's id is recorded as the schema's namespace field id.
const std::string namespace_field_name = "$namespace_id";
|
||||||
std::shared_ptr<Schema>
|
std::shared_ptr<Schema>
|
||||||
Schema::ParseFrom(const milvus::proto::schema::CollectionSchema& schema_proto) {
|
Schema::ParseFrom(const milvus::proto::schema::CollectionSchema& schema_proto) {
|
||||||
auto schema = std::make_shared<Schema>();
|
auto schema = std::make_shared<Schema>();
|
||||||
@ -58,6 +58,9 @@ Schema::ParseFrom(const milvus::proto::schema::CollectionSchema& schema_proto) {
|
|||||||
"repetitive dynamic field");
|
"repetitive dynamic field");
|
||||||
schema->set_dynamic_field_id(field_id);
|
schema->set_dynamic_field_id(field_id);
|
||||||
}
|
}
|
||||||
|
if (child.name() == namespace_field_name) {
|
||||||
|
schema->set_namespace_field_id(field_id);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
for (const milvus::proto::schema::FieldSchema& child :
|
for (const milvus::proto::schema::FieldSchema& child :
|
||||||
|
|||||||
@ -213,11 +213,21 @@ class Schema {
|
|||||||
this->dynamic_field_id_opt_ = field_id;
|
this->dynamic_field_id_opt_ = field_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
set_namespace_field_id(FieldId field_id) {
|
||||||
|
this->namespace_field_id_opt_ = field_id;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
set_schema_version(uint64_t version) {
|
set_schema_version(uint64_t version) {
|
||||||
this->schema_version_ = version;
|
this->schema_version_ = version;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<FieldId>
|
||||||
|
get_namespace_field_id() const {
|
||||||
|
return this->namespace_field_id_opt_;
|
||||||
|
}
|
||||||
|
|
||||||
uint64_t
|
uint64_t
|
||||||
get_schema_version() const {
|
get_schema_version() const {
|
||||||
return this->schema_version_;
|
return this->schema_version_;
|
||||||
@ -348,6 +358,7 @@ class Schema {
|
|||||||
|
|
||||||
std::optional<FieldId> primary_field_id_opt_;
|
std::optional<FieldId> primary_field_id_opt_;
|
||||||
std::optional<FieldId> dynamic_field_id_opt_;
|
std::optional<FieldId> dynamic_field_id_opt_;
|
||||||
|
std::optional<FieldId> namespace_field_id_opt_;
|
||||||
|
|
||||||
// field partial load list
|
// field partial load list
|
||||||
// work as hint now
|
// work as hint now
|
||||||
|
|||||||
@ -107,6 +107,13 @@ class PhyConjunctFilterExpr : public Expr {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
SetNamespaceSkipFunc(SkipNamespaceFunc skip_namespace_func) override {
|
||||||
|
for (auto& input : inputs_) {
|
||||||
|
input->SetNamespaceSkipFunc(skip_namespace_func);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::string
|
std::string
|
||||||
ToString() const {
|
ToString() const {
|
||||||
if (!input_order_.empty()) {
|
if (!input_order_.empty()) {
|
||||||
|
|||||||
@ -667,8 +667,55 @@ RewriteConjunctExpr(std::shared_ptr<milvus::exec::PhyConjunctFilterExpr>& expr,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void
|
||||||
|
SetNamespaceSkipIndex(std::shared_ptr<PhyConjunctFilterExpr> conjunct_expr,
|
||||||
|
ExecContext* context) {
|
||||||
|
auto schema = context->get_query_context()->get_segment()->get_schema();
|
||||||
|
auto namespace_field_id = schema.get_namespace_field_id();
|
||||||
|
auto inputs = conjunct_expr->GetInputsRef();
|
||||||
|
std::shared_ptr<PhyUnaryRangeFilterExpr> namespace_expr = nullptr;
|
||||||
|
for (const auto& input : inputs) {
|
||||||
|
auto unary = std::dynamic_pointer_cast<PhyUnaryRangeFilterExpr>(input);
|
||||||
|
if (!unary) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (unary->GetColumnInfo().value().field_id_ ==
|
||||||
|
namespace_field_id.value() &&
|
||||||
|
unary->GetOpType() == proto::plan::OpType::Equal) {
|
||||||
|
namespace_expr = unary;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!namespace_expr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
auto namespace_field_meta = schema[namespace_field_id.value()];
|
||||||
|
auto& skip_index =
|
||||||
|
context->get_query_context()->get_segment()->GetSkipIndex();
|
||||||
|
if (namespace_field_meta.get_data_type() == DataType::INT64) {
|
||||||
|
auto skip_namespace_func = [&](int64_t chunk_id) -> bool {
|
||||||
|
return skip_index.CanSkipUnaryRange<int64_t>(
|
||||||
|
namespace_field_id.value(),
|
||||||
|
chunk_id,
|
||||||
|
proto::plan::OpType::Equal,
|
||||||
|
namespace_expr->GetLogicalExpr()->GetValue().int64_val());
|
||||||
|
};
|
||||||
|
namespace_expr->SetNamespaceSkipFunc(skip_namespace_func);
|
||||||
|
} else {
|
||||||
|
auto skip_namespace_func = [&](int64_t chunk_id) -> bool {
|
||||||
|
return skip_index.CanSkipUnaryRange<std::string>(
|
||||||
|
namespace_field_id.value(),
|
||||||
|
chunk_id,
|
||||||
|
proto::plan::OpType::Equal,
|
||||||
|
namespace_expr->GetLogicalExpr()->GetValue().string_val());
|
||||||
|
};
|
||||||
|
namespace_expr->SetNamespaceSkipFunc(skip_namespace_func);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
inline void
|
inline void
|
||||||
OptimizeCompiledExprs(ExecContext* context, const std::vector<ExprPtr>& exprs) {
|
OptimizeCompiledExprs(ExecContext* context, const std::vector<ExprPtr>& exprs) {
|
||||||
|
auto schema = context->get_query_context()->get_segment()->get_schema();
|
||||||
|
auto namespace_field_id = schema.get_namespace_field_id();
|
||||||
std::chrono::high_resolution_clock::time_point start =
|
std::chrono::high_resolution_clock::time_point start =
|
||||||
std::chrono::high_resolution_clock::now();
|
std::chrono::high_resolution_clock::now();
|
||||||
for (const auto& expr : exprs) {
|
for (const auto& expr : exprs) {
|
||||||
@ -680,6 +727,9 @@ OptimizeCompiledExprs(ExecContext* context, const std::vector<ExprPtr>& exprs) {
|
|||||||
bool has_heavy_operation = false;
|
bool has_heavy_operation = false;
|
||||||
ReorderConjunctExpr(conjunct_expr, context, has_heavy_operation);
|
ReorderConjunctExpr(conjunct_expr, context, has_heavy_operation);
|
||||||
LOG_DEBUG("after reorder filter expression: {}", expr->ToString());
|
LOG_DEBUG("after reorder filter expression: {}", expr->ToString());
|
||||||
|
if (namespace_field_id.has_value()) {
|
||||||
|
SetNamespaceSkipIndex(conjunct_expr, context);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::chrono::high_resolution_clock::time_point end =
|
std::chrono::high_resolution_clock::time_point end =
|
||||||
|
|||||||
@ -126,6 +126,12 @@ class Expr {
|
|||||||
return inputs_;
|
return inputs_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
using SkipNamespaceFunc = std::function<bool(int64_t chunk_id)>;
|
||||||
|
virtual void
|
||||||
|
SetNamespaceSkipFunc(SkipNamespaceFunc skip_namespace_func) {
|
||||||
|
namespace_skip_func_ = std::move(skip_namespace_func);
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DataType type_;
|
DataType type_;
|
||||||
std::vector<std::shared_ptr<Expr>> inputs_;
|
std::vector<std::shared_ptr<Expr>> inputs_;
|
||||||
@ -135,6 +141,10 @@ class Expr {
|
|||||||
// whether we have offset input and do expr filtering on these data
|
// whether we have offset input and do expr filtering on these data
|
||||||
// default is false which means we will do expr filtering on the total segment data
|
// default is false which means we will do expr filtering on the total segment data
|
||||||
bool has_offset_input_ = false;
|
bool has_offset_input_ = false;
|
||||||
|
// check if we can skip a chunk for namespace field.
|
||||||
|
// if there's no namespace field, this is std::nullopt.
|
||||||
|
// TODO: for expression like f1 > 1 and f2 > 2, we can use skip function of f1 when evaluating f2.
|
||||||
|
std::optional<SkipNamespaceFunc> namespace_skip_func_;
|
||||||
};
|
};
|
||||||
|
|
||||||
using ExprPtr = std::shared_ptr<milvus::exec::Expr>;
|
using ExprPtr = std::shared_ptr<milvus::exec::Expr>;
|
||||||
@ -364,7 +374,9 @@ class SegmentExpr : public Expr {
|
|||||||
auto pw = segment_->get_batch_views<T>(
|
auto pw = segment_->get_batch_views<T>(
|
||||||
op_ctx_, field_id_, 0, current_data_chunk_pos_, need_size);
|
op_ctx_, field_id_, 0, current_data_chunk_pos_, need_size);
|
||||||
auto views_info = pw.get();
|
auto views_info = pw.get();
|
||||||
if (!skip_func || !skip_func(skip_index, field_id_, 0)) {
|
if ((!skip_func || !skip_func(skip_index, field_id_, 0)) &&
|
||||||
|
(!namespace_skip_func_.has_value() ||
|
||||||
|
!namespace_skip_func_.value()(0))) {
|
||||||
// first is the raw data, second is valid_data
|
// first is the raw data, second is valid_data
|
||||||
// use valid_data to see if raw data is null
|
// use valid_data to see if raw data is null
|
||||||
if constexpr (NeedSegmentOffsets) {
|
if constexpr (NeedSegmentOffsets) {
|
||||||
@ -419,7 +431,9 @@ class SegmentExpr : public Expr {
|
|||||||
auto pw =
|
auto pw =
|
||||||
segment_->get_views_by_offsets<T>(op_ctx_, field_id_, 0, *input);
|
segment_->get_views_by_offsets<T>(op_ctx_, field_id_, 0, *input);
|
||||||
auto [data_vec, valid_data] = pw.get();
|
auto [data_vec, valid_data] = pw.get();
|
||||||
if (!skip_func || !skip_func(skip_index, field_id_, 0)) {
|
if ((!skip_func || !skip_func(skip_index, field_id_, 0)) &&
|
||||||
|
(!namespace_skip_func_.has_value() ||
|
||||||
|
!namespace_skip_func_.value()(0))) {
|
||||||
func(data_vec.data(),
|
func(data_vec.data(),
|
||||||
valid_data.data(),
|
valid_data.data(),
|
||||||
nullptr,
|
nullptr,
|
||||||
@ -478,7 +492,9 @@ class SegmentExpr : public Expr {
|
|||||||
auto valid_result = index_ptr->IsNotNull();
|
auto valid_result = index_ptr->IsNotNull();
|
||||||
auto batch_size = input->size();
|
auto batch_size = input->size();
|
||||||
|
|
||||||
if (!skip_func || !skip_func(skip_index, field_id_, 0)) {
|
if ((!skip_func || !skip_func(skip_index, field_id_, 0)) &&
|
||||||
|
(!namespace_skip_func_.has_value() ||
|
||||||
|
!namespace_skip_func_.value()(0))) {
|
||||||
for (auto i = 0; i < batch_size; ++i) {
|
for (auto i = 0; i < batch_size; ++i) {
|
||||||
auto offset = (*input)[i];
|
auto offset = (*input)[i];
|
||||||
auto raw = index_ptr->Reverse_Lookup(offset);
|
auto raw = index_ptr->Reverse_Lookup(offset);
|
||||||
@ -544,8 +560,10 @@ class SegmentExpr : public Expr {
|
|||||||
chunk_id,
|
chunk_id,
|
||||||
{int32_t(chunk_offset)});
|
{int32_t(chunk_offset)});
|
||||||
auto [data_vec, valid_data] = pw.get();
|
auto [data_vec, valid_data] = pw.get();
|
||||||
if (!skip_func ||
|
if ((!skip_func ||
|
||||||
!skip_func(skip_index, field_id_, chunk_id)) {
|
!skip_func(skip_index, field_id_, chunk_id)) &&
|
||||||
|
(!namespace_skip_func_.has_value() ||
|
||||||
|
!namespace_skip_func_.value()(chunk_id))) {
|
||||||
func.template operator()<FilterType::random>(
|
func.template operator()<FilterType::random>(
|
||||||
data_vec.data(),
|
data_vec.data(),
|
||||||
valid_data.data(),
|
valid_data.data(),
|
||||||
@ -577,8 +595,10 @@ class SegmentExpr : public Expr {
|
|||||||
if (valid_data != nullptr) {
|
if (valid_data != nullptr) {
|
||||||
valid_data += chunk_offset;
|
valid_data += chunk_offset;
|
||||||
}
|
}
|
||||||
if (!skip_func ||
|
if ((!skip_func ||
|
||||||
!skip_func(skip_index, field_id_, chunk_id)) {
|
!skip_func(skip_index, field_id_, chunk_id)) &&
|
||||||
|
(!namespace_skip_func_.has_value() ||
|
||||||
|
!namespace_skip_func_.value()(chunk_id))) {
|
||||||
func.template operator()<FilterType::random>(
|
func.template operator()<FilterType::random>(
|
||||||
data,
|
data,
|
||||||
valid_data,
|
valid_data,
|
||||||
@ -607,7 +627,9 @@ class SegmentExpr : public Expr {
|
|||||||
auto chunk = pw.get();
|
auto chunk = pw.get();
|
||||||
const T* data = chunk.data();
|
const T* data = chunk.data();
|
||||||
const bool* valid_data = chunk.valid_data();
|
const bool* valid_data = chunk.valid_data();
|
||||||
if (!skip_func || !skip_func(skip_index, field_id_, 0)) {
|
if ((!skip_func || !skip_func(skip_index, field_id_, 0)) &&
|
||||||
|
(!namespace_skip_func_.has_value() ||
|
||||||
|
!namespace_skip_func_.value()(0))) {
|
||||||
func.template operator()<FilterType::random>(data,
|
func.template operator()<FilterType::random>(data,
|
||||||
valid_data,
|
valid_data,
|
||||||
input->data(),
|
input->data(),
|
||||||
@ -633,7 +655,10 @@ class SegmentExpr : public Expr {
|
|||||||
if (valid_data != nullptr) {
|
if (valid_data != nullptr) {
|
||||||
valid_data += chunk_offset;
|
valid_data += chunk_offset;
|
||||||
}
|
}
|
||||||
if (!skip_func || !skip_func(skip_index, field_id_, chunk_id)) {
|
if ((!skip_func ||
|
||||||
|
!skip_func(skip_index, field_id_, chunk_id)) &&
|
||||||
|
(!namespace_skip_func_.has_value() ||
|
||||||
|
!namespace_skip_func_.value()(chunk_id))) {
|
||||||
func.template operator()<FilterType::random>(
|
func.template operator()<FilterType::random>(
|
||||||
data,
|
data,
|
||||||
valid_data,
|
valid_data,
|
||||||
@ -698,8 +723,9 @@ class SegmentExpr : public Expr {
|
|||||||
if (valid_data != nullptr) {
|
if (valid_data != nullptr) {
|
||||||
valid_data += data_pos;
|
valid_data += data_pos;
|
||||||
}
|
}
|
||||||
|
if ((!skip_func || !skip_func(skip_index, field_id_, i)) &&
|
||||||
if (!skip_func || !skip_func(skip_index, field_id_, i)) {
|
(!namespace_skip_func_.has_value() ||
|
||||||
|
!namespace_skip_func_.value()(i))) {
|
||||||
const T* data = chunk.data() + data_pos;
|
const T* data = chunk.data() + data_pos;
|
||||||
|
|
||||||
if constexpr (NeedSegmentOffsets) {
|
if constexpr (NeedSegmentOffsets) {
|
||||||
@ -784,7 +810,9 @@ class SegmentExpr : public Expr {
|
|||||||
segment_offsets_array[j] = static_cast<int32_t>(offset);
|
segment_offsets_array[j] = static_cast<int32_t>(offset);
|
||||||
}
|
}
|
||||||
auto& skip_index = segment_->GetSkipIndex();
|
auto& skip_index = segment_->GetSkipIndex();
|
||||||
if (!skip_func || !skip_func(skip_index, field_id_, i)) {
|
if ((!skip_func || !skip_func(skip_index, field_id_, i)) &&
|
||||||
|
(!namespace_skip_func_.has_value() ||
|
||||||
|
!namespace_skip_func_.value()(i))) {
|
||||||
bool is_seal = false;
|
bool is_seal = false;
|
||||||
if constexpr (std::is_same_v<T, std::string_view> ||
|
if constexpr (std::is_same_v<T, std::string_view> ||
|
||||||
std::is_same_v<T, Json> ||
|
std::is_same_v<T, Json> ||
|
||||||
|
|||||||
@ -431,6 +431,11 @@ class UnaryRangeFilterExpr : public ITypeFilterExpr {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the literal this unary range expression compares against.
// Returned by const reference so that callers reading a single field (for
// example `GetValue().int64_val()`) do not copy the whole protobuf message;
// the reference is valid for as long as this expression object is alive.
const proto::plan::GenericValue&
GetValue() const {
    return val_;
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
const ColumnInfo column_;
|
const ColumnInfo column_;
|
||||||
const proto::plan::OpType op_type_;
|
const proto::plan::OpType op_type_;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user