From 6404e02d99c876bdf38eae5c4a5cb2c67eff629c Mon Sep 17 00:00:00 2001 From: Bingyi Sun Date: Mon, 9 Jun 2025 17:04:33 +0800 Subject: [PATCH] fix: Check cast type is array for json contains expr (#42184) issue: https://github.com/milvus-io/milvus/issues/42181 Signed-off-by: sunby --- internal/core/src/exec/expression/Expr.h | 18 ++++++++++-------- .../src/exec/expression/JsonContainsExpr.h | 4 +++- internal/core/src/index/Index.h | 2 +- internal/core/src/index/JsonInvertedIndex.cpp | 10 +++++++--- internal/core/src/index/JsonInvertedIndex.h | 2 +- internal/core/src/segcore/SegmentGrowingImpl.h | 3 ++- internal/core/src/segcore/SegmentInterface.h | 3 ++- internal/core/src/segcore/SegmentSealed.h | 5 +++-- .../util/indexparamcheck/inverted_checker.go | 2 +- 9 files changed, 30 insertions(+), 19 deletions(-) diff --git a/internal/core/src/exec/expression/Expr.h b/internal/core/src/exec/expression/Expr.h index 21edfa8fbf..285b5f9584 100644 --- a/internal/core/src/exec/expression/Expr.h +++ b/internal/core/src/exec/expression/Expr.h @@ -139,8 +139,8 @@ class SegmentExpr : public Expr { int64_t active_count, int64_t batch_size, int32_t consistency_level, - bool allow_any_json_cast_type = false) - + bool allow_any_json_cast_type = false, + bool is_json_contains = false) : Expr(DataType::BOOL, std::move(input), name), segment_(const_cast(segment)), field_id_(field_id), @@ -149,7 +149,8 @@ class SegmentExpr : public Expr { allow_any_json_cast_type_(allow_any_json_cast_type), active_count_(active_count), batch_size_(batch_size), - consistency_level_(consistency_level) { + consistency_level_(consistency_level), + is_json_contains_(is_json_contains) { size_per_chunk_ = segment_->size_per_chunk(); AssertInfo( batch_size_ > 0, @@ -173,11 +174,11 @@ class SegmentExpr : public Expr { if (field_meta.get_data_type() == DataType::JSON) { auto pointer = milvus::Json::pointer(nested_path_); - if (is_index_mode_ = - segment_->HasIndex(field_id_, - pointer, - value_type_, - allow_any_json_cast_type_)) { + if (is_index_mode_ = segment_->HasIndex(field_id_, + pointer, + value_type_, + allow_any_json_cast_type_, + is_json_contains_)) { num_index_chunk_ = 1; } } else { @@ -1254,6 +1255,7 @@ class SegmentExpr : public Expr { DataType field_type_; DataType value_type_; bool allow_any_json_cast_type_{false}; + bool is_json_contains_{false}; bool is_index_mode_{false}; bool is_data_mode_{false}; // sometimes need to skip index and using raw data diff --git a/internal/core/src/exec/expression/JsonContainsExpr.h b/internal/core/src/exec/expression/JsonContainsExpr.h index 8ebe0563f2..4565d41235 100644 --- a/internal/core/src/exec/expression/JsonContainsExpr.h +++ b/internal/core/src/exec/expression/JsonContainsExpr.h @@ -48,7 +48,9 @@ class PhyJsonContainsFilterExpr : public SegmentExpr { : FromValCase(expr->vals_[0].val_case()), active_count, batch_size, - consistency_level), + consistency_level, + false, + true), expr_(expr) { } diff --git a/internal/core/src/index/Index.h b/internal/core/src/index/Index.h index 328d4dd72d..ae369b289d 100644 --- a/internal/core/src/index/Index.h +++ b/internal/core/src/index/Index.h @@ -75,7 +75,7 @@ class IndexBase { } virtual bool - IsDataTypeSupported(DataType data_type) const { + IsDataTypeSupported(DataType data_type, bool is_array) const { return true; }; diff --git a/internal/core/src/index/JsonInvertedIndex.cpp b/internal/core/src/index/JsonInvertedIndex.cpp index db1ba17fdc..233a854f09 100644 --- a/internal/core/src/index/JsonInvertedIndex.cpp +++ b/internal/core/src/index/JsonInvertedIndex.cpp @@ -175,10 +175,14 @@ JsonInvertedIndex::build_index_for_json( template bool -JsonInvertedIndex::IsDataTypeSupported(DataType data_type) const { +JsonInvertedIndex::IsDataTypeSupported(DataType data_type, + bool is_array) const { + bool cast_type_is_array = + cast_type_.data_type() == JsonCastType::DataType::ARRAY; auto type = cast_type_.ToMilvusDataType(); - return type == data_type || - (data_type == DataType::INT64 && type == DataType::DOUBLE); + return is_array == cast_type_is_array && + (type == data_type || + (data_type == DataType::INT64 && type == DataType::DOUBLE)); } template class JsonInvertedIndex; diff --git a/internal/core/src/index/JsonInvertedIndex.h b/internal/core/src/index/JsonInvertedIndex.h index 7e6df24952..8b857cc2bb 100644 --- a/internal/core/src/index/JsonInvertedIndex.h +++ b/internal/core/src/index/JsonInvertedIndex.h @@ -116,7 +116,7 @@ class JsonInvertedIndex : public index::InvertedIndexTantivy { } bool - IsDataTypeSupported(DataType data_type) const override; + IsDataTypeSupported(DataType data_type, bool is_array) const override; JsonInvertedIndexParseErrorRecorder& GetErrorRecorder() { diff --git a/internal/core/src/segcore/SegmentGrowingImpl.h b/internal/core/src/segcore/SegmentGrowingImpl.h index d54c915392..8bc83cce44 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.h +++ b/internal/core/src/segcore/SegmentGrowingImpl.h @@ -335,7 +335,8 @@ class SegmentGrowingImpl : public SegmentGrowing { HasIndex(FieldId field_id, const std::string& nested_path, DataType data_type, - bool any_type = false) const override { + bool any_type = false, + bool is_array = false) const override { return false; }; diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h index 34710350bb..906f9e23c1 100644 --- a/internal/core/src/segcore/SegmentInterface.h +++ b/internal/core/src/segcore/SegmentInterface.h @@ -301,7 +301,8 @@ class SegmentInternalInterface : public SegmentInterface { HasIndex(FieldId field_id, const std::string& nested_path, DataType data_type, - bool any_type = false) const = 0; + bool any_type = false, + bool is_array = false) const = 0; virtual bool HasFieldData(FieldId field_id) const = 0; diff --git a/internal/core/src/segcore/SegmentSealed.h b/internal/core/src/segcore/SegmentSealed.h index 3b53228fc2..9adbd47f01 100644 --- a/internal/core/src/segcore/SegmentSealed.h +++ b/internal/core/src/segcore/SegmentSealed.h @@ -92,7 +92,8 @@ class SegmentSealed : public SegmentInternalInterface { HasIndex(FieldId field_id, const std::string& path, DataType data_type, - bool any_type = false) const override { + bool any_type = false, + bool is_json_contain = false) const override { JSONIndexKey key; key.field_id = field_id; key.nested_path = path; @@ -103,7 +104,7 @@ class SegmentSealed : public SegmentInternalInterface { if (any_type) { return true; } - return index->second->IsDataTypeSupported(data_type); + return index->second->IsDataTypeSupported(data_type, is_json_contain); } protected: diff --git a/internal/util/indexparamcheck/inverted_checker.go b/internal/util/indexparamcheck/inverted_checker.go index 9b1acd7776..6552d6b327 100644 --- a/internal/util/indexparamcheck/inverted_checker.go +++ b/internal/util/indexparamcheck/inverted_checker.go @@ -16,7 +16,7 @@ type INVERTEDChecker struct { scalarIndexChecker } -var validJSONCastTypes = []string{"BOOL", "DOUBLE", "VARCHAR"} +var validJSONCastTypes = []string{"BOOL", "DOUBLE", "VARCHAR", "ARRAY_BOOL", "ARRAY_DOUBLE", "ARRAY_VARCHAR"} var validJSONCastFunctions = []string{"STRING_TO_DOUBLE"}