enhance: Add ST_IsValid operator implementation for gis (#45501)

issue:#43427

---------

Signed-off-by: xiejh <862103595@qq.com>
This commit is contained in:
862103595 2025-11-18 15:09:40 +08:00 committed by GitHub
parent caed0fe470
commit a0e2fe78f3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 1078 additions and 762 deletions

View File

@ -203,6 +203,13 @@ class Geometry {
return false;
}
// Reports whether this geometry passes the GEOS validity check.
//
// IsValid() is consulted first and short-circuits the result to false when
// it fails, so GEOSisValid_r is never called in that case (IsValid() is
// defined elsewhere in this class -- presumably it checks that the wrapped
// handle is usable; confirm). Otherwise the geometry counts as valid only
// when GEOSisValid_r returns exactly 1; every other return value is reported
// as "not valid".
bool
is_valid() const {
    return IsValid() && GEOSisValid_r(ctx_, geometry_) == 1;
}
private:
// Convert degrees distance to meters using approximate location

View File

@ -17395,6 +17395,85 @@ TEST_P(ExprTest, TestGISFunctionWithControlledData) {
});
}
// Exercises the ST_IsValid GIS filter on a growing segment: the geometry
// column is overwritten with a repeating cycle of valid and invalid shapes,
// and the bitset produced by the filter must follow that cycle row by row.
TEST_P(ExprTest, TestSTIsValidFunction) {
    using namespace milvus;
    using namespace milvus::query;
    using namespace milvus::segcore;

    // One fixture entry per shape: its WKT text together with the validity
    // the filter is expected to report for it.
    struct GeometrySample {
        const char* wkt;
        bool valid;
    };
    const std::vector<GeometrySample> samples = {
        {"POINT (0 0)", true},
        {"LINESTRING (0 0, 1 1, 2 2)", true},
        // Bow-tie ring: the edges cross each other.
        {"POLYGON ((0 0, 2 2, 2 0, 0 2, 0 0))", false},
        // Both points of the line coincide.
        {"LINESTRING (0 0, 0 0)", false},
    };

    // Schema: INT64 primary key, a float vector field and the geometry
    // field that the filter is evaluated on.
    auto schema = std::make_shared<Schema>();
    auto pk_fid = schema->AddDebugField("int", DataType::INT64);
    auto vector_fid = schema->AddDebugField(
        "fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto geometry_fid = schema->AddDebugField("geometry", DataType::GEOMETRY);
    schema->set_primary_field_id(pk_fid);

    auto segment = CreateGrowingSegment(schema, empty_index_meta);

    const int rows_per_batch = 100;
    const int batch_count = 1;

    for (int batch = 0; batch < batch_count; ++batch) {
        auto generated = DataGen(schema, rows_per_batch, batch);

        // Locate the generated geometry column so its payload can be
        // replaced with the fixture shapes.
        milvus::proto::schema::FieldData* geometry_column = nullptr;
        for (auto& field : *generated.raw_->mutable_fields_data()) {
            if (field.field_id() != geometry_fid.get()) {
                continue;
            }
            geometry_column = &field;
            break;
        }
        ASSERT_NE(geometry_column, nullptr);

        auto* geometry_array =
            geometry_column->mutable_scalars()->mutable_geometry_data();
        geometry_array->clear_data();

        // Row i stores the WKB encoding of fixture shape (i mod #samples).
        auto geos_ctx = GEOS_init_r();
        for (int row = 0; row < rows_per_batch; ++row) {
            const GeometrySample& sample = samples[row % samples.size()];
            Geometry shape(geos_ctx, sample.wkt);
            geometry_array->add_data(shape.to_wkb_string());
        }
        GEOS_finish_r(geos_ctx);

        segment->PreInsert(rows_per_batch);
        segment->Insert(batch * rows_per_batch,
                        rows_per_batch,
                        generated.row_ids_.data(),
                        generated.timestamps_.data(),
                        generated.raw_);
    }

    // Build a filter-only plan around the unary ST_IsValid operator; the
    // WKT argument is left empty.
    auto* segment_internal =
        dynamic_cast<SegmentInternalInterface*>(segment.get());
    auto filter_expr = std::make_shared<milvus::expr::GISFunctionFilterExpr>(
        milvus::expr::ColumnInfo(geometry_fid, DataType::GEOMETRY),
        proto::plan::GISFunctionFilterExpr_GISOp_STIsValid,
        "");
    auto filter_plan = std::make_shared<plan::FilterBitsNode>(
        DEFAULT_PLANNODE_ID, filter_expr);

    BitsetType bitset = ExecuteQueryExpr(filter_plan,
                                         segment_internal,
                                         rows_per_batch * batch_count,
                                         MAX_TIMESTAMP);
    ASSERT_EQ(bitset.size(), rows_per_batch * batch_count);

    for (int i = 0; i < bitset.size(); ++i) {
        const bool expected = samples[i % samples.size()].valid;
        EXPECT_EQ(bitset[i], expected)
            << "Unexpected validity result at index " << i;
    }
}
TEST_P(ExprTest, TestSTDWithinFunction) {
using namespace milvus;
using namespace milvus::query;

View File

@ -120,12 +120,77 @@ namespace exec {
processed_size, \
real_batch_size); \
return res_vec;
// Macro for unary geometry predicates (like is_valid) that take no
// right-hand geometry.
//
// _DataType : element type of the raw data chunks (the call sites use
//             std::string and std::string_view).
// method    : name of a no-argument Geometry member function whose result is
//             stored in the result bitmap.
//
// The macro expands to a complete evaluation tail and ends with
// `return res_vec;`, so it returns from the enclosing function. It expects
// `res`, `valid_res`, `res_vec`, `real_batch_size`, `segment_` and
// `field_id_` to be in scope at the expansion site.
//
// Per row:
//   * when valid_data is supplied and marks the row as not valid, both the
//     result bit and the validity bit are cleared;
//   * otherwise, if a geometry cache exists for this segment/field, the
//     cached geometry at segment_offsets[i] is used (under the cache's read
//     lock); if there is no cache, a Geometry is built from the raw bytes
//     of data[i].
//
// On the no-cache path the GEOS context is owned by a unique_ptr whose
// deleter is GEOS_finish_r, so the context is released on every exit from
// the lambda, including when building a Geometry or calling `method`
// throws (presumably possible through the project's assertion helpers --
// confirm). Documentation is kept up here because `//` comments cannot be
// used inside the backslash-continued macro body.
#define GEOMETRY_EXECUTE_SUB_BATCH_UNARY(_DataType, method)                    \
    auto execute_sub_batch = [this](const _DataType* data,                     \
                                    const bool* valid_data,                    \
                                    const int32_t* offsets,                    \
                                    const int32_t* segment_offsets,            \
                                    const int size,                            \
                                    TargetBitmapView res,                      \
                                    TargetBitmapView valid_res) {              \
        AssertInfo(segment_offsets != nullptr,                                 \
                   "segment_offsets should not be nullptr");                   \
        auto* geometry_cache =                                                 \
            SimpleGeometryCacheManager::Instance().GetCache(                   \
                this->segment_->get_segment_id(), field_id_);                  \
        if (geometry_cache) {                                                  \
            auto cache_lock = geometry_cache->AcquireReadLock();               \
            for (int i = 0; i < size; ++i) {                                   \
                if (valid_data != nullptr && !valid_data[i]) {                 \
                    res[i] = valid_res[i] = false;                             \
                    continue;                                                  \
                }                                                              \
                auto absolute_offset = segment_offsets[i];                     \
                auto cached_geometry =                                         \
                    geometry_cache->GetByOffsetUnsafe(absolute_offset);        \
                AssertInfo(cached_geometry != nullptr,                         \
                           "cached geometry is nullptr");                      \
                res[i] = cached_geometry->method();                            \
            }                                                                  \
        } else {                                                               \
            std::unique_ptr<std::remove_pointer_t<GEOSContextHandle_t>,        \
                            decltype(&GEOS_finish_r)>                          \
                ctx_guard(GEOS_init_r(), &GEOS_finish_r);                      \
            GEOSContextHandle_t ctx_ = ctx_guard.get();                        \
            for (int i = 0; i < size; ++i) {                                   \
                if (valid_data != nullptr && !valid_data[i]) {                 \
                    res[i] = valid_res[i] = false;                             \
                    continue;                                                  \
                }                                                              \
                res[i] =                                                       \
                    Geometry(ctx_, data[i].data(), data[i].size()).method();   \
            }                                                                  \
        }                                                                      \
    };                                                                         \
    int64_t processed_size = ProcessDataChunks<_DataType, true>(               \
        execute_sub_batch, std::nullptr_t{}, res, valid_res);                  \
    AssertInfo(processed_size == real_batch_size,                              \
               "internal error: expr processed rows {} not equal "             \
               "expect batch size {}",                                         \
               processed_size,                                                 \
               real_batch_size);                                               \
    return res_vec;
// Tells whether the given GIS operator may be evaluated through the index
// path.
//
// Returns false when the base-class check SegmentExpr::CanUseIndex() fails,
// and also for STIsValid, which is always excluded from the index path.
// Every other operator returns true once the base-class check passes.
bool
PhyGISFunctionFilterExpr::CanUseIndex(
    proto::plan::GISFunctionFilterExpr_GISOp op) const {
    const bool op_allows_index =
        op != proto::plan::GISFunctionFilterExpr_GISOp_STIsValid;
    return SegmentExpr::CanUseIndex() && op_allows_index;
}
void
PhyGISFunctionFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
AssertInfo(expr_->column_.data_type_ == DataType::GEOMETRY,
"unsupported data type: {}",
expr_->column_.data_type_);
if (SegmentExpr::CanUseIndex()) {
if (CanUseIndex(expr_->op_)) {
result = EvalForIndexSegment();
} else {
result = EvalForDataSegment();
@ -144,6 +209,18 @@ PhyGISFunctionFilterExpr::EvalForDataSegment() {
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();
if (expr_->op_ == proto::plan::GISFunctionFilterExpr_GISOp_STIsValid) {
if (segment_->type() == SegmentType::Growing &&
!storage::MmapManager::GetInstance()
.GetMmapConfig()
.growing_enable_mmap) {
GEOMETRY_EXECUTE_SUB_BATCH_UNARY(std::string, is_valid);
} else {
GEOMETRY_EXECUTE_SUB_BATCH_UNARY(std::string_view, is_valid);
}
return res_vec;
}
auto right_source =
Geometry(segment_->get_ctx(), expr_->geometry_wkt_.c_str());

View File

@ -75,6 +75,9 @@ class PhyGISFunctionFilterExpr : public SegmentExpr {
VectorPtr
EvalForDataSegment();
// Per-operator index check: reports whether the GIS operator `op` may be
// evaluated through the index path for this expression.
bool
CanUseIndex(proto::plan::GISFunctionFilterExpr_GISOp op) const;
private:
std::shared_ptr<const milvus::expr::GISFunctionFilterExpr> expr_;

View File

@ -36,6 +36,7 @@ expr:
| STIntersects'('Identifier','StringLiteral')' # STIntersects
| STWithin'('Identifier','StringLiteral')' # STWithin
| STDWithin'('Identifier','StringLiteral',' expr')' # STDWithin
| STIsValid'('Identifier')' # STIsValid
| ArrayLength'('(Identifier | JSONIdentifier)')' # ArrayLength
| Identifier '(' ( expr (',' expr )* ','? )? ')' # Call
| expr op1 = (LT | LE) (Identifier | JSONIdentifier) op2 = (LT | LE) expr # Range
@ -123,6 +124,7 @@ STContains: 'st_contains' | 'ST_CONTAINS';
STIntersects : 'st_intersects' | 'ST_INTERSECTS';
STWithin :'st_within' | 'ST_WITHIN';
STDWithin: 'st_dwithin' | 'ST_DWITHIN';
// Function-name token for the unary geometry validity check. As written, the
// rule lists the all-lowercase and all-uppercase spellings.
STIsValid: 'st_isvalid' | 'ST_ISVALID';
BooleanConstant: 'true' | 'True' | 'TRUE' | 'false' | 'False' | 'FALSE';

File diff suppressed because one or more lines are too long

View File

@ -54,15 +54,16 @@ STContains=53
STIntersects=54
STWithin=55
STDWithin=56
BooleanConstant=57
IntegerConstant=58
FloatingConstant=59
Identifier=60
Meta=61
StringLiteral=62
JSONIdentifier=63
Whitespace=64
Newline=65
STIsValid=57
BooleanConstant=58
IntegerConstant=59
FloatingConstant=60
Identifier=61
Meta=62
StringLiteral=63
JSONIdentifier=64
Whitespace=65
Newline=66
'('=1
')'=2
'['=3
@ -89,4 +90,4 @@ Newline=65
'|'=32
'^'=33
'~'=38
'$meta'=61
'$meta'=62

File diff suppressed because one or more lines are too long

View File

@ -54,15 +54,16 @@ STContains=53
STIntersects=54
STWithin=55
STDWithin=56
BooleanConstant=57
IntegerConstant=58
FloatingConstant=59
Identifier=60
Meta=61
StringLiteral=62
JSONIdentifier=63
Whitespace=64
Newline=65
STIsValid=57
BooleanConstant=58
IntegerConstant=59
FloatingConstant=60
Identifier=61
Meta=62
StringLiteral=63
JSONIdentifier=64
Whitespace=65
Newline=66
'('=1
')'=2
'['=3
@ -89,4 +90,4 @@ Newline=65
'|'=32
'^'=33
'~'=38
'$meta'=61
'$meta'=62

View File

@ -167,6 +167,10 @@ func (v *BasePlanVisitor) VisitJSONContainsAny(ctx *JSONContainsAnyContext) inte
return v.VisitChildren(ctx)
}
// VisitSTIsValid is the base visitor's handler for STIsValid parse-tree
// nodes. It does no work of its own: it visits the node's children via
// VisitChildren and returns that result.
func (v *BasePlanVisitor) VisitSTIsValid(ctx *STIsValidContext) interface{} {
return v.VisitChildren(ctx)
}
// VisitBitXor is the base visitor's handler for BitXor parse-tree nodes. It
// does no work of its own: it visits the node's children via VisitChildren
// and returns that result.
func (v *BasePlanVisitor) VisitBitXor(ctx *BitXorContext) interface{} {
return v.VisitChildren(ctx)
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -127,6 +127,9 @@ type PlanVisitor interface {
// Visit a parse tree produced by PlanParser#JSONContainsAny.
VisitJSONContainsAny(ctx *JSONContainsAnyContext) interface{}
// Visit a parse tree produced by PlanParser#STIsValid.
VisitSTIsValid(ctx *STIsValidContext) interface{}
// Visit a parse tree produced by PlanParser#BitXor.
VisitBitXor(ctx *BitXorContext) interface{}

View File

@ -1688,6 +1688,31 @@ func (v *ParserVisitor) VisitSTEuqals(ctx *parser.STEuqalsContext) interface{} {
}
}
// VisitSTIsValid translates an `st_isvalid(<identifier>)` call into a plan
// expression.
//
// The identifier is resolved with translateIdentifier; a resolution error is
// returned as-is. The resolved expression must map to a column whose data type
// is a geometry type, otherwise an error is returned. On success the result is
// a Bool-typed ExprWithType wrapping a GISFunctionFilterExpr that carries only
// the column info and the STIsValid operator.
func (v *ParserVisitor) VisitSTIsValid(ctx *parser.STIsValidContext) interface{} {
	identifier := ctx.Identifier().GetText()
	fieldExpr, err := v.translateIdentifier(identifier)
	if err != nil {
		return err
	}

	column := toColumnInfo(fieldExpr)
	// Short-circuit keeps GetDataType from being called on a nil column.
	onGeometryColumn := column != nil && typeutil.IsGeometryType(column.GetDataType())
	if !onGeometryColumn {
		return fmt.Errorf(
			"STIsValid operation are only supported on geometry fields now, got: %s", ctx.GetText())
	}

	gisFilter := &planpb.GISFunctionFilterExpr{
		ColumnInfo: column,
		Op:         planpb.GISFunctionFilterExpr_STIsValid,
	}
	planExpr := &planpb.Expr{
		Expr: &planpb.Expr_GisfunctionFilterExpr{
			GisfunctionFilterExpr: gisFilter,
		},
	}
	return &ExprWithType{
		expr:     planExpr,
		dataType: schemapb.DataType_Bool,
	}
}
func (v *ParserVisitor) VisitSTTouches(ctx *parser.STTouchesContext) interface{} {
childExpr, err := v.translateIdentifier(ctx.Identifier().GetText())
if err != nil {

View File

@ -2188,6 +2188,10 @@ func TestExpr_GISFunctions(t *testing.T) {
`ST_DWITHIN(GeometryField, "POINT(0.5 0.5)", 2.0)`,
`st_dwithin(GeometryField, "POINT(1.0 1.0)", 1)`,
// ST_ISVALID tests
`st_isvalid(GeometryField)`,
`ST_ISVALID(GeometryField)`,
// Case insensitive tests
`St_Equals(GeometryField, "POINT(0 0)")`,
`sT_iNtErSeCts(GeometryField, "POINT(1 1)")`,
@ -2230,6 +2234,11 @@ func TestExpr_GISFunctionsInvalidExpressions(t *testing.T) {
// Non-existent fields
`st_equals(NonExistentField, "POINT(0 0)")`,
`st_dwithin(UnknownGeometryField, "POINT(0 0)", 5.0)`,
// ST_ISVALID invalid usage
`st_isvalid(Int64Field)`,
`st_isvalid()`,
`st_isvalid(GeometryField, 1)`,
}
for _, expr := range invalidExprs {
@ -2246,6 +2255,7 @@ func TestExpr_GISFunctionsComplexExpressions(t *testing.T) {
`st_equals(GeometryField, "POINT(0 0)") and st_intersects(GeometryField, "POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))")`,
`st_contains(GeometryField, "POINT(0.5 0.5)") AND st_within(GeometryField, "POLYGON((-1 -1, 1 -1, 1 1, -1 1, -1 -1))")`,
`st_dwithin(GeometryField, "POINT(0 0)", 5.0) and Int64Field > 100`,
`st_isvalid(GeometryField) and Int64Field > 0`,
// OR combinations
`st_equals(GeometryField, "POINT(0 0)") or st_equals(GeometryField, "POINT(1 1)")`,
@ -2256,7 +2266,7 @@ func TestExpr_GISFunctionsComplexExpressions(t *testing.T) {
`not st_equals(GeometryField, "POINT(0 0)")`,
`!(st_intersects(GeometryField, "POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))"))`,
`not (st_dwithin(GeometryField, "POINT(0 0)", 1.0))`,
`not st_isvalid(GeometryField)`,
// Mixed with other field types
`st_contains(GeometryField, "POINT(0 0)") and StringField == "test"`,
`st_dwithin(GeometryField, "POINT(0 0)", 5.0) or Int32Field in [1, 2, 3]`,

View File

@ -190,6 +190,9 @@ message GISFunctionFilterExpr{
Intersects = 6;
Within = 7;
DWithin = 8;
// The validity operator is named STIsValid rather than IsValid because the
// name IsValid would conflict with the IsValid() method that protobuf
// generates by default.
STIsValid = 9;
}
GISOp op = 3;
double distance = 4; // Distance parameter for DWithin

View File

@ -503,6 +503,9 @@ const (
GISFunctionFilterExpr_Intersects GISFunctionFilterExpr_GISOp = 6
GISFunctionFilterExpr_Within GISFunctionFilterExpr_GISOp = 7
GISFunctionFilterExpr_DWithin GISFunctionFilterExpr_GISOp = 8
// STIsValid is a special operator that conflicts with proto-generated default methods.
// Using STIsValid instead of IsValid to avoid naming conflicts with IsValid() method.
GISFunctionFilterExpr_STIsValid GISFunctionFilterExpr_GISOp = 9
)
// Enum value maps for GISFunctionFilterExpr_GISOp.
@ -517,6 +520,7 @@ var (
6: "Intersects",
7: "Within",
8: "DWithin",
9: "STIsValid",
}
GISFunctionFilterExpr_GISOp_value = map[string]int32{
"Invalid": 0,
@ -528,6 +532,7 @@ var (
"Intersects": 6,
"Within": 7,
"DWithin": 8,
"STIsValid": 9,
}
)
@ -3492,7 +3497,7 @@ var file_plan_proto_rawDesc = []byte{
0x52, 0x02, 0x6f, 0x70, 0x22, 0x30, 0x0a, 0x06, 0x4e, 0x75, 0x6c, 0x6c, 0x4f, 0x70, 0x12, 0x0b,
0x0a, 0x07, 0x49, 0x6e, 0x76, 0x61, 0x6c, 0x69, 0x64, 0x10, 0x00, 0x12, 0x0a, 0x0a, 0x06, 0x49,
0x73, 0x4e, 0x75, 0x6c, 0x6c, 0x10, 0x01, 0x12, 0x0d, 0x0a, 0x09, 0x49, 0x73, 0x4e, 0x6f, 0x74,
0x4e, 0x75, 0x6c, 0x6c, 0x10, 0x02, 0x22, 0xd3, 0x02, 0x0a, 0x15, 0x47, 0x49, 0x53, 0x46, 0x75,
0x4e, 0x75, 0x6c, 0x6c, 0x10, 0x02, 0x22, 0xe3, 0x02, 0x0a, 0x15, 0x47, 0x49, 0x53, 0x46, 0x75,
0x6e, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x46, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x45, 0x78, 0x70, 0x72,
0x12, 0x3e, 0x0a, 0x0b, 0x63, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x18,
0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2e, 0x70,
@ -3505,15 +3510,16 @@ var file_plan_proto_rawDesc = []byte{
0x47, 0x49, 0x53, 0x46, 0x75, 0x6e, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x46, 0x69, 0x6c, 0x74, 0x65,
0x72, 0x45, 0x78, 0x70, 0x72, 0x2e, 0x47, 0x49, 0x53, 0x4f, 0x70, 0x52, 0x02, 0x6f, 0x70, 0x12,
0x1a, 0x0a, 0x08, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28,
0x01, 0x52, 0x08, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x22, 0x7f, 0x0a, 0x05, 0x47,
0x49, 0x53, 0x4f, 0x70, 0x12, 0x0b, 0x0a, 0x07, 0x49, 0x6e, 0x76, 0x61, 0x6c, 0x69, 0x64, 0x10,
0x00, 0x12, 0x0a, 0x0a, 0x06, 0x45, 0x71, 0x75, 0x61, 0x6c, 0x73, 0x10, 0x01, 0x12, 0x0b, 0x0a,
0x07, 0x54, 0x6f, 0x75, 0x63, 0x68, 0x65, 0x73, 0x10, 0x02, 0x12, 0x0c, 0x0a, 0x08, 0x4f, 0x76,
0x65, 0x72, 0x6c, 0x61, 0x70, 0x73, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x43, 0x72, 0x6f, 0x73,
0x73, 0x65, 0x73, 0x10, 0x04, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e,
0x73, 0x10, 0x05, 0x12, 0x0e, 0x0a, 0x0a, 0x49, 0x6e, 0x74, 0x65, 0x72, 0x73, 0x65, 0x63, 0x74,
0x73, 0x10, 0x06, 0x12, 0x0a, 0x0a, 0x06, 0x57, 0x69, 0x74, 0x68, 0x69, 0x6e, 0x10, 0x07, 0x12,
0x0b, 0x0a, 0x07, 0x44, 0x57, 0x69, 0x74, 0x68, 0x69, 0x6e, 0x10, 0x08, 0x22, 0x91, 0x01, 0x0a,
0x01, 0x52, 0x08, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x22, 0x8e, 0x01, 0x0a, 0x05,
0x47, 0x49, 0x53, 0x4f, 0x70, 0x12, 0x0b, 0x0a, 0x07, 0x49, 0x6e, 0x76, 0x61, 0x6c, 0x69, 0x64,
0x10, 0x00, 0x12, 0x0a, 0x0a, 0x06, 0x45, 0x71, 0x75, 0x61, 0x6c, 0x73, 0x10, 0x01, 0x12, 0x0b,
0x0a, 0x07, 0x54, 0x6f, 0x75, 0x63, 0x68, 0x65, 0x73, 0x10, 0x02, 0x12, 0x0c, 0x0a, 0x08, 0x4f,
0x76, 0x65, 0x72, 0x6c, 0x61, 0x70, 0x73, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x43, 0x72, 0x6f,
0x73, 0x73, 0x65, 0x73, 0x10, 0x04, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x6f, 0x6e, 0x74, 0x61, 0x69,
0x6e, 0x73, 0x10, 0x05, 0x12, 0x0e, 0x0a, 0x0a, 0x49, 0x6e, 0x74, 0x65, 0x72, 0x73, 0x65, 0x63,
0x74, 0x73, 0x10, 0x06, 0x12, 0x0a, 0x0a, 0x06, 0x57, 0x69, 0x74, 0x68, 0x69, 0x6e, 0x10, 0x07,
0x12, 0x0b, 0x0a, 0x07, 0x44, 0x57, 0x69, 0x74, 0x68, 0x69, 0x6e, 0x10, 0x08, 0x12, 0x0d, 0x0a,
0x09, 0x53, 0x54, 0x49, 0x73, 0x56, 0x61, 0x6c, 0x69, 0x64, 0x10, 0x09, 0x22, 0x91, 0x01, 0x0a,
0x09, 0x55, 0x6e, 0x61, 0x72, 0x79, 0x45, 0x78, 0x70, 0x72, 0x12, 0x34, 0x0a, 0x02, 0x6f, 0x70,
0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2e,
0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x70, 0x6c, 0x61, 0x6e, 0x2e, 0x55, 0x6e, 0x61, 0x72, 0x79,