milvus/tests/python_client/geometry_comprehensive_test.py

import random
import numpy as np
import math
import time

from pymilvus import MilvusClient, DataType

COUNT = 10000

def generate_simple_point():
    """Generate simple random point with integer coordinates"""
    x = random.randint(100, 120)
    y = random.randint(30, 50)
    return f"POINT({x} {y})"

def generate_simple_line():
    """Generate simple random line with integer coordinates"""
    x1, y1 = random.randint(100, 120), random.randint(30, 50)
    x2, y2 = random.randint(100, 120), random.randint(30, 50)
    return f"LINESTRING({x1} {y1}, {x2} {y2})"

def generate_simple_polygon():
    """Generate simple random polygon with integer coordinates"""
    # Generate center point
    center_x = random.randint(100, 120)
    center_y = random.randint(30, 50)

    # Generate polygon vertices (triangle or rectangle)
    num_vertices = random.choice([3, 4])
    vertices = []

    for i in range(num_vertices):
        angle = (2 * math.pi * i) / num_vertices
        radius = random.randint(1, 3)
        x = center_x + int(radius * math.cos(angle))
        y = center_y + int(radius * math.sin(angle))
        vertices.append(f"{x} {y}")

    # Close polygon
    vertices.append(vertices[0])
    return f"POLYGON(({', '.join(vertices)}))"

def generate_clustered_data():
    """Generate clustered data with simple integer coordinates"""
    # Define center areas with simple coordinates
    centers = [
        (110, 40),  # Center 1
        (115, 35),  # Center 2
        (105, 45),  # Center 3
    ]

    geometries = []

    for i in range(COUNT):
        # Choose a center
        center = random.choice(centers)
        center_x, center_y = center

        # Generate geometry objects around center
        offset_x = random.randint(-5, 5)
        offset_y = random.randint(-5, 5)

        geom_type = random.choice(['point', 'line', 'polygon'])

        if geom_type == 'point':
            x = center_x + offset_x
            y = center_y + offset_y
            geom = f"POINT({x} {y})"
        elif geom_type == 'line':
            x1 = center_x + offset_x
            y1 = center_y + offset_y
            x2 = center_x + offset_x + random.randint(-3, 3)
            y2 = center_y + offset_y + random.randint(-3, 3)
            geom = f"LINESTRING({x1} {y1}, {x2} {y2})"
        else:  # polygon
            # Generate small polygon around center
            vertices = []
            for j in range(3):
                angle = (2 * math.pi * j) / 3
                radius = random.randint(1, 3)
                x = center_x + offset_x + int(radius * math.cos(angle))
                y = center_y + offset_y + int(radius * math.sin(angle))
                vertices.append(f"{x} {y}")
            vertices.append(vertices[0])  # Close polygon
            geom = f"POLYGON(({', '.join(vertices)}))"

        geometries.append(geom)

    return geometries

def generate_test_data(num_records=10000):
    """Generate test data"""
    ids = list(range(1, num_records + 1))

    # Use clustered data to generate geometry objects
    geometries = generate_clustered_data()

    # Generate random vectors
    vectors = []
    for i in range(num_records):
        vector = [random.random() for _ in range(128)]
        vectors.append(vector)

    return ids, geometries, vectors

def main():
    fmt = "\n=== {:30} ===\n"

    # Connection configuration
    client = MilvusClient(
        uri="http://localhost:19530",
        token=""
    )
    collection_name = "comprehensive_geo_test"
    dim = 128  # Vector dimension

    # Drop existing collection if exists
    if client.has_collection(collection_name):
        client.drop_collection(collection_name)
        print(f"Dropped existing collection: {collection_name}")

    print(fmt.format("Creating Collection"))
    try:
        schema = client.create_schema(auto_id=False, description="comprehensive_geo_test")
        schema.add_field("id", DataType.INT64, is_primary=True)
        schema.add_field("geo", DataType.GEOMETRY)
        schema.add_field("vector", DataType.FLOAT_VECTOR, dim=dim)
        index_params = client.prepare_index_params()
        index_params.add_index(field_name="vector", index_type="IVF_FLAT", metric_type="L2", nlist=128)
        client.create_collection(collection_name, schema=schema, index_params=index_params)
        print(f"Collection created: {collection_name}")
    except Exception as e:
        print(f"Error creating collection: {e}")
        return

    # Generate test data
    print(fmt.format("Generating Test Data"))
    num_records = COUNT
    ids, geometries, vectors = generate_test_data(num_records)

    # Show data preview
    print(fmt.format("Data Preview"))
    for i in range(5):
        print(f"ID: {ids[i]}")
        print(f"Geometry: {geometries[i]}")
        print(f"Vector: [{', '.join([f'{x:.3f}' for x in vectors[i][:3]])}...]")
        print("---")

    # Insert data
    print(fmt.format("Inserting Data"))

    # Insert data in batches to avoid memory issues
    batch_size = 1000
    total_inserted = 0
    time_start = time.time()
    for i in range(0, num_records, batch_size):
        end_idx = min(i + batch_size, num_records)
        batch_data = []

        for j in range(i, end_idx):
            row = {
                "id": ids[j],
                "geo": geometries[j],
                "vector": vectors[j]
            }
            batch_data.append(row)

        try:
            insert_result = client.insert(collection_name, batch_data)
            total_inserted += len(batch_data)
            print(f"Inserted {total_inserted}/{num_records} records")
        except Exception as e:
            print(f"Error inserting data: {e}")
            return
    time_end = time.time()
    print(f"Data Insertion Time: {(time_end - time_start) * 1000:.2f} ms")
    print(fmt.format("Data Insertion Complete"))

    # # Flush data to persistent storage
    # print("Flushing data...")
    # try:
    #     client.flush(collection_name)
    #     print("Data flush complete")
    # except Exception as e:
    #     print(f"Error flushing data: {e}")
    #     return

    # Load collection
    try:
        client.load_collection(collection_name)
        print(fmt.format("Collection Loaded"))
    except Exception as e:
        print(f"Error loading collection: {e}")
        return

    # Test non-spatial function queries
    print(fmt.format("Testing Non-Spatial Queries"))
    time_start = time.time()
    try:
        # Simple query test
        print("\nSimple query test:")
        query_results = client.query(
            collection_name=collection_name,
            filter="id <= 10",
            output_fields=["id", "geo"],
            limit=10
        )
        print(f"Found {len(query_results)} records")
        for result in query_results[:3]:
            print(f"  ID: {result['id']}, geo: {result['geo']}")
    except Exception as e:
        print(f"Simple query test error: {e}")
    time_end = time.time()
    print(f"Simple query test Time: {(time_end - time_start) * 1000:.2f} ms")
    # Test vector search
    print(fmt.format("Testing Vector Search"))
    try:
        search_vector = vectors[0]

        search_results = client.search(
            collection_name=collection_name,
            data=[search_vector],
            anns_field="vector",
            search_params={"metric_type": "L2", "params": {"nprobe": 10}},
            limit=5,
            output_fields=["id", "geo"]
        )

        print(f"Search results length: {len(search_results)}")
        for i, hits in enumerate(search_results):
            print(f"Query vector {i+1} search results:")
            for j, hit in enumerate(hits):
                print(f"  Result {j+1} - ID: {hit['id']}, Geo: {hit['geo']}, distance: {hit['distance']:.4f}")

    except Exception as e:
        print(f"Vector search test error: {e}")

    # Test all spatial functions
    print(fmt.format("Testing Spatial Functions"))

    # Define test geometry objects with simple integer coordinates
    test_geometries = {
        "point": "POINT(110 40)",  # Center point
        "line": "LINESTRING(105 35, 115 45)",  # Line across centers
        "polygon": "POLYGON((105 35, 115 35, 115 45, 105 45, 105 35))",  # Rectangle covering centers
        "small_polygon": "POLYGON((108 38, 112 38, 112 42, 108 42, 108 38))",  # Small rectangle
        "crossing_line": "LINESTRING(100 30, 120 50)",  # Line crossing the area
        "overlapping_polygon": "POLYGON((110 38, 115 38, 115 42, 110 42, 110 38))"  # Overlapping polygon
    }

    # 空间函数列表
    spatial_functions = [
        ("st_equals", "ST_EQUALS"),
        ("st_touches", "ST_TOUCHES"),
        ("st_overlaps", "ST_OVERLAPS"),
        ("st_crosses", "ST_CROSSES"),
        ("st_contains", "ST_CONTAINS"),
        ("st_intersects", "ST_INTERSECTS"),
        ("st_within", "ST_WITHIN")
    ]

    for func_name, func_alias in spatial_functions:
        print(f"\nTesting {func_name} / {func_alias}:")
        # Test different geometry objects
        for geom_key, test_geom in test_geometries.items():
            try:
                time_start = time.time()
                expr = f"{func_name}(geo, '{test_geom}')"
                results = client.query(
                    collection_name=collection_name,
                    filter=expr,
                    output_fields=["id","geo"],
                    limit=10
                )
                time_end = time.time()
                print(f"  {func_name} with {geom_key}: Found {len(results)} records, Time: {(time_end - time_start) * 1000:.2f} ms")
                if results:
                    print(f"    Sample IDs: {[r['id'] for r in results[:5]]}")
                    print(f"    Sample geometries: {[r['geo'] for r in results[:5]]}")
            except Exception as e:
                print(f"  {func_name} with {geom_key} test failed: {e}")

        # Test uppercase function name
        try:
            expr = f"{func_alias}(geo, '{test_geometries['point']}')"
            results = client.query(
                collection_name=collection_name,
                filter=expr,
                output_fields=["id","geo"],
                limit=10
            )
            print(f"  {func_alias}: Found {len(results)} records")
            if results:
                print(f"    Sample IDs: {[r['id'] for r in results[:5]]}")
                print(f"    Sample geometries: {[r['geo'] for r in results[:5]]}")
        except Exception as e:
            print(f"  {func_alias} test failed: {e}")

    # Test different geometry types
    print(fmt.format("Testing Different Geometry Types"))
    print(fmt.format("Using ST_INTERSECTS"))
    for geom_type, test_geom in test_geometries.items():
        print(f"\nTesting {geom_type} geometry:")
        try:
            time_start = time.time()
            expr = f"st_intersects(geo, '{test_geom}')"
            results = client.query(
                collection_name=collection_name,
                filter=expr,
                output_fields=["id","geo"],
                limit=10
            )
            time_end = time.time()
            print(f"  Found {len(results)} records, Time: {(time_end - time_start) * 1000:.2f} ms")
            if results:
                print(f"  Sample IDs: {[r['id'] for r in results[:5]]}")
                print(f"  Sample geometries: {[r['geo'] for r in results[:5]]}")
        except Exception as e:
            print(f"  Test failed: {e}")

    print(fmt.format("Test Complete"))
    print(f"Total records tested: {num_records}")
    print(f"Total spatial functions tested: {len(spatial_functions)}")
    print("All tests completed!")

if __name__ == "__main__":
    main()