test: add import case for struct array (#45146)

/kind improvement

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
zhuwenxing 2025-11-03 17:19:39 +08:00 committed by GitHub
parent 25e0485a56
commit a03c398986
2 changed files with 745 additions and 9 deletions


@@ -689,7 +689,6 @@ def gen_array_field(name=ct.default_array_field_name, element_type=DataType.INT64,
return gen_scalar_field(DataType.ARRAY, name=name, description=description, is_primary=is_primary,
element_type=element_type, max_capacity=max_capacity, **kwargs)
def gen_int8_field(name=ct.default_int8_field_name, description=ct.default_desc, is_primary=False, **kwargs):
return gen_scalar_field(DataType.INT8, name=name, description=description, is_primary=is_primary, **kwargs)


@@ -1,6 +1,10 @@
import pytest
import numpy as np
import random
import time
import os
import json
from pathlib import Path
from typing import List, Dict, Any
from base.client_v2_base import TestMilvusClientV2Base
@@ -8,9 +12,15 @@ from utils.util_log import test_log as log
from common import common_func as cf
from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from check.param_check import compare_lists_with_epsilon_ignore_dict_order
from utils.util_pymilvus import *
from pymilvus import DataType, MilvusClient, AnnSearchRequest, RRFRanker, WeightedRanker
from pymilvus.client.embedding_list import EmbeddingList
from pymilvus.bulk_writer import bulk_import, get_import_progress, LocalBulkWriter, BulkFileType
import pyarrow as pa
import pyarrow.parquet as pq
from minio import Minio
from minio.error import S3Error
prefix = "struct_array"
@@ -2886,6 +2896,46 @@ class TestMilvusClientStructArrayInvalid(TestMilvusClientV2Base):
or "not supported for fields in struct" in error_msg
)
@pytest.mark.tags(CaseLabel.L2)
def test_struct_with_unsupported_vector_field(self):
"""
target: test creating struct with BinaryVector field (should fail)
method: attempt to create struct with BinaryVector field
expected: creation should fail
"""
collection_name = cf.gen_unique_str(f"{prefix}_invalid")
client = self._client()
# Create schema
schema = client.create_schema(auto_id=False, enable_dynamic_field=False)
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
schema.add_field(
field_name="normal_vector", datatype=DataType.FLOAT_VECTOR, dim=default_dim
)
struct_schema = client.create_struct_field_schema()
struct_schema.add_field("binary_vector_field", DataType.BINARY_VECTOR, dim=default_dim)
schema.add_field(
"struct_array",
datatype=DataType.ARRAY,
element_type=DataType.STRUCT,
struct_schema=struct_schema,
max_capacity=100,
)
error = {
ct.err_code: 65535,
ct.err_msg: "now only float vector is supported",
}
self.create_collection(
client,
collection_name,
schema=schema,
check_task=CheckTasks.err_res,
check_items=error,
)
@pytest.mark.tags(CaseLabel.L2)
def test_struct_with_json_field(self):
"""
@@ -2915,7 +2965,46 @@ class TestMilvusClientStructArrayInvalid(TestMilvusClientV2Base):
)
error = {
ct.err_code: 65535,
ct.err_msg: "JSON is not supported for fields in struct",
ct.err_msg: "element type JSON is not supported",
}
self.create_collection(
client,
collection_name,
schema=schema,
check_task=CheckTasks.err_res,
check_items=error,
)
@pytest.mark.tags(CaseLabel.L2)
def test_struct_with_geometry_field(self):
"""
target: test creating struct with Geometry field (should fail)
method: attempt to create struct with Geometry field
expected: creation should fail
"""
collection_name = cf.gen_unique_str(f"{prefix}_invalid")
client = self._client()
# Create schema
schema = client.create_schema(auto_id=False, enable_dynamic_field=False)
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
schema.add_field(
field_name="normal_vector", datatype=DataType.FLOAT_VECTOR, dim=default_dim
)
struct_schema = client.create_struct_field_schema()
struct_schema.add_field("geometry_field", DataType.GEOMETRY)
schema.add_field(
"struct_array",
datatype=DataType.ARRAY,
element_type=DataType.STRUCT,
struct_schema=struct_schema,
max_capacity=100,
)
error = {
ct.err_code: 65535,
ct.err_msg: "element type Geometry is not supported",
}
self.create_collection(
client,
@@ -3276,13 +3365,661 @@ class TestMilvusClientStructArrayInvalid(TestMilvusClientV2Base):
)
class TestMilvusClientStructArrayImport(TestMilvusClientV2Base):
"""Test case of struct array data import functionality using bulk_import API"""
# MinIO configuration constants
MINIO_ACCESS_KEY = "minioadmin"
MINIO_SECRET_KEY = "minioadmin"
REMOTE_DATA_PATH = "bulkinsert_data"
LOCAL_FILES_PATH = "/tmp/milvus_bulkinsert/"
@pytest.fixture(scope="function", autouse=True)
def setup_minio(self, minio_host, minio_bucket):
"""Setup MinIO configuration from fixtures"""
Path(self.LOCAL_FILES_PATH).mkdir(parents=True, exist_ok=True)
self.minio_host = minio_host
self.bucket_name = minio_bucket
self.minio_endpoint = f"{minio_host}:9000"
def gen_parquet_file(self, num_rows: int, dim: int, file_path: str) -> dict:
"""
target: test bulk insert struct array functionality
method: insert struct array data into collection
expected: insert successful
Generate parquet file with struct array data
Args:
num_rows: Number of rows to generate
dim: Dimension of vector fields
file_path: Path to save the parquet file
Returns:
Dictionary containing the generated data for verification
"""
pass
id_arr = []
float_vector_arr = []
struct_arr = []
for i in range(num_rows):
# ID field
id_arr.append(np.int64(i))
# Float vector field
raw_vector = [random.random() for _ in range(dim)]
float_vector_arr.append(np.array(raw_vector, dtype=np.float32))
# Struct array field - generate array of struct objects
arr_len = random.randint(1, 5) # Random number of struct elements (1-5)
struct_list = []
for j in range(arr_len):
struct_obj = {
"struct_str": f"struct_str_{i}_{j}_{cf.gen_unique_str()}",
"struct_float_vec": [random.random() for _ in range(dim)]
}
struct_list.append(struct_obj)
struct_arr.append(struct_list)
# Define PyArrow schema for struct field
struct_type = pa.struct([
pa.field("struct_str", pa.string()),
pa.field("struct_float_vec", pa.list_(pa.float32()))
])
# Build PyArrow arrays with explicit types
pa_arrays = {
"id": pa.array(id_arr, type=pa.int64()),
"float_vector": pa.array(
[np.array(v, dtype=np.float32) for v in float_vector_arr],
type=pa.list_(pa.float32())
),
"struct_array": pa.array(struct_arr, type=pa.list_(struct_type))
}
# Create PyArrow table and write to Parquet
table = pa.table(pa_arrays)
pq.write_table(table, file_path, row_group_size=10000)
log.info(f"Generated parquet file with {num_rows} rows: {file_path}")
return {
"id": id_arr,
"float_vector": float_vector_arr,
"struct_array": struct_arr
}
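As a side note, one quick way to sanity-check a file produced by gen_parquet_file is to read it back with PyArrow and inspect the schema; a minimal sketch, assuming the illustrative path below:

import pyarrow.parquet as pq

table = pq.read_table("/tmp/milvus_bulkinsert/test_example.parquet")  # illustrative path
print(table.num_rows)  # should match the num_rows passed to gen_parquet_file
# expected: list<struct<struct_str: string, struct_float_vec: list<float>>>
print(table.schema.field("struct_array").type)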
def gen_json_file(self, num_rows: int, dim: int, file_path: str) -> dict:
"""
Generate JSON file with struct array data in array format: [row1, row2, ...]
Args:
num_rows: Number of rows to generate
dim: Dimension of vector fields
file_path: Path to save the JSON file
Returns:
Dictionary containing the generated data for verification
"""
rows = []
id_arr = []
float_vector_arr = []
struct_arr = []
for i in range(num_rows):
# ID field
id_arr.append(i)
# Float vector field
float_vector = [random.random() for _ in range(dim)]
float_vector_arr.append(float_vector)
# Struct array field - generate array of struct objects
arr_len = random.randint(1, 5) # Random number of struct elements (1-5)
struct_list = []
for j in range(arr_len):
struct_obj = {
"struct_str": f"struct_str_{i}_{j}_{cf.gen_unique_str()}",
"struct_float_vec": [random.random() for _ in range(dim)]
}
struct_list.append(struct_obj)
struct_arr.append(struct_list)
# Build row object
row = {
"id": i,
"float_vector": float_vector,
"struct_array": struct_list
}
rows.append(row)
# Write to JSON file as array format: [row1, row2, ...]
with open(file_path, "w") as f:
json.dump(rows, f, indent=2)
log.info(f"Generated JSON file with {num_rows} rows: {file_path}")
return {
"id": id_arr,
"float_vector": float_vector_arr,
"struct_array": struct_arr
}
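A quick structural check of the file gen_json_file writes could look like the sketch below (the path is illustrative):

import json

with open("/tmp/milvus_bulkinsert/test_example.json") as f:  # illustrative path
    rows = json.load(f)
assert isinstance(rows, list)  # top level is a JSON array: [row1, row2, ...]
assert set(rows[0].keys()) == {"id", "float_vector", "struct_array"}
assert isinstance(rows[0]["struct_array"][0]["struct_float_vec"], list)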
def gen_file_with_local_bulk_writer(
self,
schema,
data: List[Dict[str, Any]],
file_type: str = "PARQUET"
) -> tuple[List[List[str]], dict]:
"""
Generate import file using LocalBulkWriter from insert-format data
Args:
schema: Collection schema
data: List of dictionaries in insert format (same format as client.insert())
file_type: Output file type, "PARQUET" or "JSON"
Returns:
Tuple of (batch file path lists generated by LocalBulkWriter, original data dict for verification)
"""
# Convert file_type string to BulkFileType enum
bulk_file_type = BulkFileType.PARQUET if file_type == "PARQUET" else BulkFileType.JSON
# Create LocalBulkWriter
writer = LocalBulkWriter(
schema=schema,
local_path=self.LOCAL_FILES_PATH,
segment_size=512 * 1024 * 1024, # 512MB
file_type=bulk_file_type
)
log.info(f"Creating {file_type} file using LocalBulkWriter with {len(data)} rows")
# Append each row using the same format as insert
for row in data:
writer.append_row(row)
# Commit to generate files
writer.commit()
# Get the generated file paths
batch_files = writer.batch_files
log.info(f"LocalBulkWriter generated files: {batch_files}")
# Extract data for verification (same format as other gen methods)
id_arr = [row["id"] for row in data]
float_vector_arr = [row["float_vector"] for row in data]
struct_arr = [row["struct_array"] for row in data]
return batch_files, {
"id": id_arr,
"float_vector": float_vector_arr,
"struct_array": struct_arr
}
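Note that writer.batch_files is a list of lists, one inner list of file paths per committed batch, which is why the LocalBulkWriter test below flattens it before uploading. Roughly (the UUID directory name is illustrative, not real output):

batch_files = [["/tmp/milvus_bulkinsert/<uuid>/1.parquet"]]  # shape only
for file_list in batch_files:
    for file_path in file_list:
        print(file_path)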
def upload_to_minio(self, local_file_path: str) -> List[List[str]]:
"""
Upload a local data file (Parquet or JSON) to MinIO
Args:
local_file_path: Local path of the file to upload
Returns:
List of remote file paths in MinIO
"""
if not os.path.exists(local_file_path):
raise Exception(f"Local file '{local_file_path}' doesn't exist")
try:
minio_client = Minio(
endpoint=self.minio_endpoint,
access_key=self.MINIO_ACCESS_KEY,
secret_key=self.MINIO_SECRET_KEY,
secure=False
)
# Check if bucket exists
if not minio_client.bucket_exists(self.bucket_name):
raise Exception(f"MinIO bucket '{self.bucket_name}' doesn't exist")
# Upload file
filename = os.path.basename(local_file_path)
minio_file_path = os.path.join(self.REMOTE_DATA_PATH, filename)
minio_client.fput_object(self.bucket_name, minio_file_path, local_file_path)
log.info(f"Uploaded file to MinIO: {minio_file_path}")
return [[minio_file_path]]
except S3Error as e:
raise Exception(f"Failed to connect MinIO server {self.minio_endpoint}, error: {e}")
def call_bulkinsert(self, collection_name: str, batch_files: List[List[str]]):
"""
Call bulk import API and wait for completion
Args:
collection_name: Target collection name
batch_files: List of file paths in MinIO
"""
url = f"http://{cf.param_info.param_host}:{cf.param_info.param_port}"
log.info(f"Starting bulk import to collection '{collection_name}'")
resp = bulk_import(
url=url,
collection_name=collection_name,
files=batch_files,
)
job_id = resp.json()['data']['jobId']
log.info(f"Bulk import job created, job_id: {job_id}")
# Wait for import to complete
timeout = 300
start_time = time.time()
while time.time() - start_time < timeout:
time.sleep(5)
resp = get_import_progress(url=url, job_id=job_id)
state = resp.json()['data']['state']
progress = resp.json()['data']['progress']
log.info(f"Import job {job_id} - state: {state}, progress: {progress}%")
if state == "Importing":
continue
elif state == "Failed":
reason = resp.json()['data']['reason']
raise Exception(f"Bulk import job {job_id} failed: {reason}")
elif state == "Completed" and progress == 100:
log.info(f"Bulk import job {job_id} completed successfully")
break
else:
raise Exception(f"Bulk import job {job_id} timeout after {timeout}s")
log.info("Bulk import finished")
def verify_data(self, client: MilvusClient, collection_name: str, original_data: dict):
"""
Verify imported data matches original data using compare_lists_with_epsilon_ignore_dict_order
Args:
client: MilvusClient instance
collection_name: Collection name
original_data: Original data dictionary for comparison
"""
log.info("============= Verifying imported data ==============")
# Query all data from the collection
num_rows = len(original_data["id"])
log.info(f"Total rows to verify: {num_rows}")
results = client.query(
collection_name=collection_name,
filter=f"id >= {min(original_data['id'])}",
output_fields=["*"],
limit=num_rows + 100 # Add buffer to ensure all data is retrieved
)
log.info(f"Query returned {len(results)} rows")
# Check if row count matches
assert len(results) == num_rows, f"Row count mismatch: expected {num_rows}, got {len(results)}"
# Convert original data to comparable format (list of dicts per row)
original_rows = []
for i in range(num_rows):
row_id = int(original_data["id"][i])
original_rows.append({
"id": row_id,
"float_vector": [float(x) for x in original_data["float_vector"][i]],
"struct_array": original_data["struct_array"][i]
})
# Convert query results to comparable format
query_rows_formatted = []
for row in results:
formatted_row = {
"id": row["id"],
"float_vector": [float(x) for x in row["float_vector"]],
"struct_array": row["struct_array"]
}
query_rows_formatted.append(formatted_row)
# Use compare_lists_with_epsilon_ignore_dict_order for comparison
# This function handles floating-point tolerance and dict order
is_equal = compare_lists_with_epsilon_ignore_dict_order(
original_rows,
query_rows_formatted,
epsilon=epsilon
)
assert is_equal, "Data verification failed: original data and query results do not match"
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("dim", [128])
@pytest.mark.parametrize("entities", [1000])
@pytest.mark.parametrize("array_capacity", [100])
def test_import_struct_array_with_parquet(self, dim, entities, array_capacity):
"""
Test bulk import of struct array data from parquet file
Collection schema: [id, float_vector, struct_array]
Struct array contains: [struct_str (VARCHAR), struct_float_vec (FLOAT_VECTOR)]
Data file format: parquet
Steps:
1. Create collection with struct array field using MilvusClient
2. Generate parquet data with PyArrow
3. Upload parquet file to MinIO
4. Import data using bulk_import API
5. Verify imported data count
6. Verify imported data integrity
"""
client = self._client()
c_name = cf.gen_unique_str("import_struct_array")
# Step 1: Create collection with struct array field
schema = client.create_schema(auto_id=False, enable_dynamic_field=False)
# Add primary key field
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
# Add float vector field
schema.add_field(field_name="float_vector", datatype=DataType.FLOAT_VECTOR, dim=dim)
# Create struct schema for array elements
struct_schema = client.create_struct_field_schema()
struct_schema.add_field("struct_str", DataType.VARCHAR, max_length=65535)
struct_schema.add_field("struct_float_vec", DataType.FLOAT_VECTOR, dim=dim)
# Add struct array field
schema.add_field(
"struct_array",
datatype=DataType.ARRAY,
element_type=DataType.STRUCT,
struct_schema=struct_schema,
max_capacity=array_capacity,
)
# Create index params
index_params = client.prepare_index_params()
# Index for regular vector field
index_params.add_index(
field_name="float_vector",
index_type="HNSW",
metric_type="COSINE",
params={"M": 16, "efConstruction": 200}
)
# Index for vector field inside struct array
# Field name format: struct_array[struct_float_vec]
index_params.add_index(
field_name="struct_array[struct_float_vec]",
index_type="HNSW",
metric_type="MAX_SIM_COSINE",
params={"M": 16, "efConstruction": 200}
)
# Create collection
client.create_collection(
collection_name=c_name,
schema=schema,
index_params=index_params
)
log.info(f"Collection '{c_name}' created")
# Step 2: Generate parquet file
local_file_path = os.path.join(self.LOCAL_FILES_PATH, f"test_{cf.gen_unique_str()}.parquet")
original_data = self.gen_parquet_file(entities, dim, local_file_path)
# Step 3: Upload to MinIO
remote_files = self.upload_to_minio(local_file_path)
# Step 4: Import data
self.call_bulkinsert(c_name, remote_files)
# Refresh load state after import
client.refresh_load(collection_name=c_name)
# Step 5: Verify number of entities
stats = client.get_collection_stats(collection_name=c_name)
num_entities = stats['row_count']
log.info(f"Collection entities: {num_entities}")
assert num_entities == entities, f"Expected {entities} entities, got {num_entities}"
# Step 6: Verify data integrity
self.verify_data(client, c_name, original_data)
log.info("Struct array import test completed successfully")
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("dim", [128])
@pytest.mark.parametrize("entities", [1000])
@pytest.mark.parametrize("array_capacity", [100])
def test_import_struct_array_with_json(self, dim, entities, array_capacity):
"""
Test bulk import of struct array data from JSON file
Collection schema: [id, float_vector, struct_array]
Struct array contains: [struct_str (VARCHAR), struct_float_vec (FLOAT_VECTOR)]
Data file format: JSON array format [row1, row2, ...]
Steps:
1. Create collection with struct array field using MilvusClient
2. Generate JSON data in array format
3. Upload JSON file to MinIO
4. Import data using bulk_import API
5. Verify imported data count
6. Verify imported data integrity
"""
client = self._client()
c_name = cf.gen_unique_str("import_struct_array_json")
# Step 1: Create collection with struct array field
schema = client.create_schema(auto_id=False, enable_dynamic_field=False)
# Add primary key field
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
# Add float vector field
schema.add_field(field_name="float_vector", datatype=DataType.FLOAT_VECTOR, dim=dim)
# Create struct schema for array elements
struct_schema = client.create_struct_field_schema()
struct_schema.add_field("struct_str", DataType.VARCHAR, max_length=65535)
struct_schema.add_field("struct_float_vec", DataType.FLOAT_VECTOR, dim=dim)
# Add struct array field
schema.add_field(
"struct_array",
datatype=DataType.ARRAY,
element_type=DataType.STRUCT,
struct_schema=struct_schema,
max_capacity=array_capacity,
)
# Create index params
index_params = client.prepare_index_params()
# Index for regular vector field
index_params.add_index(
field_name="float_vector",
index_type="HNSW",
metric_type="COSINE",
params={"M": 16, "efConstruction": 200}
)
# Index for vector field inside struct array
# Field name format: struct_array[struct_float_vec]
index_params.add_index(
field_name="struct_array[struct_float_vec]",
index_type="HNSW",
metric_type="MAX_SIM_COSINE",
params={"M": 16, "efConstruction": 200}
)
# Create collection
client.create_collection(
collection_name=c_name,
schema=schema,
index_params=index_params
)
log.info(f"Collection '{c_name}' created")
# Step 2: Generate JSON file
local_file_path = os.path.join(self.LOCAL_FILES_PATH, f"test_{cf.gen_unique_str()}.json")
original_data = self.gen_json_file(entities, dim, local_file_path)
# Step 3: Upload to MinIO
remote_files = self.upload_to_minio(local_file_path)
# Step 4: Import data
self.call_bulkinsert(c_name, remote_files)
# Refresh load state after import
client.refresh_load(collection_name=c_name)
# Step 5: Verify number of entities
stats = client.get_collection_stats(collection_name=c_name)
num_entities = stats['row_count']
log.info(f"Collection entities: {num_entities}")
assert num_entities == entities, f"Expected {entities} entities, got {num_entities}"
# Step 6: Verify data integrity
self.verify_data(client, c_name, original_data)
log.info("Struct array JSON import test completed successfully")
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("dim", [128])
@pytest.mark.parametrize("entities", [1000])
@pytest.mark.parametrize("array_capacity", [100])
@pytest.mark.parametrize("file_type", ["PARQUET", "JSON"])
@pytest.mark.xfail(reason="issue: https://github.com/milvus-io/pymilvus/issues/3050")
def test_import_struct_array_with_local_bulk_writer(self, dim, entities, array_capacity, file_type):
"""
Test bulk import of struct array data using LocalBulkWriter
This test demonstrates using LocalBulkWriter to convert insert-format data
into import files, which is much simpler than manually creating PyArrow/JSON files.
Collection schema: [id, float_vector, struct_array]
Struct array contains: [struct_str (VARCHAR), struct_float_vec (FLOAT_VECTOR)]
Steps:
1. Create collection with struct array field using MilvusClient
2. Generate insert-format data (same format as client.insert())
3. Use LocalBulkWriter to convert data to import files
4. Upload generated files to MinIO
5. Import data using bulk_import API
6. Verify imported data count and integrity
"""
client = self._client()
c_name = cf.gen_unique_str("import_lbw_struct")
# Step 1: Create collection with struct array field
schema = client.create_schema(auto_id=False, enable_dynamic_field=False)
# Add primary key field
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
# Add float vector field
schema.add_field(field_name="float_vector", datatype=DataType.FLOAT_VECTOR, dim=dim)
# Create struct schema for array elements
struct_schema = client.create_struct_field_schema()
struct_schema.add_field("struct_str", DataType.VARCHAR, max_length=65535)
struct_schema.add_field("struct_float_vec", DataType.FLOAT_VECTOR, dim=dim)
# Add struct array field
schema.add_field(
"struct_array",
datatype=DataType.ARRAY,
element_type=DataType.STRUCT,
struct_schema=struct_schema,
max_capacity=array_capacity,
)
# Create index params
index_params = client.prepare_index_params()
# Index for regular vector field
index_params.add_index(
field_name="float_vector",
index_type="HNSW",
metric_type="COSINE",
params={"M": 16, "efConstruction": 200}
)
# Index for vector field inside struct array
index_params.add_index(
field_name="struct_array[struct_float_vec]",
index_type="HNSW",
metric_type="MAX_SIM_COSINE",
params={"M": 16, "efConstruction": 200}
)
# Create collection
client.create_collection(
collection_name=c_name,
schema=schema,
index_params=index_params
)
log.info(f"Collection '{c_name}' created")
# Step 2: Generate insert-format data
log.info(f"Generating {entities} rows of insert-format data")
insert_data = []
for i in range(entities):
# Generate random number of struct elements (1-5)
arr_len = random.randint(1, 5)
struct_list = []
for j in range(arr_len):
struct_obj = {
"struct_str": f"struct_str_{i}_{j}_{cf.gen_unique_str()}",
"struct_float_vec": [random.random() for _ in range(dim)]
}
struct_list.append(struct_obj)
row = {
"id": i,
"float_vector": [random.random() for _ in range(dim)],
"struct_array": struct_list
}
insert_data.append(row)
# Step 3: Use LocalBulkWriter to convert data to import files
log.info(f"Using LocalBulkWriter to generate {file_type} files")
batch_files, original_data = self.gen_file_with_local_bulk_writer(
schema=schema,
data=insert_data,
file_type=file_type
)
# Step 4: Upload generated files to MinIO
# LocalBulkWriter generates files in a UUID subdirectory
# We need to upload all files in that directory
import_files = []
for file_list in batch_files:
for file_path in file_list:
remote_files = self.upload_to_minio(file_path)
import_files.extend(remote_files)
log.info(f"Uploaded {len(import_files)} file(s) to MinIO")
# Step 5: Import data using bulk_import API
self.call_bulkinsert(c_name, import_files)
# Refresh load state after import
client.refresh_load(collection_name=c_name)
# Step 6: Verify number of entities
stats = client.get_collection_stats(collection_name=c_name)
num_entities = stats['row_count']
log.info(f"Collection entities: {num_entities}")
assert num_entities == entities, f"Expected {entities} entities, got {num_entities}"
# Step 7: Verify data integrity
self.verify_data(client, c_name, original_data)
log.info(f"LocalBulkWriter {file_type} import test completed successfully")