test: refactor checker to use milvus client (#45524)

/kind improvement

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>

parent 79926b412c
commit e0df44481d
File diff suppressed because it is too large
@@ -22,6 +22,7 @@ RELEASE_NAME = 'test-allstandalone-pod-kill-19-25-26'
 WAIT_PER_OP = 10 # time to wait in seconds between operations
 CHAOS_DURATION = 120 # chaos duration time in seconds
 DEFAULT_INDEX_PARAM = {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 48, "efConstruction": 500}}
+DEFAULT_EMB_LIST_INDEX_PARAM = {"index_type": "HNSW", "metric_type": "MAX_SIM_COSINE", "params": {"M": 16, "efConstruction": 200}}
 DEFAULT_SEARCH_PARAM = {"metric_type": "L2", "params": {"ef": 64}}
 DEFAULT_INT8_INDEX_PARAM = {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 48, "efConstruction": 500}}
 DEFAULT_INT8_SEARCH_PARAM = {"metric_type": "L2", "params": {"ef": 64}}
@@ -24,6 +24,7 @@ from chaos.checker import (CollectionCreateChecker,
                            AlterCollectionChecker,
                            AddFieldChecker,
                            CollectionRenameChecker,
+                           TensorSearchChecker,
                            Op,
                            EventRecords,
                            ResultAnalyzer
@@ -83,6 +84,7 @@ class TestOperations(TestBase):
         checkers = {
             Op.create: CollectionCreateChecker(collection_name=c_name),
             Op.insert: InsertChecker(collection_name=c_name),
+            Op.tensor_search :TensorSearchChecker(collection_name=c_name),
             Op.upsert: UpsertChecker(collection_name=c_name),
             Op.partial_update: PartialUpdateChecker(collection_name=c_name),
             Op.flush: FlushChecker(collection_name=c_name),
@@ -836,33 +836,49 @@ def gen_default_collection_schema(description=ct.default_desc, primary_field=ct.
 
 
 def gen_all_datatype_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
-                                       auto_id=False, dim=ct.default_dim, enable_dynamic_field=True, nullable=True,**kwargs):
+                                       auto_id=False, dim=ct.default_dim, enable_dynamic_field=True, nullable=True,
+                                       enable_struct_array_field=True, **kwargs):
     analyzer_params = {
         "tokenizer": "standard",
     }
-    fields = [
-        gen_int64_field(),
-        gen_float_field(nullable=nullable),
-        gen_string_field(nullable=nullable),
-        gen_string_field(name="document", max_length=2000, enable_analyzer=True, enable_match=True, nullable=nullable),
-        gen_string_field(name="text", max_length=2000, enable_analyzer=True, enable_match=True,
-                         analyzer_params=analyzer_params),
-        gen_json_field(nullable=nullable),
-        gen_geometry_field(nullable=nullable),
-        gen_array_field(name="array_int", element_type=DataType.INT64),
-        gen_array_field(name="array_float", element_type=DataType.FLOAT),
-        gen_array_field(name="array_varchar", element_type=DataType.VARCHAR, max_length=200),
-        gen_array_field(name="array_bool", element_type=DataType.BOOL),
-        gen_float_vec_field(dim=dim),
-        gen_int8_vec_field(name="image_emb", dim=dim),
-        gen_float_vec_field(name="text_sparse_emb", vector_data_type=DataType.SPARSE_FLOAT_VECTOR),
-        gen_float_vec_field(name="voice_emb", dim=dim),
-        # gen_timestamptz_field(name="timestamptz", nullable=nullable),
-    ]
-
-    schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
-                                                                    primary_field=primary_field, auto_id=auto_id,
-                                                                    enable_dynamic_field=enable_dynamic_field, **kwargs)
+    # Create schema using MilvusClient
+    schema = MilvusClient.create_schema(
+        auto_id=auto_id,
+        enable_dynamic_field=enable_dynamic_field,
+        description=description,
+        **kwargs
+    )
+
+    # Add all fields using schema.add_field()
+    schema.add_field(primary_field, DataType.INT64, is_primary=True)
+    schema.add_field(ct.default_float_field_name, DataType.FLOAT, nullable=nullable)
+    schema.add_field(ct.default_string_field_name, DataType.VARCHAR, max_length=ct.default_max_length, nullable=nullable)
+    schema.add_field("document", DataType.VARCHAR, max_length=2000, enable_analyzer=True, enable_match=True, nullable=nullable)
+    schema.add_field("text", DataType.VARCHAR, max_length=2000, enable_analyzer=True, enable_match=True,
+                     analyzer_params=analyzer_params)
+    schema.add_field(ct.default_json_field_name, DataType.JSON, nullable=nullable)
+    schema.add_field(ct.default_geometry_field_name, DataType.GEOMETRY, nullable=nullable)
+    schema.add_field("array_int", DataType.ARRAY, element_type=DataType.INT64, max_capacity=ct.default_max_capacity)
+    schema.add_field("array_float", DataType.ARRAY, element_type=DataType.FLOAT, max_capacity=ct.default_max_capacity)
+    schema.add_field("array_varchar", DataType.ARRAY, element_type=DataType.VARCHAR, max_length=200, max_capacity=ct.default_max_capacity)
+    schema.add_field("array_bool", DataType.ARRAY, element_type=DataType.BOOL, max_capacity=ct.default_max_capacity)
+    schema.add_field(ct.default_float_vec_field_name, DataType.FLOAT_VECTOR, dim=dim)
+    schema.add_field("image_emb", DataType.INT8_VECTOR, dim=dim)
+    schema.add_field("text_sparse_emb", DataType.SPARSE_FLOAT_VECTOR)
+    # schema.add_field("voice_emb", DataType.FLOAT_VECTOR, dim=dim)
+    # schema.add_field("timestamptz", DataType.TIMESTAMPTZ, nullable=nullable)
+
+    # Add struct array field
+    if enable_struct_array_field:
+        struct_schema = MilvusClient.create_struct_field_schema()
+        struct_schema.add_field("name", DataType.VARCHAR, max_length=200)
+        struct_schema.add_field("age", DataType.INT64)
+        struct_schema.add_field("float_vector", DataType.FLOAT_VECTOR, dim=dim)
+        schema.add_field("array_struct", datatype=DataType.ARRAY, element_type=DataType.STRUCT,
+                         struct_schema=struct_schema, max_capacity=10)
+
+    # Add BM25 function
     bm25_function = Function(
         name=f"text",
         function_type=FunctionType.BM25,
@@ -871,6 +887,7 @@ def gen_all_datatype_collection_schema(description=ct.default_desc, primary_fiel
         params={},
     )
     schema.add_function(bm25_function)
+
     return schema
 
 
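For reference, a minimal sketch (not part of this diff) of how the refactored helper can be consumed from a MilvusClient-based test. The URI and collection name are illustrative, the module paths assume the layout used under tests/python_client, enable_struct_array_field=False avoids depending on a struct-array-capable pymilvus build, and a Milvus deployment supporting all the field types in the helper is assumed:

from pymilvus import MilvusClient
from common import common_func as cf

client = MilvusClient(uri="http://localhost:19530")  # assumed local deployment
schema = cf.gen_all_datatype_collection_schema(dim=128, enable_struct_array_field=False)
client.create_collection("demo_all_datatype", schema=schema)  # per-field index creation and load would follow
print(client.describe_collection("demo_all_datatype")["fields"])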
@@ -1779,16 +1796,44 @@ def get_column_data_by_schema(nb=ct.default_nb, schema=None, skip_vectors=False,
     return data
 
 
+def convert_orm_schema_to_dict_schema(orm_schema):
+    """
+    Convert ORM CollectionSchema object to dict format (same as describe_collection output).
+
+    Args:
+        orm_schema: CollectionSchema object from pymilvus.orm
+
+    Returns:
+        dict: Schema in dict format compatible with MilvusClient describe_collection output
+    """
+    # Use the built-in to_dict() method which already provides the right structure
+    schema_dict = orm_schema.to_dict()
+
+    # to_dict() already includes:
+    # - auto_id
+    # - description
+    # - fields (with each field's to_dict())
+    # - enable_dynamic_field
+    # - functions (if present)
+    # - struct_fields (if present)
+
+    return schema_dict
+
+
 def gen_row_data_by_schema(nb=ct.default_nb, schema=None, start=0, random_pk=False, skip_field_names=[], desired_field_names=[]):
     """
     Generates row data based on the given schema.
 
     Args:
         nb (int): Number of rows to generate. Defaults to ct.default_nb.
-        schema (Schema): Collection schema or collection info. If None, uses default schema.
+        schema (Schema): Collection schema or collection info. Can be:
+            - dict (from client.describe_collection())
+            - CollectionSchema object (from ORM)
+            - None (uses default schema)
         start (int): Starting value for primary key fields. Defaults to 0.
         random_pk (bool, optional): Whether to generate random primary key values (default: False)
         skip_field_names(list, optional): whether to skip some field to gen data manually (default: [])
+        desired_field_names(list, optional): only generate data for specified field names (default: [])
 
     Returns:
         list[dict]: List of dictionaries where each dictionary represents a row,
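A small illustration (server URI and collection name assumed, module paths as in tests/python_client) of the normalization this helper enables: row generation can start from either schema source once both are in dict form.

from pymilvus import MilvusClient
from common import common_func as cf

client = MilvusClient(uri="http://localhost:19530")
dict_schema = client.describe_collection("demo_all_datatype")   # already a dict, usable as-is
helper_schema = cf.gen_all_datatype_collection_schema(dim=128)  # CollectionSchema-style object
as_dict = cf.convert_orm_schema_to_dict_schema(helper_schema)
assert {"fields", "auto_id", "description"} <= set(as_dict)     # keys promised by to_dict()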
@@ -1798,6 +1843,7 @@ def gen_row_data_by_schema(nb=ct.default_nb, schema=None, start=0, random_pk=Fal
         - Skips auto_id fields and function output fields.
         - For primary key fields, generates sequential values starting from 'start'.
         - For non-primary fields, generates random data based on field type.
+        - Supports struct array fields in both dict and ORM schema formats.
     """
     # if both skip_field_names and desired_field_names are specified, raise an exception
     if skip_field_names and desired_field_names:
@@ -1806,10 +1852,12 @@ def gen_row_data_by_schema(nb=ct.default_nb, schema=None, start=0, random_pk=Fal
     if schema is None:
         schema = gen_default_collection_schema()
 
-    # ignore auto id field and the fields in function output
-    func_output_fields = []
-    if isinstance(schema, dict):
-        # a dict of collection schema info is usually from client.describe_collection()
+    # Convert ORM schema to dict schema for unified processing
+    if not isinstance(schema, dict):
+        schema = convert_orm_schema_to_dict_schema(schema)
+
+    # Now schema is always a dict after conversion, process it uniformly
+    # Get all fields from schema
     all_fields = schema.get('fields', [])
     fields = []
     for field in all_fields:
@@ -1820,12 +1868,44 @@ def gen_row_data_by_schema(nb=ct.default_nb, schema=None, start=0, random_pk=Fal
         elif not desired_field_names:
             fields.append(field)
 
+    # Get struct_fields from schema
+    struct_fields = schema.get('struct_fields', [])
+    log.debug(f"[gen_row_data_by_schema] struct_fields from schema: {len(struct_fields)} items")
+    if struct_fields:
+        log.debug(f"[gen_row_data_by_schema] First struct_field: {struct_fields[0]}")
+
+    # If struct_fields is not present, extract struct array fields from fields list
+    # This happens when using client.describe_collection()
+    if not struct_fields:
+        struct_fields = []
+        for field in fields:
+            if field.get('type') == DataType.ARRAY and field.get('element_type') == DataType.STRUCT:
+                # Convert field format to struct_field format
+                struct_field_dict = {
+                    'name': field.get('name'),
+                    'max_capacity': field.get('params', {}).get('max_capacity', 100),
+                    'fields': []
+                }
+                # Get struct fields from field - key can be 'struct_fields' or 'struct_schema'
+                struct_field_list = field.get('struct_fields') or field.get('struct_schema')
+                if struct_field_list:
+                    # If it's a dict with 'fields' key, get the fields
+                    if isinstance(struct_field_list, dict) and 'fields' in struct_field_list:
+                        struct_field_dict['fields'] = struct_field_list['fields']
+                    # If it's already a list, use it directly
+                    elif isinstance(struct_field_list, list):
+                        struct_field_dict['fields'] = struct_field_list
+                struct_fields.append(struct_field_dict)
+
+    # Get function output fields to skip
+    func_output_fields = []
     functions = schema.get('functions', [])
     for func in functions:
         output_field_names = func.get('output_field_names', [])
         func_output_fields.extend(output_field_names)
     func_output_fields = list(set(func_output_fields))
 
+    # Filter fields that need data generation
     fields_needs_data = []
     for field in fields:
         field_name = field.get('name', None)
@@ -1833,57 +1913,35 @@ def gen_row_data_by_schema(nb=ct.default_nb, schema=None, start=0, random_pk=Fal
             continue
         if field_name in func_output_fields or field_name in skip_field_names:
             continue
+        # Skip struct array fields as they are handled separately via struct_fields
+        if field.get('type') == DataType.ARRAY and field.get('element_type') == DataType.STRUCT:
+            continue
         fields_needs_data.append(field)
 
+    # Generate data for each row
     data = []
     for i in range(nb):
         tmp = {}
+        # Generate data for regular fields
         for field in fields_needs_data:
             tmp[field.get('name', None)] = gen_data_by_collection_field(field, random_pk=random_pk)
+            # Handle primary key fields specially
             if field.get('is_primary', False) is True and field.get('type', None) == DataType.INT64:
                 tmp[field.get('name', None)] = start
                 start += 1
             if field.get('is_primary', False) is True and field.get('type', None) == DataType.VARCHAR:
                 tmp[field.get('name', None)] = str(start)
                 start += 1
-        data.append(tmp)
-    else:
-        # a schema object is usually form orm schema object
-        all_fields = schema.fields
-        fields = []
-        for field in all_fields:
-            # if desired_field_names is specified, only generate the fields in desired_field_names
-            if field.name in desired_field_names:
-                fields.append(field)
-            # elif desired_field_names is not specified, generate all fields
-            elif not desired_field_names:
-                fields.append(field)
-
-        if hasattr(schema, "functions"):
-            functions = schema.functions
-            for func in functions:
-                output_field_names = func.output_field_names
-                func_output_fields.extend(output_field_names)
-        func_output_fields = list(set(func_output_fields))
-
-        fields_needs_data = []
-        for field in fields:
-            if field.auto_id:
-                continue
-            if field.name in func_output_fields or field.name in skip_field_names:
-                continue
-            fields_needs_data.append(field)
-        data = []
-        for i in range(nb):
-            tmp = {}
-            for field in fields_needs_data:
-                tmp[field.name] = gen_data_by_collection_field(field, random_pk=random_pk)
-                if field.is_primary is True and field.dtype == DataType.INT64:
-                    tmp[field.name] = start
-                    start += 1
-                if field.is_primary is True and field.dtype == DataType.VARCHAR:
-                    tmp[field.name] = str(start)
-                    start += 1
-            data.append(tmp)
+
+        # Generate data for struct array fields
+        for struct_field in struct_fields:
+            field_name = struct_field.get('name', None)
+            struct_data = gen_struct_array_data(struct_field, start=start, random_pk=random_pk)
+            tmp[field_name] = struct_data
+
+        data.append(tmp)
+
+    log.debug(f"[gen_row_data_by_schema] Generated {len(data)} rows, first row keys: {list(data[0].keys()) if data else []}")
    return data
 
 
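An illustrative end-to-end use of the unified dict path (URI, collection name, and module paths assumed, as above); the describe_collection() output now feeds straight into row generation:

from pymilvus import MilvusClient
from common import common_func as cf

client = MilvusClient(uri="http://localhost:19530")
info = client.describe_collection("demo_all_datatype")   # dict schema with 'fields' / 'functions'
rows = cf.gen_row_data_by_schema(nb=10, schema=info, start=0)
client.insert("demo_all_datatype", rows)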
@@ -2035,6 +2093,17 @@ def get_int8_vec_field_name_list(schema=None):
             vec_fields.append(field.name)
     return vec_fields
 
+def get_emb_list_field_name_list(schema=None):
+    vec_fields = []
+    if schema is None:
+        schema = gen_default_collection_schema()
+    struct_fields = schema.struct_fields
+    for struct_field in struct_fields:
+        for field in struct_field.fields:
+            if field.dtype in [DataType.FLOAT_VECTOR]:
+                vec_fields.append(f"{struct_field.name}[{field.name}]")
+    return vec_fields
+
 def get_bm25_vec_field_name_list(schema=None):
     if not hasattr(schema, "functions"):
         return []
@@ -2072,6 +2141,40 @@ def get_dense_anns_field_name_list(schema=None):
             anns_fields.append(item)
     return anns_fields
 
+def get_struct_array_vector_field_list(schema=None):
+    if schema is None:
+        schema = gen_default_collection_schema()
+
+    struct_fields = schema.struct_fields
+    struct_vector_fields = []
+
+    for struct_field in struct_fields:
+        struct_field_name = struct_field.name
+        # Check each sub-field for vector types
+        for sub_field in struct_field.fields:
+            sub_field_name = sub_field.name if hasattr(sub_field, 'name') else sub_field.get('name')
+            sub_field_dtype = sub_field.dtype if hasattr(sub_field, 'dtype') else sub_field.get('type')
+
+            if sub_field_dtype in [DataType.FLOAT_VECTOR, DataType.FLOAT16_VECTOR,
+                                   DataType.BFLOAT16_VECTOR, DataType.INT8_VECTOR,
+                                   DataType.BINARY_VECTOR]:
+                # Get dimension
+                if hasattr(sub_field, 'params'):
+                    dim = sub_field.params.get('dim')
+                else:
+                    dim = sub_field.get('params', {}).get('dim')
+
+                item = {
+                    "struct_field": struct_field_name,
+                    "vector_field": sub_field_name,
+                    "anns_field": f"{struct_field_name}[{sub_field_name}]",
+                    "dtype": sub_field_dtype,
+                    "dim": dim
+                }
+                struct_vector_fields.append(item)
+
+    return struct_vector_fields
+
+
 def gen_varchar_data(length: int, nb: int, text_mode=False):
     if text_mode:
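A sketch (not part of the diff) of how the bracketed anns_field entries returned above are meant to be consumed. It assumes the struct-array-capable pymilvus where the schema object exposes struct_fields, and the search call is left commented because the exact client/collection setup is not shown here:

import random
from common import common_func as cf

schema = cf.gen_all_datatype_collection_schema(dim=128)   # struct array field enabled by default
for item in cf.get_struct_array_vector_field_list(schema=schema):
    # item looks like {"struct_field": "array_struct", "vector_field": "float_vector",
    #                  "anns_field": "array_struct[float_vector]", "dtype": ..., "dim": 128}
    query = [[random.random() for _ in range(item["dim"])]]
    # client.search(collection_name, data=query, anns_field=item["anns_field"], limit=10)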
@@ -2080,6 +2183,38 @@ def gen_varchar_data(length: int, nb: int, text_mode=False):
     return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(nb)]
 
 
+def gen_struct_array_data(struct_field, start=0, random_pk=False):
+    """
+    Generates struct array data based on the struct field schema.
+
+    Args:
+        struct_field: Either a dict (from dict schema) or StructFieldSchema object (from ORM schema)
+        start: Starting value for primary key fields
+        random_pk: Whether to generate random primary key values
+
+    Returns:
+        List of struct data dictionaries
+    """
+    struct_array_data = []
+
+    # Handle both dict and object formats
+    if isinstance(struct_field, dict):
+        max_capacity = struct_field.get('max_capacity', 100)
+        fields = struct_field.get('fields', [])
+    else:
+        # StructFieldSchema object
+        max_capacity = getattr(struct_field, 'max_capacity', 100) or 100
+        fields = struct_field.fields
+
+    arr_len = random.randint(1, max_capacity)
+    for _ in range(arr_len):
+        struct_data = {}
+        for field in fields:
+            field_name = field.get('name') if isinstance(field, dict) else field.name
+            struct_data[field_name] = gen_data_by_collection_field(field, nb=None, start=start, random_pk=random_pk)
+        struct_array_data.append(struct_data)
+    return struct_array_data
+
+
 def gen_data_by_collection_field(field, nb=None, start=0, random_pk=False):
     """
     Generates test data for a given collection field based on its data type and properties.
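A short, self-contained illustration of the shape gen_struct_array_data() produces; the field-dict layout here is an assumption that mirrors the "clips" struct used in the tests further down, not an exact describe_collection() payload:

from pymilvus import DataType
from common import common_func as cf

clips_field = {
    "name": "clips",
    "max_capacity": 4,
    "fields": [
        {"name": "clip_embedding1", "type": DataType.FLOAT_VECTOR, "params": {"dim": 8}},
        {"name": "scalar_field", "type": DataType.INT64, "params": {}},
    ],
}
clips = cf.gen_struct_array_data(clips_field)
# -> a list of 1..4 dicts, each shaped like {"clip_embedding1": [8 floats], "scalar_field": <int>}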
@@ -2105,7 +2240,8 @@ def gen_data_by_collection_field(field, nb=None, start=0, random_pk=False):
         # for v2 client, it accepts a dict of field info
         nullable = field.get('nullable', False)
         data_type = field.get('type', None)
-        enable_analyzer = field.get('params').get("enable_analyzer", False)
+        params = field.get('params', {}) or {}
+        enable_analyzer = params.get("enable_analyzer", False)
         is_primary = field.get('is_primary', False)
     else:
         # for ORM client, it accepts a field object
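A minimal sketch of why the added guard matters: a field dict coming back from describe_collection() may carry no 'params' entry at all (or None), in which case chaining .get('params').get(...) would raise. The field dict below is hypothetical:

from pymilvus import DataType

field_info = {"name": "int64_pk", "type": DataType.INT64, "is_primary": True}  # no 'params' key
params = field_info.get('params', {}) or {}
enable_analyzer = params.get("enable_analyzer", False)   # False instead of raising AttributeError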
@@ -2224,9 +2360,16 @@ def gen_data_by_collection_field(field, nb=None, start=0, random_pk=False):
     elif data_type == DataType.ARRAY:
         if isinstance(field, dict):
             max_capacity = field.get('params')['max_capacity']
+            element_type = field.get('element_type')
         else:
             max_capacity = field.params['max_capacity']
             element_type = field.element_type
+
+        # Struct array fields are handled separately in gen_row_data_by_schema
+        # by processing struct_fields, so skip here
+        if element_type == DataType.STRUCT:
+            return None
+
         if element_type == DataType.INT8:
             if nb is None:
                 return [random.randint(-128, 127) for _ in range(max_capacity)] if random.random() < 0.8 or nullable is False else None
@@ -12,6 +12,7 @@ default_dim = 128
 default_nb = 2000
 default_nb_medium = 5000
 default_max_capacity = 100
+default_max_length = 500
 default_top_k = 10
 default_nq = 2
 default_limit = 10
@@ -111,7 +111,7 @@ class TestMilvusClientPartialUpdateValid(TestMilvusClientV2Base):
         """
         # step 1: create collection with all datatype schema
         client = self._client()
-        schema = cf.gen_all_datatype_collection_schema(dim=default_dim)
+        schema = cf.gen_all_datatype_collection_schema(dim=default_dim, enable_struct_array_field=False)
         index_params = self.prepare_index_params(client)[0]
         text_sparse_emb_field_name = "text_sparse_emb"
 
@@ -201,7 +201,7 @@ class TestMilvusClientPartialUpdateValid(TestMilvusClientV2Base):
         collection_name = cf.gen_collection_name_by_testcase_name()
 
         # Create schema with all data types
-        schema = cf.gen_all_datatype_collection_schema(dim=dim)
+        schema = cf.gen_all_datatype_collection_schema(dim=dim, enable_struct_array_field=False)
 
         # Create index parameters
         index_params = client.prepare_index_params()
@@ -2659,7 +2659,7 @@ class TestMilvusClientStructArrayCRUD(TestMilvusClientV2Base):
     def test_upsert_struct_array_data(self):
         """
         target: test upsert operation with struct array data
-        method: insert data then upsert with modified struct array
+        method: insert 3000 records, flush 2000, insert 1000 growing, then upsert with modified struct array
         expected: data successfully upserted
         """
         collection_name = cf.gen_unique_str(f"{prefix}_crud")
@@ -2669,25 +2669,50 @@ class TestMilvusClientStructArrayCRUD(TestMilvusClientV2Base):
         # Create collection
         self.create_collection_with_schema(client, collection_name)
 
-        # Initial insert
-        initial_data = [
-            {
-                "id": 1,
+        # Insert 2000 records for flushed data
+        flushed_data = []
+        for i in range(2000):
+            row = {
+                "id": i,
                 "normal_vector": [random.random() for _ in range(default_dim)],
                 "clips": [
                     {
-                        "clip_embedding1": [
-                            random.random() for _ in range(default_dim)
-                        ],
-                        "scalar_field": 100,
-                        "label": "initial",
+                        "clip_embedding1": [random.random() for _ in range(default_dim)],
+                        "scalar_field": i,
+                        "label": f"flushed_{i}",
                     }
                 ],
             }
-        ]
+            flushed_data.append(row)
 
-        res, check = self.insert(client, collection_name, initial_data)
+        res, check = self.insert(client, collection_name, flushed_data)
         assert check
+        assert res["insert_count"] == 2000
+
+        # Flush to persist data
+        res, check = self.flush(client, collection_name)
+        assert check
+
+        # Insert 1000 records for growing data
+        growing_data = []
+        for i in range(2000, 3000):
+            row = {
+                "id": i,
+                "normal_vector": [random.random() for _ in range(default_dim)],
+                "clips": [
+                    {
+                        "clip_embedding1": [random.random() for _ in range(default_dim)],
+                        "scalar_field": i,
+                        "label": f"growing_{i}",
+                    }
+                ],
+            }
+            growing_data.append(row)
+
+        res, check = self.insert(client, collection_name, growing_data)
+        assert check
+        assert res["insert_count"] == 1000
+
         # create index and load collection
         index_params = client.prepare_index_params()
         index_params.add_index(
@@ -2707,40 +2732,63 @@ class TestMilvusClientStructArrayCRUD(TestMilvusClientV2Base):
         res, check = self.load_collection(client, collection_name)
         assert check
 
-        # Upsert with modified data
-        upsert_data = [
-            {
-                "id": 1,  # Same ID
+        # Upsert data in both flushed and growing segments
+        upsert_data = []
+        # Upsert 10 records from flushed data
+        for i in range(0, 10):
+            row = {
+                "id": i,
                 "normal_vector": [random.random() for _ in range(default_dim)],
                 "clips": [
                     {
-                        "clip_embedding1": [
-                            random.random() for _ in range(default_dim)
-                        ],
-                        "scalar_field": 200,  # Modified
-                        "label": "updated",  # Modified
+                        "clip_embedding1": [random.random() for _ in range(default_dim)],
+                        "scalar_field": i + 10000,  # Modified
+                        "label": f"updated_flushed_{i}",  # Modified
                     }
                 ],
             }
-        ]
+            upsert_data.append(row)
+
+        # Upsert 10 records from growing data
+        for i in range(2000, 2010):
+            row = {
+                "id": i,
+                "normal_vector": [random.random() for _ in range(default_dim)],
+                "clips": [
+                    {
+                        "clip_embedding1": [random.random() for _ in range(default_dim)],
+                        "scalar_field": i + 10000,  # Modified
+                        "label": f"updated_growing_{i}",  # Modified
+                    }
+                ],
+            }
+            upsert_data.append(row)
+
         res, check = self.upsert(client, collection_name, upsert_data)
         assert check
 
-        # Verify upsert worked
+        # Verify upsert worked for flushed data
         res, check = self.flush(client, collection_name)
         assert check
 
-        results, check = self.query(client, collection_name, filter="id == 1")
+        results, check = self.query(client, collection_name, filter="id < 10")
         assert check
-        assert len(results) == 1
-        assert results[0]["clips"][0]["label"] == "updated"
+        assert len(results) == 10
+        for result in results:
+            assert "updated_flushed" in result["clips"][0]["label"]
+
+        # Verify upsert worked for growing data
+        results, check = self.query(client, collection_name, filter="id >= 2000 and id < 2010")
+        assert check
+        assert len(results) == 10
+        for result in results:
+            assert "updated_growing" in result["clips"][0]["label"]
 
     @pytest.mark.tags(CaseLabel.L0)
     def test_delete_struct_array_data(self):
         """
         target: test delete operation with struct array data
-        method: insert struct array data then delete by ID
+        method: insert 3000 records (2000 flushed + 1000 growing), then delete by ID from both segments
         expected: data successfully deleted
         """
         collection_name = cf.gen_unique_str(f"{prefix}_crud")
@@ -2750,25 +2798,50 @@ class TestMilvusClientStructArrayCRUD(TestMilvusClientV2Base):
         # Create collection and insert data
         self.create_collection_with_schema(client, collection_name)
 
-        data = []
-        for i in range(10):
+        # Insert 2000 records for flushed data
+        flushed_data = []
+        for i in range(2000):
             row = {
                 "id": i,
                 "normal_vector": [random.random() for _ in range(default_dim)],
                 "clips": [
                     {
-                        "clip_embedding1": [
-                            random.random() for _ in range(default_dim)
-                        ],
+                        "clip_embedding1": [random.random() for _ in range(default_dim)],
                         "scalar_field": i,
-                        "label": f"label_{i}",
+                        "label": f"flushed_{i}",
                     }
                 ],
             }
-            data.append(row)
+            flushed_data.append(row)
 
-        res, check = self.insert(client, collection_name, data)
+        res, check = self.insert(client, collection_name, flushed_data)
         assert check
+        assert res["insert_count"] == 2000
+
+        # Flush to persist data
+        res, check = self.flush(client, collection_name)
+        assert check
+
+        # Insert 1000 records for growing data
+        growing_data = []
+        for i in range(2000, 3000):
+            row = {
+                "id": i,
+                "normal_vector": [random.random() for _ in range(default_dim)],
+                "clips": [
+                    {
+                        "clip_embedding1": [random.random() for _ in range(default_dim)],
+                        "scalar_field": i,
+                        "label": f"growing_{i}",
+                    }
+                ],
+            }
+            growing_data.append(row)
+
+        res, check = self.insert(client, collection_name, growing_data)
+        assert check
+        assert res["insert_count"] == 1000
+
         # create index and load collection
         index_params = client.prepare_index_params()
         index_params.add_index(
@@ -2788,9 +2861,14 @@ class TestMilvusClientStructArrayCRUD(TestMilvusClientV2Base):
         res, check = self.load_collection(client, collection_name)
         assert check
 
-        # Delete some records
-        delete_ids = [1, 3, 5]
-        res, check = self.delete(client, collection_name, filter=f"id in {delete_ids}")
+        # Delete some records from flushed segment
+        delete_flushed_ids = [1, 3, 5, 100, 500, 1000]
+        res, check = self.delete(client, collection_name, filter=f"id in {delete_flushed_ids}")
+        assert check
+
+        # Delete some records from growing segment
+        delete_growing_ids = [2001, 2003, 2500, 2999]
+        res, check = self.delete(client, collection_name, filter=f"id in {delete_growing_ids}")
         assert check
 
         # Verify deletion
@@ -2801,14 +2879,21 @@ class TestMilvusClientStructArrayCRUD(TestMilvusClientV2Base):
         assert check
 
         remaining_ids = {result["id"] for result in results}
-        for delete_id in delete_ids:
+        # Verify flushed data deletion
+        for delete_id in delete_flushed_ids:
             assert delete_id not in remaining_ids
+        # Verify growing data deletion
+        for delete_id in delete_growing_ids:
+            assert delete_id not in remaining_ids
+
+        # Verify total count is correct (3000 - 10 deleted)
+        assert len(results) == 2990
 
     @pytest.mark.tags(CaseLabel.L1)
     def test_batch_operations(self):
         """
         target: test batch insert/upsert operations with struct array
-        method: perform large batch operations
+        method: insert 3000 records (2000 flushed + 1000 growing), then perform batch upsert
         expected: all operations successful
         """
         collection_name = cf.gen_unique_str(f"{prefix}_crud")
@@ -2818,42 +2903,77 @@ class TestMilvusClientStructArrayCRUD(TestMilvusClientV2Base):
         # Create collection
        self.create_collection_with_schema(client, collection_name)
 
-        # Large batch insert
-        batch_size = 1000
-        data = []
-        for i in range(batch_size):
+        # Insert 2000 records for flushed data
+        flushed_data = []
+        for i in range(2000):
             row = {
                 "id": i,
                 "normal_vector": [random.random() for _ in range(default_dim)],
                 "clips": [
                     {
-                        "clip_embedding1": [
-                            random.random() for _ in range(default_dim)
-                        ],
+                        "clip_embedding1": [random.random() for _ in range(default_dim)],
                         "scalar_field": i % 100,
-                        "label": f"batch_{i}",
+                        "label": f"flushed_{i}",
                     }
                 ],
             }
-            data.append(row)
+            flushed_data.append(row)
 
-        res, check = self.insert(client, collection_name, data)
+        res, check = self.insert(client, collection_name, flushed_data)
         assert check
-        assert res["insert_count"] == batch_size
+        assert res["insert_count"] == 2000
 
-        # Batch upsert (update first 100 records)
+        # Flush to persist data
+        res, check = self.flush(client, collection_name)
+        assert check
+
+        # Insert 1000 records for growing data
+        growing_data = []
+        for i in range(2000, 3000):
+            row = {
+                "id": i,
+                "normal_vector": [random.random() for _ in range(default_dim)],
+                "clips": [
+                    {
+                        "clip_embedding1": [random.random() for _ in range(default_dim)],
+                        "scalar_field": i % 100,
+                        "label": f"growing_{i}",
+                    }
+                ],
+            }
+            growing_data.append(row)
+
+        res, check = self.insert(client, collection_name, growing_data)
+        assert check
+        assert res["insert_count"] == 1000
+
+        # Batch upsert (update first 100 flushed records and 50 growing records)
         upsert_data = []
+        # Update first 100 flushed records
         for i in range(100):
             row = {
                 "id": i,
                 "normal_vector": [random.random() for _ in range(default_dim)],
                 "clips": [
                     {
-                        "clip_embedding1": [
-                            random.random() for _ in range(default_dim)
-                        ],
+                        "clip_embedding1": [random.random() for _ in range(default_dim)],
                         "scalar_field": i + 1000,  # Modified
-                        "label": f"upserted_{i}",  # Modified
+                        "label": f"upserted_flushed_{i}",  # Modified
+                    }
+                ],
+            }
+            upsert_data.append(row)
+
+        # Update first 50 growing records
+        for i in range(2000, 2050):
+            row = {
+                "id": i,
+                "normal_vector": [random.random() for _ in range(default_dim)],
+                "clips": [
+                    {
+                        "clip_embedding1": [random.random() for _ in range(default_dim)],
+                        "scalar_field": i + 1000,  # Modified
+                        "label": f"upserted_growing_{i}",  # Modified
                     }
                 ],
             }
@@ -2862,11 +2982,15 @@ class TestMilvusClientStructArrayCRUD(TestMilvusClientV2Base):
         res, check = self.upsert(client, collection_name, upsert_data)
         assert check
 
+        # Verify upsert success with flush
+        res, check = self.flush(client, collection_name)
+        assert check
+
     @pytest.mark.tags(CaseLabel.L1)
     def test_collection_operations(self):
         """
         target: test collection operations (load/release/drop) with struct array
-        method: perform collection management operations
+        method: insert 3000 records (2000 flushed + 1000 growing), then perform collection management operations
         expected: all operations successful
         """
         collection_name = cf.gen_unique_str(f"{prefix}_crud")
@@ -2876,25 +3000,49 @@ class TestMilvusClientStructArrayCRUD(TestMilvusClientV2Base):
         # Create collection with data
         self.create_collection_with_schema(client, collection_name)
 
-        # Insert some data
-        data = [
-            {
-                "id": 1,
+        # Insert 2000 records for flushed data
+        flushed_data = []
+        for i in range(2000):
+            row = {
+                "id": i,
                 "normal_vector": [random.random() for _ in range(default_dim)],
                 "clips": [
                     {
-                        "clip_embedding1": [
-                            random.random() for _ in range(default_dim)
-                        ],
-                        "scalar_field": 100,
-                        "label": "test",
+                        "clip_embedding1": [random.random() for _ in range(default_dim)],
+                        "scalar_field": i,
+                        "label": f"flushed_{i}",
                     }
                 ],
             }
-        ]
+            flushed_data.append(row)
 
-        res, check = self.insert(client, collection_name, data)
+        res, check = self.insert(client, collection_name, flushed_data)
         assert check
+        assert res["insert_count"] == 2000
+
+        # Flush to persist data
+        res, check = self.flush(client, collection_name)
+        assert check
+
+        # Insert 1000 records for growing data
+        growing_data = []
+        for i in range(2000, 3000):
+            row = {
+                "id": i,
+                "normal_vector": [random.random() for _ in range(default_dim)],
+                "clips": [
+                    {
+                        "clip_embedding1": [random.random() for _ in range(default_dim)],
+                        "scalar_field": i,
+                        "label": f"growing_{i}",
+                    }
+                ],
+            }
+            growing_data.append(row)
+
+        res, check = self.insert(client, collection_name, growing_data)
+        assert check
+        assert res["insert_count"] == 1000
+
         # Create index for loading
         index_params = client.prepare_index_params()
@@ -2922,6 +3070,11 @@ class TestMilvusClientStructArrayCRUD(TestMilvusClientV2Base):
         load_state = client.get_load_state(collection_name)
         assert str(load_state["state"]) == "Loaded"
 
+        # Query to verify both flushed and growing data are accessible
+        results, check = self.query(client, collection_name, filter="id >= 0", limit=3000)
+        assert check
+        assert len(results) == 3000
+
         # Release collection
         res, check = self.release_collection(client, collection_name)
         assert check
@@ -1,7 +1,7 @@
 [pytest]
 
 
-addopts = --host localhost --html=/tmp/ci_logs/report.html --self-contained-html -v
+addopts = --host 10.104.31.219 --html=/tmp/ci_logs/report.html --self-contained-html -v --log-cli-level INFO
 # python3 -W ignore -m pytest
 
 log_format = [%(asctime)s - %(levelname)s - %(name)s]: %(message)s (%(filename)s:%(lineno)s)