test: add collection V2 cases for milvus client (#43892)

issue: #43590
Migrate collection test cases from TestcaseBase to
TestMilvusClientV2Base

---------

Signed-off-by: Orpheus Wang <orpheus.wang@zilliz.com>
This commit is contained in:
9Eurydice9 2025-08-16 17:43:43 +08:00 committed by GitHub
parent d1e4243e0b
commit 03295bb351
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 457 additions and 502 deletions

View File

@ -52,7 +52,7 @@ class TestMilvusClientCollectionInvalid(TestMilvusClientV2Base):
"""
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("collection_name", ["12-s", "12 s", "(mn)", "中文", "%$#"])
@pytest.mark.parametrize("collection_name", ["12-s", "12 s", "(mn)", "中文", "%$#", "español", "عربي", "हिंदी", "Русский"])
def test_milvus_client_collection_invalid_collection_name(self, collection_name):
"""
target: test fast create collection with invalid collection name
@ -61,8 +61,12 @@ class TestMilvusClientCollectionInvalid(TestMilvusClientV2Base):
"""
client = self._client()
# 1. create collection
error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection name: {collection_name}. the first character of a "
f"collection name must be an underscore or letter: invalid parameter"}
if collection_name == "español":
expected_msg = "collection name can only contain numbers, letters and underscores"
else:
expected_msg = "the first character of a collection name must be an underscore or letter"
error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection name: {collection_name}. {expected_msg}: invalid parameter"}
self.create_collection(client, collection_name, default_dim,
check_task=CheckTasks.err_res, check_items=error)
@ -332,6 +336,30 @@ class TestMilvusClientCollectionInvalid(TestMilvusClientV2Base):
self.create_collection(client, collection_name, schema=schema,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("invalid_name", ["中文", "español", "عربي", "हिंदी", "Русский", "!@#$%^&*()", "123abc"])
def test_milvus_client_collection_schema_with_invalid_field_name(self, invalid_name):
"""
target: test create collection schema with invalid field names
method: try to create a schema with a field name
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field("id", DataType.INT64, is_primary=True, auto_id=False)
schema.add_field("vector", DataType.FLOAT_VECTOR, dim=default_dim)
# Add a field with an invalid name
schema.add_field(invalid_name, DataType.VARCHAR, max_length=128)
# Determine expected error message based on invalid field name type
if invalid_name == "español":
expected_msg = "Field name can only contain numbers, letters, and underscores."
else:
expected_msg = "The first character of a field name must be an underscore or letter."
error = {ct.err_code: 1701, ct.err_msg: f"Invalid field name: {invalid_name}. {expected_msg}: field name invalid[field={invalid_name}]"}
self.create_collection(client, collection_name, schema=schema,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("keyword", [
"$meta", "like", "exists", "EXISTS", "and", "or", "not", "in",
@ -1547,6 +1575,39 @@ class TestMilvusClientCollectionValid(TestMilvusClientV2Base):
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_create_collection_multithread(self):
"""
target: Test create collection with multi-thread
method: Create collection using multi-thread
expected: Collections are created successfully
"""
client = self._client()
threads_num = 8
threads = []
collection_names = []
def create():
"""Create collection in separate thread"""
collection_name = cf.gen_collection_name_by_testcase_name() + "_" + cf.gen_unique_str()
collection_names.append(collection_name)
self.create_collection(client, collection_name, default_dim)
# Start multiple threads to create collections
for i in range(threads_num):
t = MyThread(target=create, args=())
threads.append(t)
t.start()
time.sleep(0.2)
# Wait for all threads to complete
for t in threads:
t.join()
# Verify all collections were created successfully
collections_list = self.list_collections(client)[0]
for collection_name in collection_names:
assert collection_name in collections_list
# Clean up: drop the created collection
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_create_drop_collection_multithread(self):
"""
@ -1821,6 +1882,37 @@ class TestMilvusClientReleaseCollectionValid(TestMilvusClientV2Base):
self.drop_collection(client, collection_name)
class TestMilvusClientReleaseAdvanced(TestMilvusClientV2Base):
"""
******************************************************************
The following cases are used to test release during search operations
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L0)
def test_milvus_client_release_collection_during_searching(self):
"""
target: test release collection during searching
method: insert entities into collection, flush and load collection, release collection during searching
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
self.create_collection(client, collection_name, default_dim)
self.load_collection(client, collection_name)
load_state = self.get_load_state(client, collection_name)[0]
assert load_state["state"] == LoadState.Loaded, f"Expected Loaded, but got {load_state['state']}"
vectors_to_search = np.random.default_rng(seed=19530).random((1, default_dim))
self.search(client, collection_name, vectors_to_search, limit=default_limit, _async=True)
self.release_collection(client, collection_name)
load_state = self.get_load_state(client, collection_name)[0]
assert load_state["state"] == LoadState.NotLoad, f"Expected NotLoad after release, but got {load_state['state']}"
error = {ct.err_code: 65535, ct.err_msg: "collection not loaded"}
self.search(client, collection_name, vectors_to_search, limit=default_limit,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
class TestMilvusClientLoadCollectionInvalid(TestMilvusClientV2Base):
""" Test case of search interface """
"""
@ -2520,32 +2612,41 @@ class TestMilvusClientLoadCollectionValid(TestMilvusClientV2Base):
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L0)
def test_milvus_client_load_collection_after_index(self):
@pytest.mark.parametrize("vector_type", [DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR])
def test_milvus_client_load_collection_after_index(self, vector_type):
"""
target: test load collection, after index created
method: insert and create index, load collection with correct params
target: test load collection after index created
method: insert data and create index, load collection with correct params
expected: no error raised
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# Create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field("id", DataType.INT64, is_primary=True, auto_id=False)
if vector_type == DataType.FLOAT_VECTOR:
schema.add_field("vector", DataType.FLOAT_VECTOR, dim=default_dim)
elif vector_type == DataType.BINARY_VECTOR:
schema.add_field("binary_vector", DataType.BINARY_VECTOR, dim=default_dim)
self.create_collection(client, collection_name, schema=schema, consistency_level="Strong")
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, "vector")
# Insert data
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema)
self.insert(client, collection_name, rows)
# Prepare and create index
self.flush(client, collection_name)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name="vector", index_type="IVF_SQ8", metric_type="L2")
if vector_type == DataType.FLOAT_VECTOR:
index_params.add_index(field_name="vector", index_type="IVF_SQ8", metric_type="L2")
elif vector_type == DataType.BINARY_VECTOR:
index_params.add_index(field_name="binary_vector", index_type="BIN_IVF_FLAT", metric_type="JACCARD")
self.create_index(client, collection_name, index_params)
# Load and release collection
self.load_collection(client, collection_name)
self.release_collection(client, collection_name)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L0)
def test_milvus_client_load_collection_after_load_release(self):
"""
@ -2663,6 +2764,81 @@ class TestMilvusClientDescribeCollectionInvalid(TestMilvusClientV2Base):
self.describe_collection(client, collection_name,
check_task=CheckTasks.err_res, check_items=error)
class TestMilvusClientDescribeCollectionValid(TestMilvusClientV2Base):
"""
******************************************************************
The following cases are used to test `describe_collection` function
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_collection_describe(self):
"""
target: test describe collection
method: create a collection and check its information when describe
expected: return correct information
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# Expected description structure
expected_description = {
'collection_name': collection_name,
'auto_id': False,
'num_shards': ct.default_shards_num,
'description': '',
'fields': [
{'field_id': 100, 'name': 'id', 'description': '', 'type': DataType.INT64, 'params': {},
'is_primary': True},
{'field_id': 101, 'name': 'vector', 'description': '', 'type': DataType.FLOAT_VECTOR,
'params': {'dim': default_dim}}
],
'functions': [],
'aliases': [],
'consistency_level': 0,
'properties': {},
'num_partitions': 1,
'enable_dynamic_field': True
}
# Get actual description
res = self.describe_collection(client, collection_name)[0]
# Remove dynamic fields that vary between runs (like V1 test)
assert isinstance(res['collection_id'], int) and isinstance(res['created_timestamp'], int)
del res['collection_id']
del res['created_timestamp']
del res['update_timestamp']
# Exact comparison
assert expected_description == res, f"Description mismatch:\nExpected: {expected_description}\nActual: {res}"
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_collection_describe_nullable_default_value(self):
"""
target: test describe collection with nullable and default_value fields
method: create a collection with nullable and default_value fields, then check its information when describe
expected: return correct information
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# Create collection with nullable and default_value fields
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field("id", DataType.INT64, is_primary=True, auto_id=False)
schema.add_field("float_field", DataType.FLOAT, nullable=True)
schema.add_field("varchar_field", DataType.VARCHAR, max_length=65535, default_value="default_string")
schema.add_field("vector", DataType.FLOAT_VECTOR, dim=default_dim)
self.create_collection(client, collection_name, schema=schema)
# Describe collection and verify nullable and default_value properties
res = self.describe_collection(client, collection_name)[0]
# Check fields for nullable and default_value properties
for field in res["fields"]:
if field["name"] == "float_field":
assert field.get("nullable") is True, f"Expected nullable=True for float_field, got {field.get('nullable')}"
if field["name"] == "varchar_field":
assert field["default_value"].string_data == "default_string", f"Expected 'default_string', got {field['default_value'].string_data}"
self.drop_collection(client, collection_name)
class TestMilvusClientHasCollectionValid(TestMilvusClientV2Base):
""" Test case of has collection interface """
"""
@ -3312,7 +3488,9 @@ class TestMilvusClientCollectionDefaultValueInvalid(TestMilvusClientV2Base):
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_create_collection_non_match_default_value(self):
@pytest.mark.parametrize("default_value", ["abc", 9.09, 1, False])
@pytest.mark.parametrize("field_type", [DataType.INT8, DataType.FLOAT])
def test_milvus_client_create_collection_non_match_default_value(self, default_value, field_type):
"""
target: test create collection with set data type not matched default value
method: create collection with data type not matched default value
@ -3323,11 +3501,27 @@ class TestMilvusClientCollectionDefaultValueInvalid(TestMilvusClientV2Base):
# Create schema with field that has mismatched default value type
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field("id", DataType.INT64, is_primary=True, auto_id=False)
# INT8 field with float default value (type mismatch)
schema.add_field("int8_field", DataType.INT8, default_value=10.0)
schema.add_field("vector", DataType.FLOAT_VECTOR, dim=default_dim)
error = {ct.err_code: 1100,
ct.err_msg: "type (Int8) of field (int8_field) is not equal to the type(DataType_Double) of default_value"}
# Add field with mismatched default value type based on field_type
if field_type == DataType.INT8:
schema.add_field("int8_field", DataType.INT8, default_value=default_value)
field_name = "int8_field"
field_type_str = "Int8"
elif field_type == DataType.FLOAT:
schema.add_field("float_field", DataType.FLOAT, default_value=default_value)
field_name = "float_field"
field_type_str = "Float"
# Determine expected error message based on default_value type
if isinstance(default_value, str):
expected_type = "DataType_VarChar"
elif isinstance(default_value, bool):
expected_type = "DataType_Bool"
elif isinstance(default_value, float):
expected_type = "DataType_Double"
elif isinstance(default_value, int):
expected_type = "DataType_Int64"
error = {ct.err_code: 1100,
ct.err_msg: f"type ({field_type_str}) of field ({field_name}) is not equal to the type({expected_type}) of default_value"}
self.create_collection(client, collection_name, schema=schema,
check_task=CheckTasks.err_res, check_items=error)
@ -3356,6 +3550,28 @@ class TestMilvusClientCollectionDefaultValueInvalid(TestMilvusClientV2Base):
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("default_value", ["abc"])
def test_milvus_client_create_collection_with_invalid_default_value_string(self, default_value):
"""
target: Test create collection with invalid default_value for string field
method: Create collection with string field where default_value exceeds max_length
expected: Raise exception with appropriate error message
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
max_length = 2
# Create schema with string field having default_value longer than max_length
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field("pk", DataType.INT64, is_primary=True)
schema.add_field(ct.default_float_vec_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field("string_field", DataType.VARCHAR, max_length=max_length, default_value=default_value)
error = {ct.err_code: 1100, ct.err_msg: f"the length ({len(default_value)}) of string exceeds max length ({max_length}): "
f"invalid parameter[expected=valid length string][actual=string length exceeds max length]"}
self.create_collection(client, collection_name, schema=schema,
check_task=CheckTasks.err_res, check_items=error)
class TestMilvusClientCollectionDefaultValueValid(TestMilvusClientV2Base):
""" Test case of collection interface """
@ -3408,6 +3624,38 @@ class TestMilvusClientCollectionDefaultValueValid(TestMilvusClientV2Base):
# Clean up: drop the collection
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("auto_id", [True, False])
def test_milvus_client_create_collection_using_default_value(self, auto_id):
"""
target: Test create collection with default_value fields
method: Create a schema with various fields using default values
expected: Collection is created successfully with default values
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
schema = self.create_schema(client, enable_dynamic_field=False, auto_id=auto_id)[0]
schema.add_field("pk", DataType.INT64, is_primary=True)
schema.add_field("vector", DataType.FLOAT_VECTOR, dim=default_dim)
# Add various scalar fields with default values
schema.add_field(ct.default_int8_field_name, DataType.INT8, default_value=numpy.int8(8))
schema.add_field(ct.default_int16_field_name, DataType.INT16, default_value=numpy.int16(16))
schema.add_field(ct.default_int32_field_name, DataType.INT32, default_value=numpy.int32(32))
schema.add_field(ct.default_int64_field_name, DataType.INT64, default_value=numpy.int64(64))
schema.add_field(ct.default_float_field_name, DataType.FLOAT, default_value=numpy.float32(3.14))
schema.add_field(ct.default_double_field_name, DataType.DOUBLE, default_value=numpy.double(3.1415))
schema.add_field(ct.default_bool_field_name, DataType.BOOL, default_value=False)
schema.add_field(ct.default_string_field_name, DataType.VARCHAR, max_length=100, default_value="abc")
# Create collection with default value fields
self.create_collection(client, collection_name, schema=schema)
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"auto_id": auto_id,
"enable_dynamic_field": False,
"schema": schema})
self.drop_collection(client, collection_name)
class TestMilvusClientCollectionCountIP(TestMilvusClientV2Base):
"""
@ -3415,18 +3663,8 @@ class TestMilvusClientCollectionCountIP(TestMilvusClientV2Base):
params means different nb, the nb value may trigger merge, or not
"""
@pytest.fixture(
scope="function",
params=[
1,
1000,
2001
],
)
def insert_count(self, request):
yield request.param
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("insert_count", [1, 1000, 2001])
def test_milvus_client_collection_count_after_index_created(self, insert_count):
"""
target: test count_entities, after index have been created
@ -3453,3 +3691,191 @@ class TestMilvusClientCollectionCountIP(TestMilvusClientV2Base):
assert stats['row_count'] == insert_count
self.drop_collection(client, collection_name)
class TestMilvusClientCollectionCountBinary(TestMilvusClientV2Base):
"""
Test collection count functionality with binary vectors
Params means different nb, the nb value may trigger merge, or not
"""
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("insert_count", [8, 1000, 2001])
def test_milvus_client_collection_count_after_index_created_binary(self, insert_count):
"""
target: Test collection count after binary index is created
method: Create binary collection, insert data, create index, then verify count
expected: Collection count equals entities count just inserted
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# Create binary collection schema
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(ct.default_int64_field_name, DataType.INT64, is_primary=True)
schema.add_field(ct.default_binary_vec_field_name, DataType.BINARY_VECTOR, dim=default_dim)
# Create collection
self.create_collection(client, collection_name, schema=schema)
# Generate and insert binary data
data = cf.gen_row_data_by_schema(nb=insert_count, schema=schema)
self.insert(client, collection_name, data)
self.flush(client, collection_name)
# Create index
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=ct.default_binary_vec_field_name, index_type="BIN_IVF_FLAT", metric_type="JACCARD")
self.create_index(client, collection_name, index_params)
# Verify entity count
stats = self.get_collection_stats(client, collection_name)[0]
assert stats['row_count'] == insert_count
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("auto_id", [True, False])
def test_milvus_client_binary_collection_with_min_dim(self, auto_id):
"""
target: Test binary collection when dim=1 (invalid for binary vectors)
method: Create collection with binary vector field having dim=1
expected: Raise exception with appropriate error message
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# Create schema with invalid binary vector dimension
schema = self.create_schema(client, enable_dynamic_field=False, auto_id=auto_id)[0]
schema.add_field(ct.default_int64_field_name, DataType.INT64, is_primary=True)
# Try to add binary vector field with invalid dimension
error = {ct.err_code: 1,
ct.err_msg: f"invalid dimension: {ct.min_dim} of field {ct.default_binary_vec_field_name}. "
f"binary vector dimension should be multiple of 8."}
schema.add_field(ct.default_binary_vec_field_name, DataType.BINARY_VECTOR, dim=ct.min_dim)
# Try to create collection
self.create_collection(client, collection_name, schema=schema,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_collection_count_no_entities(self):
"""
target: Test collection count when collection is empty
method: Create binary collection with binary vector field but insert no data
expected: The count should be equal to 0
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# Create binary collection schema
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(ct.default_int64_field_name, DataType.INT64, is_primary=True)
schema.add_field(ct.default_binary_vec_field_name, DataType.BINARY_VECTOR, dim=default_dim)
# Create collection without inserting any data
self.create_collection(client, collection_name, schema=schema)
# Verify entity count is 0
stats = self.get_collection_stats(client, collection_name)[0]
assert stats['row_count'] == 0
self.drop_collection(client, collection_name)
class TestMilvusClientCollectionMultiCollections(TestMilvusClientV2Base):
"""
Test collection count functionality with multiple collections
Params means different nb, the nb value may trigger merge, or not
"""
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("insert_count", [1, 1000, 2001])
def test_milvus_client_collection_count_multi_collections_l2(self, insert_count):
"""
target: Test collection rows_count with multiple float vector collections (L2 metric)
method: Create multiple collections, insert entities, and verify count for each
expected: The count equals the length of entities for each collection
"""
client = self._client()
collection_list = []
collection_num = 10
# Create multiple collections and insert data
for i in range(collection_num):
collection_name = cf.gen_collection_name_by_testcase_name() + f"_{i}"
self.create_collection(client, collection_name, default_dim)
schema_info = self.describe_collection(client, collection_name)[0]
data = cf.gen_row_data_by_schema(nb=insert_count, schema=schema_info)
self.insert(client, collection_name, data)
self.flush(client, collection_name)
collection_list.append(collection_name)
# Verify count for each collection
for collection_name in collection_list:
stats = self.get_collection_stats(client, collection_name)[0]
assert stats['row_count'] == insert_count
# Cleanup
for collection_name in collection_list:
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("insert_count", [1, 1000, 2001])
def test_milvus_client_collection_count_multi_collections_binary(self, insert_count):
"""
target: Test collection rows_count with multiple binary vector collections (JACCARD metric)
method: Create multiple binary collections, insert entities, and verify count for each
expected: The count equals the length of entities for each collection
"""
client = self._client()
collection_list = []
collection_num = 20
# Create multiple binary collections and insert data
for i in range(collection_num):
collection_name = cf.gen_collection_name_by_testcase_name() + f"_{i}"
# Create binary collection schema
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(ct.default_int64_field_name, DataType.INT64, is_primary=True)
schema.add_field(ct.default_binary_vec_field_name, DataType.BINARY_VECTOR, dim=default_dim)
# Create collection
self.create_collection(client, collection_name, schema=schema)
# Generate and insert binary data
data = cf.gen_row_data_by_schema(nb=insert_count, schema=schema)
self.insert(client, collection_name, data)
self.flush(client, collection_name)
collection_list.append(collection_name)
# Verify count for each collection
for collection_name in collection_list:
stats = self.get_collection_stats(client, collection_name)[0]
assert stats['row_count'] == insert_count
# Cleanup
for collection_name in collection_list:
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_collection_count_multi_collections_mix(self):
"""
target: Test collection rows_count with mixed float and binary vector collections
method: Create both float and binary collections, insert entities, and verify count for each
expected: The count equals the length of entities for each collection
"""
client = self._client()
collection_list = []
collection_num = 20
insert_count = ct.default_nb
# Create half float vector collections and half binary vector collections
for i in range(0, int(collection_num / 2)):
# Create float vector collection
collection_name = cf.gen_collection_name_by_testcase_name() + f"_float_{i}"
self.create_collection(client, collection_name, default_dim)
schema_info = self.describe_collection(client, collection_name)[0]
data = cf.gen_row_data_by_schema(nb=insert_count, schema=schema_info)
self.insert(client, collection_name, data)
self.flush(client, collection_name)
collection_list.append(collection_name)
for i in range(int(collection_num / 2), collection_num):
# Create binary vector collection
collection_name = cf.gen_collection_name_by_testcase_name() + f"_binary_{i}"
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(ct.default_int64_field_name, DataType.INT64, is_primary=True)
schema.add_field(ct.default_binary_vec_field_name, DataType.BINARY_VECTOR, dim=default_dim)
self.create_collection(client, collection_name, schema=schema)
# Generate and insert binary data
data = cf.gen_row_data_by_schema(nb=insert_count, schema=schema)
self.insert(client, collection_name, data)
self.flush(client, collection_name)
collection_list.append(collection_name)
# Verify count for each collection
for collection_name in collection_list:
stats = self.get_collection_stats(client, collection_name)[0]
assert stats['row_count'] == insert_count
# Cleanup
for collection_name in collection_list:
self.drop_collection(client, collection_name)

View File

@ -426,377 +426,6 @@ class TestCollectionDataframe(TestcaseBase):
assert collection_w.num_entities == self.collection_wrap.num_entities
class TestCollectionCountBinary(TestcaseBase):
"""
params means different nb, the nb value may trigger merge, or not
"""
@pytest.fixture(
scope="function",
params=[
8,
1000,
2001
],
)
def insert_count(self, request):
yield request.param
# TODO: need to update and enable
@pytest.mark.tags(CaseLabel.L1)
def test_collection_count_after_index_created_binary(self, insert_count):
"""
target: test num_entities, after index have been created
method: add vectors in db, and create binary index, then calling num_entities with correct params
expected: num_entities equals entities count just inserted
"""
self._connect()
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
df, _ = cf.gen_default_binary_dataframe_data(insert_count)
mutation_res, _ = collection_w.insert(data=df)
collection_w.create_index(ct.default_binary_vec_field_name, default_binary_index_params)
assert collection_w.num_entities == insert_count
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("auto_id", [True, False])
def test_binary_collection_with_min_dim(self, auto_id):
"""
target: test binary collection when dim=1
method: creat collection and set dim=1
expected: check error message successfully
"""
self._connect()
dim = ct.min_dim
c_schema = cf.gen_default_binary_collection_schema(auto_id=auto_id, dim=dim)
collection_w = self.init_collection_wrap(schema=c_schema,
check_task=CheckTasks.err_res,
check_items={"err_code": 1,
"err_msg": f"invalid dimension: {dim} of field {ct.default_binary_vec_field_name}. binary vector dimension should be multiple of 8."})
@pytest.mark.tags(CaseLabel.L2)
def test_collection_count_no_entities(self):
"""
target: test collection num_entities is correct or not, if collection is empty
method: create collection and no vectors in it,
assert the value returned by num_entities method is equal to 0
expected: the count is equal to 0
"""
self._connect()
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
assert collection_w.num_entities == 0
class TestCollectionMultiCollections(TestcaseBase):
"""
params means different nb, the nb value may trigger merge, or not
"""
@pytest.fixture(
scope="function",
params=[
1,
1000,
2001
],
)
def insert_count(self, request):
yield request.param
@pytest.mark.tags(CaseLabel.L0)
def test_collection_count_multi_collections_l2(self, insert_count):
"""
target: test collection rows_count is correct or not with multiple collections of L2
method: create collection and add entities in it,
assert the value returned by num_entities is equal to length of entities
expected: the count is equal to the length of entities
"""
self._connect()
data = cf.gen_default_list_data(insert_count)
collection_list = []
collection_num = 10
for i in range(collection_num):
collection_name = gen_unique_str(uid_count)
collection_w = self.init_collection_wrap(name=collection_name)
collection_w.insert(data)
collection_list.append(collection_name)
for i in range(collection_num):
res, _ = self.collection_wrap.init_collection(collection_list[i])
assert self.collection_wrap.num_entities == insert_count
@pytest.mark.tags(CaseLabel.L2)
def test_collection_count_multi_collections_binary(self, insert_count):
"""
target: test collection rows_count is correct or not with multiple collections of JACCARD
method: create collection and add entities in it,
assert the value returned by count_entities method is equal to length of entities
expected: the count is equal to the length of entities
"""
self._connect()
df, _ = cf.gen_default_binary_dataframe_data(insert_count)
collection_list = []
collection_num = 20
for i in range(collection_num):
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
mutation_res, _ = collection_w.insert(data=df)
collection_list.append(c_name)
for i in range(collection_num):
res, _ = self.collection_wrap.init_collection(collection_list[i])
assert self.collection_wrap.num_entities == insert_count
@pytest.mark.tags(CaseLabel.L2)
def test_collection_count_multi_collections_mix(self):
"""
target: test collection rows_count is correct or not with multiple collections of
method: create collection and add entities in it,
assert the value returned by count_entities method is equal to length of entities
expected: the count is equal to the length of entities
"""
self._connect()
collection_list = []
collection_num = 20
data = cf.gen_default_list_data()
df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
for i in range(0, int(collection_num / 2)):
collection_name = gen_unique_str(uid_count)
collection_w = self.init_collection_wrap(name=collection_name)
collection_w.insert(data)
collection_list.append(collection_name)
for i in range(int(collection_num / 2), collection_num):
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
mutation_res, _ = collection_w.insert(data=df)
collection_list.append(c_name)
for i in range(collection_num):
res, _ = self.collection_wrap.init_collection(collection_list[i])
assert self.collection_wrap.num_entities == ct.default_nb
class TestCreateCollection(TestcaseBase):
@pytest.fixture(scope="function", params=[False, True])
def auto_id(self, request):
yield request.param
@pytest.mark.tags(CaseLabel.L1)
def test_create_collection_multithread(self):
"""
target: test create collection with multi-thread
method: create collection using multi-thread,
expected: collections are created
"""
self._connect()
threads_num = 8
threads = []
collection_names = []
def create():
collection_name = gen_unique_str(uid_create)
collection_names.append(collection_name)
self.init_collection_wrap(name=collection_name)
for i in range(threads_num):
t = MyThread(target=create, args=())
threads.append(t)
t.start()
time.sleep(0.2)
for t in threads:
t.join()
for item in collection_names:
assert item in self.utility_wrap.list_collections()[0]
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip("not support default_value now")
def test_create_collection_using_default_value(self, auto_id):
"""
target: test create collection with default_value
method: create a schema with all fields using default value
expected: collections are created
"""
fields = [
cf.gen_int64_field(name='pk', is_primary=True),
cf.gen_float_vec_field(),
cf.gen_int8_field(default_value=numpy.int8(8)),
cf.gen_int16_field(default_value=numpy.int16(16)),
cf.gen_int32_field(default_value=numpy.int32(32)),
cf.gen_int64_field(default_value=numpy.int64(64)),
cf.gen_float_field(default_value=numpy.float32(3.14)),
cf.gen_double_field(default_value=numpy.double(3.1415)),
cf.gen_bool_field(default_value=False),
cf.gen_string_field(default_value="abc")
]
schema = cf.gen_collection_schema(fields, auto_id=auto_id)
self.init_collection_wrap(schema=schema,
check_task=CheckTasks.check_collection_property,
check_items={"schema": schema})
class TestCreateCollectionInvalid(TestcaseBase):
"""
Test creating collections with invalid params
"""
@pytest.mark.tags(CaseLabel.L2)
def test_create_collection_limit_fields(self):
"""
target: test create collection with maximum fields
method: create collection with maximum field number
expected: raise exception
"""
self._connect()
c_name = cf.gen_unique_str(prefix)
limit_num = ct.max_field_num
field_schema_list = []
field_pr = cf.gen_int64_field(ct.default_int64_field_name, is_primary=True)
field_v = cf.gen_float_vec_field(ct.default_float_vec_field_name)
field_schema_list.append(field_pr)
field_schema_list.append(field_v)
for i in range(limit_num):
field_name_tmp = gen_unique_str("field_name")
field_schema_temp = cf.gen_int64_field(field_name_tmp)
field_schema_list.append(field_schema_temp)
error = {ct.err_code: 65535, ct.err_msg: "maximum field's number should be limited to 64"}
schema, _ = self.collection_schema_wrap.init_collection_schema(fields=field_schema_list)
self.init_collection_wrap(name=c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("invalid_name", ["中文", "español", "عربي", "हिंदी", "Русский"])
def test_create_schema_with_different_language(self, invalid_name):
"""
target: test create collection with maximum fields
method: create collection with maximum field number
expected: raise exception
"""
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_vec_field(),
cf.gen_string_field(name=invalid_name)]
schema = cf.gen_collection_schema(fields)
self.init_collection_wrap(schema=schema,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1701,
ct.err_msg: "Invalid field name: %s" % invalid_name})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("invalid_name", ["中文", "español", "عربي", "हिंदी", "Русский"])
def test_create_collection_with_different_language(self, invalid_name):
"""
target: test create collection with maximum fields
method: create collection with maximum field number
expected: raise exception
"""
schema = cf.gen_default_collection_schema()
self.init_collection_wrap(name=invalid_name, schema=schema,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1100,
ct.err_msg: "Invalid collection name: %s" % invalid_name})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("default_value", ["abc"])
@pytest.mark.skip(reason="issue #24634")
def test_create_collection_with_invalid_default_value_string(self, default_value):
"""
target: test create collection with maximum fields
method: create collection with maximum field number
expected: raise exception
"""
fields = [
cf.gen_int64_field(name='pk', is_primary=True),
cf.gen_float_vec_field(),
cf.gen_string_field(max_length=2, default_value=default_value)
]
schema = cf.gen_collection_schema(fields)
self.init_collection_wrap(schema=schema,
check_task=CheckTasks.check_collection_property,
check_items={"schema": schema})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip("not support default_value now")
@pytest.mark.parametrize("default_value", ["abc", 9.09, 1, False])
def test_create_collection_with_invalid_default_value_float(self, default_value):
"""
target: test create collection with maximum fields
method: create collection with maximum field number
expected: raise exception
"""
fields = [
cf.gen_int64_field(name='pk', is_primary=True),
cf.gen_float_vec_field(),
cf.gen_float_field(default_value=default_value)
]
schema = cf.gen_collection_schema(fields)
self.init_collection_wrap(schema=schema, check_task=CheckTasks.err_res,
check_items={ct.err_code: 1,
ct.err_msg: "default value type mismatches field schema type"})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip("not support default_value now")
@pytest.mark.parametrize("default_value", ["abc", 9.09, 1, False])
def test_create_collection_with_invalid_default_value_int8(self, default_value):
"""
target: test create collection with maximum fields
method: create collection with maximum field number
expected: raise exception
"""
fields = [
cf.gen_int64_field(name='pk', is_primary=True),
cf.gen_float_vec_field(),
cf.gen_int8_field(default_value=default_value)
]
schema = cf.gen_collection_schema(fields)
self.init_collection_wrap(schema=schema, check_task=CheckTasks.err_res,
check_items={ct.err_code: 1,
ct.err_msg: "default value type mismatches field schema type"})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip("not support default_value now")
def test_create_collection_with_pk_field_using_default_value(self):
"""
target: test create collection with pk field using default value
method: create a pk field and set default value
expected: report error
"""
# 1. pk int64
fields = [
cf.gen_int64_field(name='pk', is_primary=True, default_value=np.int64(1)),
cf.gen_float_vec_field(), cf.gen_string_field(max_length=2)
]
schema = cf.gen_collection_schema(fields)
collection_w = self.init_collection_wrap(schema=schema)
collection_w.insert([[], [vectors[0]], ["a"]],
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1,
ct.err_msg: "pk field schema can not set default value"})
# 2. pk string
fields = [
cf.gen_string_field(name='pk', is_primary=True, default_value="a"),
cf.gen_float_vec_field(), cf.gen_string_field(max_length=2)
]
schema = cf.gen_collection_schema(fields)
collection_w = self.init_collection_wrap(schema=schema)
collection_w.insert([[], [vectors[0]], ["a"]],
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1,
ct.err_msg: "pk field schema can not set default value"})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip("not support default_value now")
def test_create_collection_with_json_field_using_default_value(self):
"""
target: test create collection with json field using default value
method: create a json field and set default value
expected: report error
"""
json_default_value = {"number": 1, "float": 2.0, "string": "abc", "bool": True,
"list": [i for i in range(5)]}
cf.gen_json_field(default_value=json_default_value,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1,
ct.err_msg: "Default value unsupported data type: 999"})
class TestLoadCollection(TestcaseBase):
"""
******************************************************************
@ -804,22 +433,6 @@ class TestLoadCollection(TestcaseBase):
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
def test_load_collection_after_index_binary(self):
"""
target: test load binary_collection, after index created
method: insert and create index, load binary_collection with correct params
expected: no error raised
"""
self._connect()
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
mutation_res, _ = collection_w.insert(data=df)
collection_w.create_index(ct.default_binary_vec_field_name, default_binary_index_params)
collection_w.load()
collection_w.release()
@pytest.mark.tags(CaseLabel.ClusterOnly)
def test_load_replica_change(self):
"""
@ -1110,91 +723,7 @@ class TestLoadCollection(TestcaseBase):
check_items={'exp_res': [{"count(*)": ct.default_nb}]})
class TestDescribeCollection(TestcaseBase):
"""
******************************************************************
The following cases are used to test `collection.describe` function
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L2)
def test_collection_describe(self):
"""
target: test describe collection
method: create a collection and check its information when describe
expected: return correct information
"""
self._connect()
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index)
description = \
{'collection_name': c_name, 'auto_id': False, 'num_shards': ct.default_shards_num, 'description': '',
'fields': [
{'field_id': 100, 'name': 'int64', 'description': '', 'type': DataType.INT64, 'params': {},
'is_primary': True},
{'field_id': 101, 'name': 'float', 'description': '', 'type': DataType.FLOAT, 'params': {}},
{'field_id': 102, 'name': 'varchar', 'description': '', 'type': DataType.VARCHAR,
'params': {'max_length': 65535}},
{'field_id': 103, 'name': 'json_field', 'description': '', 'type': DataType.JSON, 'params': {}},
{'field_id': 104, 'name': 'float_vector', 'description': '', 'type': DataType.FLOAT_VECTOR,
'params': {'dim': 128}}
],
'functions': [], 'aliases': [], 'consistency_level': 0, 'properties': {},
'num_partitions': 1, 'enable_dynamic_field': False}
res = collection_w.describe()[0]
assert isinstance(res['collection_id'], int) and isinstance(res['created_timestamp'], int)
del res['collection_id']
del res['created_timestamp']
del res['update_timestamp']
log.info(res)
assert description == res
@pytest.mark.tags(CaseLabel.L1)
def test_collection_describe_nullable_default_value(self):
"""
target: test describe collection with nullable and default_value fields
method: create a collection with nullable and default_value fields, then check its information when describe
expected: return correct information
"""
collection_w = self.init_collection_general(prefix, False,
nullable_fields={ct.default_float_field_name: 0},
default_value_fields={ct.default_string_field_name: "1"})[0]
res = collection_w.describe()[0]
for field in res["fields"]:
if field["name"] == ct.default_float_field_name:
assert field["nullable"] is True
if field["name"] == ct.default_string_field_name:
if "default_value" not in field.keys():
log.error("there is no default_value key in the result of describe collection, please file a bug")
assert False
else:
assert field["default_value"].string_data == "1"
class TestReleaseAdvanced(TestcaseBase):
@pytest.mark.tags(CaseLabel.L0)
def test_release_collection_during_searching(self):
"""
target: test release collection during searching
method: insert entities into collection, flush and load collection, release collection during searching
expected: raise exception
"""
self._connect()
data = cf.gen_default_list_data()
c_name = cf.gen_unique_str()
collection_wr = self.init_collection_wrap(name=c_name)
collection_wr.insert(data=data)
assert collection_wr.num_entities == ct.default_nb
collection_wr.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index)
collection_wr.load()
search_res, _ = collection_wr.search(vectors, default_search_field, default_search_params,
default_limit, _async=True)
collection_wr.release()
error = {ct.err_code: 65535, ct.err_msg: "collection not loaded"}
collection_wr.search(vectors, default_search_field, default_search_params, default_limit,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
def test_release_partition_during_searching(self):