test:[cp 2.6] add test for allow insert auto id (#44837)

related issue: https://github.com/milvus-io/milvus/issues/44425
pr: #44801

split insert.py into a few files: upsert.py, insert.py,
partial_upsert.py ...
add a test for the allow_insert_auto_id collection property
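
For context, a rough sketch of the behavior the new test exercises. This is illustrative only and not part of the commit: it assumes a pymilvus MilvusClient (v2 API) connected to a local Milvus at http://localhost:19530, and that alter_collection_properties / drop_collection_properties are available in the installed client version; the collection name is a placeholder.

from pymilvus import MilvusClient, DataType
import random

client = MilvusClient(uri="http://localhost:19530")  # assumed local endpoint
name = "allow_insert_auto_id_demo"                   # hypothetical collection name

# An auto_id primary key means the server normally generates ids on insert.
schema = client.create_schema(enable_dynamic_field=False)
schema.add_field("id", DataType.INT64, is_primary=True, auto_id=True)
schema.add_field("vector", DataType.FLOAT_VECTOR, dim=8)
index_params = client.prepare_index_params()
index_params.add_index("vector", metric_type="COSINE")
client.create_collection(name, schema=schema, index_params=index_params)

rows = [{"id": i, "vector": [random.random() for _ in range(8)]} for i in range(3)]

# By default, rows carrying an explicit primary key are rejected for an auto_id
# collection (the new test expects an error like "more fieldData has pass in").
# client.insert(name, rows)

# After enabling the property, user-supplied primary keys are accepted.
client.alter_collection_properties(name, properties={"allow_insert_auto_id": True})
client.insert(name, rows)

# Dropping the property (or setting it to False) restores the default rejection.
client.drop_collection_properties(name, property_keys=["allow_insert_auto_id"])
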

---------

Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
Author: yanliang567, 2025-10-15 11:38:00 +08:00 (committed by GitHub)
Parent: 88b731d1f0
Commit: bb4446e5af
9 changed files with 3791 additions and 2367 deletions

View File

@@ -21,7 +21,7 @@ proxy:
dataNode:
  resources:
    limits:
-     cpu: "2"
+     cpu: "4"
      memory: 8Gi
    requests:
      cpu: "0.5"

View File

@@ -261,17 +261,18 @@ class TestMilvusClientV2Base(Base):
**kwargs).run()
return res, check_result
- @trace()
- def num_entities(self, client, collection_name, timeout=None, check_task=None, check_items=None, **kwargs):
-     timeout = TIMEOUT if timeout is None else timeout
-     kwargs.update({"timeout": timeout})
-     func_name = sys._getframe().f_code.co_name
-     res, check = api_request([client.num_entities, collection_name], **kwargs)
-     check_result = ResponseChecker(res, func_name, check_task, check_items, check,
-                                    collection_name=collection_name,
-                                    **kwargs).run()
-     return res, check_result
+ # No client.num_entities method
+ # @trace()
+ # def num_entities(self, client, collection_name, timeout=None, check_task=None, check_items=None, **kwargs):
+ #     timeout = TIMEOUT if timeout is None else timeout
+ #     kwargs.update({"timeout": timeout})
+ #     func_name = sys._getframe().f_code.co_name
+ #     res, check = api_request([client.num_entities, collection_name], **kwargs)
+ #     check_result = ResponseChecker(res, func_name, check_task, check_items, check,
+ #                                    collection_name=collection_name,
+ #                                    **kwargs).run()
+ #     return res, check_result
@trace()
def delete(self, client, collection_name, ids=None, timeout=None, filter=None, partition_name=None,
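
As the comment in the hunk above notes, MilvusClient exposes no num_entities call, so these tests read row counts from get_collection_stats instead. A minimal sketch of that pattern (illustrative only; `client` is an existing pymilvus MilvusClient and "demo_collection" is a placeholder name):

# Flush first so the count reflects sealed data.
client.flush(collection_name="demo_collection")
stats = client.get_collection_stats(collection_name="demo_collection")
row_count = stats["row_count"]  # the count the removed num_entities wrapper reported
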

View File

@@ -38,7 +38,7 @@ class TestMilvusClientAlterIndex(TestMilvusClientV2Base):
expected: alter successfully
"""
client = self._client()
- collection_name = cf.gen_unique_str(prefix)
+ collection_name = cf.gen_collection_name_by_testcase_name()
self.create_collection(client, collection_name, ct.default_dim, consistency_level="Strong")
idx_names, _ = self.list_indexes(client, collection_name, field_name=default_vector_field_name)
self.load_collection(client, collection_name)
@@ -69,7 +69,7 @@ class TestMilvusClientAlterIndex(TestMilvusClientV2Base):
expected: raise exception
"""
client = self._client()
- collection_name = cf.gen_unique_str(prefix)
+ collection_name = cf.gen_collection_name_by_testcase_name()
# 1. create collection
schema = self.create_schema(client, enable_dynamic_field=False)[0]
dim = 32
@@ -112,7 +112,7 @@ class TestMilvusClientAlterIndex(TestMilvusClientV2Base):
expected: raise exception
"""
client = self._client()
- collection_name = cf.gen_unique_str(prefix)
+ collection_name = cf.gen_collection_name_by_testcase_name()
self.create_collection(client, collection_name, ct.default_dim, consistency_level="Strong")
idx_names, _ = self.list_indexes(client, collection_name, field_name=default_vector_field_name)
self.release_collection(client, collection_name)
@@ -141,7 +141,7 @@ class TestMilvusClientAlterCollection(TestMilvusClientV2Base):
expected: alter successfully
"""
client = self._client()
- collection_name = cf.gen_unique_str(prefix)
+ collection_name = cf.gen_collection_name_by_testcase_name()
self.create_collection(client, collection_name, ct.default_dim, consistency_level="Strong")
self.load_collection(client, collection_name)
res1 = self.describe_collection(client, collection_name)[0]
@@ -295,6 +295,114 @@ class TestMilvusClientAlterCollection(TestMilvusClientV2Base):
res = self.describe_collection(client, collection_name)[0]
assert res.get('enable_dynamic_field', None) is new_dynamic_flag
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("pk_field_type", [DataType.INT64, DataType.VARCHAR])
def test_milvus_client_alter_allow_insert_auto_id(self, pk_field_type):
"""
target: test alter collection allow insert auto id
method:
1. create collection with auto_id=True
2. try to insert data with primary key
3. verify insert failed
4. alter collection allow_insert_auto_id=True
5. insert data with customized primary key
6. verify insert successfully
7. verify the new inserted data's primary keys are customized
8. verify the collection info
9. drop the collection properties allow_insert_auto_id
10. alter collection allow_insert_auto_id=False
11. verify the collection info
12. alter collection allow_insert_auto_id=True with string value
13. verify the collection info
14. insert data with customized primary key
15. verify insert successfully
expected: insert successfully
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
dim = 8
# 1. create collection
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, pk_field_type, max_length=64, is_primary=True, auto_id=True)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
# 2. try to insert data with primary key
rows_with_pk = [{
default_primary_key_field_name: i,
default_vector_field_name: cf.gen_vectors(1, dim, vector_data_type=DataType.FLOAT_VECTOR)[0]
} for i in range(100)]
if pk_field_type == DataType.VARCHAR:
rows_with_pk = [{
default_primary_key_field_name: f"id_{i}",
default_vector_field_name: cf.gen_vectors(1, dim, vector_data_type=DataType.FLOAT_VECTOR)[0]
} for i in range(100)]
error = {ct.err_code: 999, ct.err_msg: f"more fieldData has pass in"}
self.insert(client, collection_name, rows_with_pk, check_task=CheckTasks.err_res, check_items=error)
rows_without_pk = cf.gen_row_data_by_schema(nb=100, schema=schema)
self.insert(client, collection_name, rows_without_pk)
self.flush(client, collection_name)
num_entities = self.get_collection_stats(client, collection_name)[0]
assert num_entities.get("row_count", None) == 100
self.load_collection(client, collection_name)
filter = f"{default_primary_key_field_name} in [10, 20,90]"
if pk_field_type == DataType.VARCHAR:
filter = f"{default_primary_key_field_name} in ['id_10', 'id_20', 'id_90']"
res = self.query(client, collection_name, filter=filter,
output_fields=[default_primary_key_field_name])[0]
assert (len(res)) == 0
# 3. alter collection allow_insert_auto_id=True
self.alter_collection_properties(client, collection_name, properties={"allow_insert_auto_id": True})
# 4. insert data with customized primary key
self.insert(client, collection_name, rows_with_pk)
# 5. verify insert successfully
self.flush(client, collection_name)
num_entities = self.get_collection_stats(client, collection_name)[0]
assert num_entities.get("row_count", None) == 100 * 2
# 6. verify the new inserted data's primary keys are customized
res = self.query(client, collection_name, filter=filter,
output_fields=[default_primary_key_field_name])[0]
assert (len(res)) == 3
# check the collection info
res = self.describe_collection(client, collection_name)[0]
assert res.get('properties').get('allow_insert_auto_id', None) == 'True'
# drop the collection properties allow_insert_auto_id
self.drop_collection_properties(client, collection_name, property_keys=["allow_insert_auto_id"])
res = self.describe_collection(client, collection_name)[0]
assert res.get('properties').get('allow_insert_auto_id', None) is None
self.insert(client, collection_name, rows_with_pk, check_task=CheckTasks.err_res, check_items=error)
# alter collection allow_insert_auto_id=False
self.alter_collection_properties(client, collection_name, properties={"allow_insert_auto_id": False})
res = self.describe_collection(client, collection_name)[0]
assert res.get('properties').get('allow_insert_auto_id', None) == 'False'
self.insert(client, collection_name, rows_with_pk, check_task=CheckTasks.err_res, check_items=error)
# alter collection allow_insert_auto_id=True with string value
self.alter_collection_properties(client, collection_name, properties={"allow_insert_auto_id": "True"})
res = self.describe_collection(client, collection_name)[0]
assert res.get('properties').get('allow_insert_auto_id', None) == 'True'
rows_with_pk = [{
default_primary_key_field_name: i,
default_vector_field_name: cf.gen_vectors(1, dim, vector_data_type=DataType.FLOAT_VECTOR)[0]
} for i in range(100, 200)]
if pk_field_type == DataType.VARCHAR:
rows_with_pk = [{
default_primary_key_field_name: f"id_{i}",
default_vector_field_name: cf.gen_vectors(1, dim, vector_data_type=DataType.FLOAT_VECTOR)[0]
} for i in range(100, 200)]
self.insert(client, collection_name, rows_with_pk)
self.flush(client, collection_name)
num_entities = self.get_collection_stats(client, collection_name)[0]
assert num_entities.get("row_count", None) == 100 * 3
class TestMilvusClientAlterCollectionField(TestMilvusClientV2Base):
@pytest.mark.tags(CaseLabel.L0)
@@ -306,7 +414,7 @@ class TestMilvusClientAlterCollectionField(TestMilvusClientV2Base):
expected: alter successfully
"""
client = self._client()
- collection_name = cf.gen_unique_str(prefix)
+ collection_name = cf.gen_collection_name_by_testcase_name()
# 1. create collection
schema = self.create_schema(client, enable_dynamic_field=False)[0]
dim = 32
@@ -452,7 +560,7 @@ class TestMilvusClientAlterDatabase(TestMilvusClientV2Base):
expected: alter successfully
"""
client = self._client()
- collection_name = cf.gen_unique_str(prefix)
+ collection_name = cf.gen_collection_name_by_testcase_name()
self.create_collection(client, collection_name, ct.default_dim, consistency_level="Strong")
self.release_collection(client, collection_name)
default_db = 'default'

File diff suppressed because it is too large

View File

@@ -0,0 +1,556 @@
import pytest
import numpy as np
from base.client_v2_base import TestMilvusClientV2Base
from utils.util_log import test_log as log
from common import common_func as cf
from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from utils.util_pymilvus import *
prefix = "client_insert"
epsilon = ct.epsilon
default_nb = ct.default_nb
default_nb_medium = ct.default_nb_medium
default_nq = ct.default_nq
default_dim = ct.default_dim
default_limit = ct.default_limit
default_search_exp = "id >= 0"
exp_res = "exp_res"
default_search_string_exp = "varchar >= \"0\""
default_search_mix_exp = "int64 >= 0 && varchar >= \"0\""
default_invaild_string_exp = "varchar >= 0"
default_json_search_exp = "json_field[\"number\"] >= 0"
perfix_expr = 'varchar like "0%"'
default_search_field = ct.default_float_vec_field_name
default_search_params = ct.default_search_params
default_primary_key_field_name = "id"
default_vector_field_name = "vector"
default_dynamic_field_name = "field_new"
default_float_field_name = ct.default_float_field_name
default_bool_field_name = ct.default_bool_field_name
default_string_field_name = ct.default_string_field_name
default_int32_array_field_name = ct.default_int32_array_field_name
default_string_array_field_name = ct.default_string_array_field_name
default_int32_field_name = ct.default_int32_field_name
default_int32_value = ct.default_int32_value
class TestMilvusClientInsertJsonPathIndexValid(TestMilvusClientV2Base):
""" Test case of insert interface """
@pytest.fixture(scope="function", params=["INVERTED"])
def supported_varchar_scalar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=["BOOL", "Double", "Varchar", "json"])
def supported_json_cast_type(self, request):
yield request.param
"""
******************************************************************
# The following are valid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_insert_before_json_path_index(self, enable_dynamic_field, supported_json_cast_type,
supported_varchar_scalar_index):
"""
target: test insert and then create json path index
method: create json path index after insert
steps: 1. create schema
2. create collection
3. insert
4. prepare json path index params with parameter "json_cast_type" and "json_path"
5. create index
expected: insert and create json path index successfully
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, schema=schema, index_params=index_params)
# 2. insert with different data distribution
vectors = cf.gen_vectors(default_nb+50, default_dim)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in
range(default_nb)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: i} for i in
range(default_nb, default_nb+10)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {}} for i in
range(default_nb+10, default_nb+20)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': [1, 2, 3]}} for i in
range(default_nb + 20, default_nb + 30)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': [{'b': 1}, 2, 3]}} for i in
range(default_nb + 20, default_nb + 30)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': [{'b': None}, 2, 3]}} for i in
range(default_nb + 30, default_nb + 40)]
self.insert(client, collection_name, rows)
# 2. prepare index params
index_name = "json_index"
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=json_field_name, index_name=index_name, index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '1',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '2',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '3',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]['b']"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '4',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]"})
# 3. create index
self.create_index(client, collection_name, index_params)
self.describe_index(client, collection_name, index_name,
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']['b']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name})
self.describe_index(client, collection_name, index_name + '1',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '1'})
self.describe_index(client, collection_name, index_name +'2',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '2'})
self.describe_index(client, collection_name, index_name + '3',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]['b']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '3'})
self.describe_index(client, collection_name, index_name + '4',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '4'})
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_insert_after_json_path_index(self, enable_dynamic_field, supported_json_cast_type,
supported_varchar_scalar_index):
"""
target: test insert after create json path index
method: create json path index after insert
steps: 1. create schema
2. create all the index parameters including json path index
3. create collection with schema and index params
4. insert
5. check the index
expected: insert successfully after create json path index
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# 1. create collection with schema and all the index parameters
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON)
index_name = "json_index"
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
index_params.add_index(field_name=json_field_name, index_name=index_name, index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '1',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '2',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '3',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]['b']"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '4',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]"})
self.create_collection(client, collection_name, schema=schema, index_params=index_params)
# 2. insert with different data distribution
vectors = cf.gen_vectors(default_nb+50, default_dim)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in
range(default_nb)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: i} for i in
range(default_nb, default_nb+10)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {}} for i in
range(default_nb+10, default_nb+20)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': [1, 2, 3]}} for i in
range(default_nb + 20, default_nb + 30)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': [{'b': 1}, 2, 3]}} for i in
range(default_nb + 20, default_nb + 30)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': [{'b': None}, 2, 3]}} for i in
range(default_nb + 30, default_nb + 40)]
self.insert(client, collection_name, rows)
# 3. check the json path index
self.describe_index(client, collection_name, index_name,
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']['b']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name})
self.describe_index(client, collection_name, index_name + '1',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '1'})
self.describe_index(client, collection_name, index_name +'2',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '2'})
self.describe_index(client, collection_name, index_name + '3',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]['b']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '3'})
self.describe_index(client, collection_name, index_name + '4',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '4'})
""" Test case of partial update interface """
@pytest.fixture(scope="function", params=[False, True])
def auto_id(self, request):
yield request.param
@pytest.fixture(scope="function", params=["COSINE", "L2"])
def metric_type(self, request):
yield request.param
"""
******************************************************************
# The following are invalid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_partial_update_new_pk_with_missing_field(self):
"""
target: Test PU will return error when provided new pk and partial field
method:
1. Create a collection
2. partial upsert a new pk with only partial field
expected: step 2 should fail
"""
# step 1: create collection
client = self._client()
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_int32_field_name, DataType.INT32, nullable=True)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
index_params.add_index(default_int32_field_name, index_type="AUTOINDEX")
collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
self.create_collection(client, collection_name, default_dim, schema=schema,
consistency_level="Strong", index_params=index_params)
# step 2: partial upsert a new pk with only partial field
rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema,
desired_field_names=[default_primary_key_field_name, default_int32_field_name])
error = {ct.err_code: 1100, ct.err_msg:
f"fieldSchema({default_vector_field_name}) has no corresponding fieldData pass in: invalid parameter"}
self.upsert(client, collection_name, rows, partial_update=True,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_partial_update_new_field_without_dynamic_field(self):
"""
target: Test PU will return error when provided new field without dynamic field
method:
1. Create a collection with dynamic field
2. partial upsert a new field
expected: step 2 should fail
"""
# step 1: create collection with dynamic field
client = self._client()
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
self.create_collection(client, collection_name, default_dim, schema=schema,
consistency_level="Strong", index_params=index_params)
# step 2: partial upsert a new field
row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema)
self.upsert(client, collection_name, row, partial_update=True)
new_row = [{default_primary_key_field_name: i, default_int32_field_name: 99} for i in range(default_nb)]
error = {ct.err_code: 1,
ct.err_msg: f"Attempt to insert an unexpected field `{default_int32_field_name}` to collection without enabling dynamic field"}
self.upsert(client, collection_name, new_row, partial_update=True, check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_partial_update_after_release_collection(self):
"""
target: test basic function of partial update
method:
1. create collection
2. insert a full row of data using partial update
3. partial update data
4. release collection
5. partial update data
expected: step 5 should fail
"""
# Step 1: create collection
client = self._client()
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
index_params.add_index(default_string_field_name, index_type="AUTOINDEX")
collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
self.create_collection(client, collection_name, default_dim, schema=schema,
consistency_level="Strong", index_params=index_params)
# Step 2: insert a full row of data using partial update
rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema)
self.upsert(client, collection_name, rows, partial_update=True)
# Step 3: partial update data
new_row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema,
desired_field_names=[default_primary_key_field_name, default_string_field_name])
self.upsert(client, collection_name, new_row, partial_update=True)
# Step 4: release collection
self.release_collection(client, collection_name)
# Step 5: partial update data
new_row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema,
desired_field_names=[default_primary_key_field_name, default_string_field_name])
error = {ct.err_code: 101,
ct.err_msg: f"failed to query: collection not loaded"}
self.upsert(client, collection_name, new_row, partial_update=True,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_partial_update_same_pk_after_delete(self):
"""
target: test PU will fail when provided same pk and partial field
method:
1. Create a collection with dynamic field
2. Insert rows
3. delete the rows
4. upsert the rows with same pk and partial field
expected: step 4 should fail
"""
# Step 1: create collection
client = self._client()
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_int32_field_name, DataType.INT32)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
index_params.add_index(default_int32_field_name, index_type="AUTOINDEX")
collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
self.create_collection(client, collection_name, default_dim, schema=schema,
consistency_level="Strong", index_params=index_params)
# Step 2: insert rows
rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema)
self.upsert(client, collection_name, rows, partial_update=True)
# Step 3: delete the rows
result = self.delete(client, collection_name, filter=default_search_exp)[0]
assert result["delete_count"] == default_nb
result = self.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_nothing)[0]
assert len(result) == 0
# Step 4: upsert the rows with same pk and partial field
new_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema,
desired_field_names=[default_primary_key_field_name, default_vector_field_name])
error = {ct.err_code: 1100,
ct.err_msg: f"fieldSchema({default_int32_field_name}) has no corresponding fieldData pass in: invalid parameter"}
self.upsert(client, collection_name, new_rows, partial_update=True,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_partial_update_pk_in_wrong_partition(self):
"""
target: test PU will fail when provided pk in wrong partition
method:
1. Create a collection
2. Create 2 partitions
3. Insert rows
4. upsert the rows with pk in wrong partition
expected: step 4 should fail
"""
# Step 1: create collection
client = self._client()
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_int32_field_name, DataType.INT32)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
index_params.add_index(default_int32_field_name, index_type="AUTOINDEX")
collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
self.create_collection(client, collection_name, default_dim, schema=schema,
consistency_level="Strong", index_params=index_params)
# Step 2: Create 2 partitions
num_of_partitions = 2
partition_names = []
for _ in range(num_of_partitions):
partition_name = cf.gen_unique_str("partition")
self.create_partition(client, collection_name, partition_name)
partition_names.append(partition_name)
# Step 3: Insert rows
rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema)
gap = default_nb // num_of_partitions
for i, partition in enumerate(partition_names):
self.upsert(client, collection_name, rows[i*gap:(i+1)*gap], partition_name=partition, partial_update=True)
# Step 4: upsert the rows with pk in wrong partition
new_rows = cf.gen_row_data_by_schema(nb=gap, schema=schema,
desired_field_names=[default_primary_key_field_name, default_vector_field_name])
error = {ct.err_code: 1100,
ct.err_msg: f"fieldSchema({default_int32_field_name}) has no corresponding fieldData pass in: invalid parameter"}
self.upsert(client, collection_name, new_rows, partition_name=partition_names[-1], partial_update=True,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_partial_update_same_pk_multiple_fields(self):
"""
target: Test PU will success and query will success
method:
1. Create a collection
2. Insert rows
3. Upsert the rows with same pk and different field
expected: Step 3 should fail
"""
# step 1: create collection
client = self._client()
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_int32_field_name, DataType.INT32, nullable=True)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
index_params.add_index(default_int32_field_name, index_type="AUTOINDEX")
collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
self.create_collection(client, collection_name, default_dim, schema=schema,
consistency_level="Strong", index_params=index_params)
# step 2: Insert rows
rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema)
self.upsert(client, collection_name, rows, partial_update=True)
# step 3: Upsert the rows with same pk and different field
new_rows = []
for i in range(default_nb):
data = {}
if i % 2 == 0:
data[default_int32_field_name] = i + 1000
data[default_primary_key_field_name] = 0
else:
data[default_vector_field_name] = [random.random() for _ in range(default_dim)]
data[default_primary_key_field_name] = 0
new_rows.append(data)
error = {ct.err_code: 1,
ct.err_msg: f"The data fields length is inconsistent. previous length is 2000, current length is 1000"}
self.upsert(client, collection_name, new_rows, partial_update=True,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)

File diff suppressed because it is too large

View File

@@ -0,0 +1,824 @@
import pytest
import numpy as np
from base.client_v2_base import TestMilvusClientV2Base
from utils.util_log import test_log as log
from common import common_func as cf
from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from utils.util_pymilvus import *
prefix = "client_insert"
epsilon = ct.epsilon
default_nb = ct.default_nb
default_nb_medium = ct.default_nb_medium
default_nq = ct.default_nq
default_dim = ct.default_dim
default_limit = ct.default_limit
default_search_exp = "id >= 0"
exp_res = "exp_res"
default_search_string_exp = "varchar >= \"0\""
default_search_mix_exp = "int64 >= 0 && varchar >= \"0\""
default_invaild_string_exp = "varchar >= 0"
default_json_search_exp = "json_field[\"number\"] >= 0"
perfix_expr = 'varchar like "0%"'
default_search_field = ct.default_float_vec_field_name
default_search_params = ct.default_search_params
default_primary_key_field_name = "id"
default_vector_field_name = "vector"
default_dynamic_field_name = "field_new"
default_float_field_name = ct.default_float_field_name
default_bool_field_name = ct.default_bool_field_name
default_string_field_name = ct.default_string_field_name
default_int32_array_field_name = ct.default_int32_array_field_name
default_string_array_field_name = ct.default_string_array_field_name
default_int32_field_name = ct.default_int32_field_name
default_int32_value = ct.default_int32_value
class TestMilvusClientUpsertInvalid(TestMilvusClientV2Base):
""" Test case of search interface """
@pytest.fixture(scope="function", params=[False, True])
def auto_id(self, request):
yield request.param
@pytest.fixture(scope="function", params=["COSINE", "L2"])
def metric_type(self, request):
yield request.param
"""
******************************************************************
# The following are invalid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_upsert_column_data(self):
"""
target: test insert column data
method: create connection, collection, insert and search
expected: raise error
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# 1. create collection
self.create_collection(client, collection_name, default_dim)
# 2. insert
vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nb)]
data = [[i for i in range(default_nb)], vectors]
error = {ct.err_code: 999,
ct.err_msg: "The Input data type is inconsistent with defined schema, please check it."}
self.upsert(client, collection_name, data,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_empty_collection_name(self):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._client()
collection_name = ""
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 1, ct.err_msg: f"`collection_name` value {collection_name} is illegal"}
self.upsert(client, collection_name, rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("collection_name", ["12-s", "12 s", "(mn)", "中文", "%$#"])
def test_milvus_client_upsert_invalid_collection_name(self, collection_name):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._client()
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection name: {collection_name}. the first character of a "
f"collection name must be an underscore or letter: invalid parameter"}
self.upsert(client, collection_name, rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_collection_name_over_max_length(self):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._client()
collection_name = "a".join("a" for i in range(256))
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 1100, ct.err_msg: f"the length of a collection name must be less than 255 characters"}
self.upsert(client, collection_name, rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_not_exist_collection_name(self):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str("insert_not_exist")
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 100, ct.err_msg: f"can't find collection[database=default][collection={collection_name}]"}
self.upsert(client, collection_name, rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("data", ["12-s", "12 s", "(mn)", "中文", "%$#", " "])
def test_milvus_client_upsert_data_invalid_type(self, data):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. insert
error = {ct.err_code: 1, ct.err_msg: f"wrong type of argument 'data',expected 'Dict' or list of 'Dict'"}
self.upsert(client, collection_name, data,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_data_empty(self):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. insert
error = {ct.err_code: 1, ct.err_msg: f"wrong type of argument 'data',expected 'Dict' or list of 'Dict'"}
self.upsert(client, collection_name, data="",
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_data_vector_field_missing(self):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i,
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(10)]
error = {ct.err_code: 1,
ct.err_msg: "Insert missed an field `vector` to collection without set nullable==true or set default_value"}
self.upsert(client, collection_name, data=rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_data_id_field_missing(self):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(20)]
error = {ct.err_code: 1,
ct.err_msg: f"Insert missed an field `id` to collection without set nullable==true or set default_value"}
self.upsert(client, collection_name, data=rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_data_extra_field(self):
"""
target: test milvus client: insert extra field than schema
method: insert extra field than schema when enable_dynamic_field is False
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# 1. create collection
dim = 32
self.create_collection(client, collection_name, dim, enable_dynamic_field=False)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(10)]
error = {ct.err_code: 1,
ct.err_msg: f"Attempt to insert an unexpected field `float` to collection without enabling dynamic field"}
self.upsert(client, collection_name, data=rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_data_dim_not_match(self):
"""
target: test milvus client: insert extra field than schema
method: insert extra field than schema when enable_dynamic_field is False
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# 1. create collection
self.create_collection(client, collection_name, default_dim)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [
{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim + 1))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 65536, ct.err_msg: f"of float data should divide the dim({default_dim})"}
self.upsert(client, collection_name, data=rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_not_matched_data(self):
"""
target: test milvus client: insert not matched data then defined
method: insert string to int primary field
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# 1. create collection
self.create_collection(client, collection_name, default_dim)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [
{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 1,
ct.err_msg: "The Input data type is inconsistent with defined schema, {id} field should be a int64"}
self.upsert(client, collection_name, data=rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("partition_name", ["12 s", "(mn)", "中文", "%$#", " "])
def test_milvus_client_upsert_invalid_partition_name(self, partition_name):
"""
target: test milvus client: insert extra field than schema
method: insert extra field than schema when enable_dynamic_field is False
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# 1. create collection
self.create_collection(client, collection_name, default_dim)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 65535, ct.err_msg: f"Invalid partition name: {partition_name}"}
if partition_name == " ":
error = {ct.err_code: 1, ct.err_msg: f"Invalid partition name: . Partition name should not be empty."}
self.upsert(client, collection_name, data=rows, partition_name=partition_name,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_not_exist_partition_name(self):
"""
target: test milvus client: insert extra field than schema
method: insert extra field than schema when enable_dynamic_field is False
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# 1. create collection
self.create_collection(client, collection_name, default_dim)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
partition_name = cf.gen_unique_str("partition_not_exist")
error = {ct.err_code: 200, ct.err_msg: f"partition not found[partition={partition_name}]"}
self.upsert(client, collection_name, data=rows, partition_name=partition_name,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_upsert_collection_partition_not_match(self):
"""
target: test milvus client: insert extra field than schema
method: insert extra field than schema when enable_dynamic_field is False
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
another_collection_name = cf.gen_unique_str(prefix + "another")
partition_name = cf.gen_unique_str("partition")
# 1. create collection
self.create_collection(client, collection_name, default_dim)
self.create_collection(client, another_collection_name, default_dim)
self.create_partition(client, another_collection_name, partition_name)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 200, ct.err_msg: f"partition not found[partition={partition_name}]"}
self.upsert(client, collection_name, data=rows, partition_name=partition_name,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("nullable", [True, False])
def test_milvus_client_insert_array_element_null(self, nullable):
"""
target: test search with null expression on each key of json
method: create connection, collection, insert and search
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
dim = 5
# 1. create collection
nullable_field_name = "nullable_field"
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.VARCHAR, max_length=64, is_primary=True,
auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
schema.add_field(nullable_field_name, DataType.ARRAY, element_type=DataType.INT64, max_capacity=12,
max_length=64, nullable=nullable)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
# 2. insert
vectors = cf.gen_vectors(default_nb, dim)
rows = [{default_primary_key_field_name: str(i), default_vector_field_name: vectors[i],
nullable_field_name: [None, 2, 3]} for i in range(default_nb)]
error = {ct.err_code: 1,
ct.err_msg: "The Input data type is inconsistent with defined schema, {nullable_field} field "
"should be a array, but got a {<class 'list'>} instead."}
self.insert(client, collection_name, rows,
check_task=CheckTasks.err_res,
check_items=error)
class TestMilvusClientUpsertValid(TestMilvusClientV2Base):
""" Test case of search interface """
@pytest.fixture(scope="function", params=[False, True])
def auto_id(self, request):
yield request.param
@pytest.fixture(scope="function", params=["COSINE", "L2"])
def metric_type(self, request):
yield request.param
"""
******************************************************************
# The following are valid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L0)
def test_milvus_client_upsert_default(self):
"""
target: test search (high level api) normal case
method: create connection, collection, insert and search
expected: search/query successfully
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
collections = self.list_collections(client)[0]
assert collection_name in collections
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 0})
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
results = self.upsert(client, collection_name, rows)[0]
assert results['upsert_count'] == default_nb
# 3. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
self.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit,
"pk_name": default_primary_key_field_name})
# 4. query
self.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows,
"with_vec": True,
"pk_name": default_primary_key_field_name})
self.release_collection(client, collection_name)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_upsert_empty_data(self):
"""
target: test search (high level api) normal case
method: create connection, collection, insert and search
expected: search/query successfully
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. insert
rows = []
results = self.upsert(client, collection_name, rows)[0]
assert results['upsert_count'] == 0
# 3. search
rng = np.random.default_rng(seed=19530)
vectors_to_search = rng.random((1, default_dim))
self.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": [],
"pk_name": default_primary_key_field_name,
"limit": 0})
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_upsert_partition(self):
"""
target: test upsert to default and non-default partitions
method: create collection and partition, upsert the same rows into both partitions, then search
expected: upsert and search successfully
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
partition_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. create partition
self.create_partition(client, collection_name, partition_name)
partitions = self.list_partitions(client, collection_name)[0]
assert partition_name in partitions
index = self.list_indexes(client, collection_name)[0]
assert index == ['vector']
# load_state = self.get_load_state(collection_name)[0]
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
# 3. upsert to default partition
results = self.upsert(client, collection_name, rows, partition_name=partitions[0])[0]
assert results['upsert_count'] == default_nb
# 4. upsert to non-default partition
results = self.upsert(client, collection_name, rows, partition_name=partition_name)[0]
assert results['upsert_count'] == default_nb
# 5. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
self.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit,
"pk_name": default_primary_key_field_name})
# partition_number = self.get_partition_stats(client, collection_name, "_default")[0]
# assert partition_number == default_nb
# partition_number = self.get_partition_stats(client, collection_name, partition_name)[0]
# assert partition_number[0]['value'] == 0
if self.has_partition(client, collection_name, partition_name)[0]:
self.release_partitions(client, collection_name, partition_name)
self.drop_partition(client, collection_name, partition_name)
if self.has_collection(client, collection_name)[0]:
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_insert_upsert(self):
"""
target: test insert followed by upsert on the same partition
method: create collection and partition, insert rows, then upsert rows carrying a different dynamic field
expected: insert/upsert/search successfully
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
partition_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. create partition
self.create_partition(client, collection_name, partition_name)
partitions = self.list_partitions(client, collection_name)[0]
assert partition_name in partitions
index = self.list_indexes(client, collection_name)[0]
assert index == ['vector']
# load_state = self.get_load_state(collection_name)[0]
# 3. insert and upsert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
results = self.insert(client, collection_name, rows, partition_name=partition_name)[0]
assert results['insert_count'] == default_nb
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, "new_diff_str_field": str(i)} for i in range(default_nb)]
results = self.upsert(client, collection_name, rows, partition_name=partition_name)[0]
assert results['upsert_count'] == default_nb
# 4. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
self.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit,
"pk_name": default_primary_key_field_name})
if self.has_partition(client, collection_name, partition_name)[0]:
self.release_partitions(client, collection_name, partition_name)
self.drop_partition(client, collection_name, partition_name)
if self.has_collection(client, collection_name)[0]:
self.drop_collection(client, collection_name)
""" Test case of partial update interface """
@pytest.fixture(scope="function", params=[False, True])
def auto_id(self, request):
yield request.param
@pytest.fixture(scope="function", params=["COSINE", "L2"])
def metric_type(self, request):
yield request.param
"""
******************************************************************
# The following are invalid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_partial_update_new_pk_with_missing_field(self):
"""
target: Test PU will return error when provided new pk and partial field
method:
1. Create a collection
2. partial upsert a new pk with only partial field
expected: Step 2 should fail
"""
# step 1: create collection
client = self._client()
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_int32_field_name, DataType.INT32, nullable=True)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
index_params.add_index(default_int32_field_name, index_type="AUTOINDEX")
collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
self.create_collection(client, collection_name, default_dim, schema=schema,
consistency_level="Strong", index_params=index_params)
# step 2: partial upsert a new pk with only partial field
rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema,
desired_field_names=[default_primary_key_field_name, default_int32_field_name])
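# A partial upsert for primary keys that do not exist yet has no stored row to merge with, so every schema field would be required; omitting the vector field is expected to be rejected below.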
error = {ct.err_code: 1100, ct.err_msg:
f"fieldSchema({default_vector_field_name}) has no corresponding fieldData pass in: invalid parameter"}
self.upsert(client, collection_name, rows, partial_update=True,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_partial_update_new_field_without_dynamic_field(self):
"""
target: Test PU will return error when provided new field without dynamic field
method:
1. Create a collection without dynamic field
2. partial upsert a new field
expected: Step 2 should fail
"""
# step 1: create collection with dynamic field
client = self._client()
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
self.create_collection(client, collection_name, default_dim, schema=schema,
consistency_level="Strong", index_params=index_params)
# step 2: partial upsert a new field
row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema)
self.upsert(client, collection_name, row, partial_update=True)
new_row = [{default_primary_key_field_name: i, default_int32_field_name: 99} for i in range(default_nb)]
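# The int32 field is not defined in the schema and the dynamic field is disabled, so this partial upsert is expected to be rejected.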
error = {ct.err_code: 1,
ct.err_msg: f"Attempt to insert an unexpected field `{default_int32_field_name}` to collection without enabling dynamic field"}
self.upsert(client, collection_name, new_row, partial_update=True, check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_partial_update_after_release_collection(self):
"""
target: test basic function of partial update
method:
1. create collection
2. insert a full row of data using partial update
3. partial update data
4. release collection
5. partial update data
expected: step 5 should fail
"""
# Step 1: create collection
client = self._client()
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
index_params.add_index(default_string_field_name, index_type="AUTOINDEX")
collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
self.create_collection(client, collection_name, default_dim, schema=schema,
consistency_level="Strong", index_params=index_params)
# Step 2: insert a full row of data using partial update
rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema)
self.upsert(client, collection_name, rows, partial_update=True)
# Step 3: partial update data
new_row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema,
desired_field_names=[default_primary_key_field_name, default_string_field_name])
self.upsert(client, collection_name, new_row, partial_update=True)
# Step 4: release collection
self.release_collection(client, collection_name)
# Step 5: partial update data
new_row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema,
desired_field_names=[default_primary_key_field_name, default_string_field_name])
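# Partial update presumably has to query the stored rows to fill in the omitted fields, so it is expected to fail once the collection is released (not loaded), as the error below indicates.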
error = {ct.err_code: 101,
ct.err_msg: f"failed to query: collection not loaded"}
self.upsert(client, collection_name, new_row, partial_update=True,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_partial_update_same_pk_after_delete(self):
"""
target: test PU will fail when provided same pk and partial field
method:
1. Create a collection with dynamic field
2. Insert rows
3. delete the rows
4. upsert the rows with same pk and partial field
expected: step 4 should fail
"""
# Step 1: create collection
client = self._client()
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_int32_field_name, DataType.INT32)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
index_params.add_index(default_int32_field_name, index_type="AUTOINDEX")
collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
self.create_collection(client, collection_name, default_dim, schema=schema,
consistency_level="Strong", index_params=index_params)
# Step 2: insert rows
rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema)
self.upsert(client, collection_name, rows, partial_update=True)
# Step 3: delete the rows
result = self.delete(client, collection_name, filter=default_search_exp)[0]
assert result["delete_count"] == default_nb
result = self.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_nothing)[0]
assert len(result) == 0
# Step 4: upsert the rows with same pk and partial field
new_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema,
desired_field_names=[default_primary_key_field_name, default_vector_field_name])
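# The matching rows were just deleted, so the partial upsert has nothing to merge the missing int32 field from and is expected to fail.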
error = {ct.err_code: 1100,
ct.err_msg: f"fieldSchema({default_int32_field_name}) has no corresponding fieldData pass in: invalid parameter"}
self.upsert(client, collection_name, new_rows, partial_update=True,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_partial_update_pk_in_wrong_partition(self):
"""
target: test PU will fail when provided pk in wrong partition
method:
1. Create a collection
2. Create 2 partitions
3. Insert rows
4. upsert the rows with pk in wrong partition
expected: step 4 should fail
"""
# Step 1: create collection
client = self._client()
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_int32_field_name, DataType.INT32)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
index_params.add_index(default_int32_field_name, index_type="AUTOINDEX")
collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
self.create_collection(client, collection_name, default_dim, schema=schema,
consistency_level="Strong", index_params=index_params)
# Step 2: Create 2 partitions
num_of_partitions = 2
partition_names = []
for _ in range(num_of_partitions):
partition_name = cf.gen_unique_str("partition")
self.create_partition(client, collection_name, partition_name)
partition_names.append(partition_name)
# Step 3: Insert rows
rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema)
gap = default_nb // num_of_partitions
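# Split the rows evenly so each partition receives a disjoint slice of primary keys.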
for i, partition in enumerate(partition_names):
self.upsert(client, collection_name, rows[i*gap:(i+1)*gap], partition_name=partition, partial_update=True)
# Step 4: upsert the rows with pk in wrong partition
new_rows = cf.gen_row_data_by_schema(nb=gap, schema=schema,
desired_field_names=[default_primary_key_field_name, default_vector_field_name])
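# These primary keys live in other partitions; within partition_names[-1] they count as new rows, so the partial upsert that omits the int32 field is expected to fail.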
error = {ct.err_code: 1100,
ct.err_msg: f"fieldSchema({default_int32_field_name}) has no corresponding fieldData pass in: invalid parameter"}
self.upsert(client, collection_name, new_rows, partition_name=partition_names[-1], partial_update=True,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_partial_update_same_pk_multiple_fields(self):
"""
target: Test PU will fail when rows in one request carry different field sets
method:
1. Create a collection
2. Insert rows
3. Upsert rows with the same pk but different fields per row
expected: Step 3 should fail
"""
# step 1: create collection
client = self._client()
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_int32_field_name, DataType.INT32, nullable=True)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
index_params.add_index(default_int32_field_name, index_type="AUTOINDEX")
collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
self.create_collection(client, collection_name, default_dim, schema=schema,
consistency_level="Strong", index_params=index_params)
# step 2: Insert rows
rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema)
self.upsert(client, collection_name, rows, partial_update=True)
# step 3: Upsert the rows with same pk and different field
new_rows = []
for i in range(default_nb):
data = {}
if i % 2 == 0:
data[default_int32_field_name] = i + 1000
data[default_primary_key_field_name] = 0
else:
data[default_vector_field_name] = [random.random() for _ in range(default_dim)]
data[default_primary_key_field_name] = 0
new_rows.append(data)
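# All rows in a single upsert request apparently must share the same field layout; alternating between {pk, int32} and {pk, vector} rows is expected to trigger the inconsistency error below.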
error = {ct.err_code: 1,
ct.err_msg: f"The data fields length is inconsistent. previous length is 2000, current length is 1000"}
self.upsert(client, collection_name, new_rows, partial_update=True,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)

View File

@ -1,314 +0,0 @@
import pytest
import time
import random
import numpy as np
from common.common_type import CaseLabel, CheckTasks
from common import common_func as cf
from common import common_type as ct
from utils.util_log import test_log as log
from utils.util_pymilvus import *
from base.client_v2_base import TestMilvusClientV2Base
from pymilvus import DataType, FieldSchema, CollectionSchema
from sklearn import preprocessing
# Test parameters
default_nb = ct.default_nb
default_nq = ct.default_nq
default_limit = ct.default_limit
default_search_exp = "id >= 0"
exp_res = "exp_res"
default_primary_key_field_name = "id"
default_vector_field_name = "vector"
default_int32_field_name = ct.default_int32_field_name
class TestMilvusClientPartialUpdate(TestMilvusClientV2Base):
""" Test case of partial update functionality """
@pytest.mark.tags(CaseLabel.L0)
def test_partial_update_all_field_types(self):
"""
Test partial update functionality with all field types
1. Create collection with all data types
2. Insert initial data
3. Perform partial update for each field type
4. Verify all updates work correctly
"""
client = self._client()
dim = 64
collection_name = cf.gen_collection_name_by_testcase_name()
# Create schema with all data types
schema = cf.gen_all_datatype_collection_schema(dim=dim)
# Create index parameters
index_params = client.prepare_index_params()
for i in range(len(schema.fields)):
field_name = schema.fields[i].name
print(f"field_name: {field_name}")
if field_name == "json_field":
index_params.add_index(field_name, index_type="AUTOINDEX",
params={"json_cast_type": "json"})
elif field_name == "text_sparse_emb":
index_params.add_index(field_name, index_type="AUTOINDEX", metric_type="BM25")
else:
index_params.add_index(field_name, index_type="AUTOINDEX")
# Create collection
client.create_collection(collection_name, default_dim, consistency_level="Strong", schema=schema, index_params=index_params)
# Load collection
self.load_collection(client, collection_name)
# Insert initial data
nb = 1000
rows = cf.gen_row_data_by_schema(nb=nb, schema=schema)
self.upsert(client, collection_name, rows, partial_update=True)
log.info(f"Inserted {nb} initial records")
primary_key_field_name = schema.fields[0].name
for i in range(len(schema.fields)):
update_field_name = schema.fields[i if i != 0 else 1].name
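# Partially update one non-pk field at a time; when i == 0 (the primary key itself) fall back to field index 1 so each request still carries a field to update.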
new_row = cf.gen_partial_row_data_by_schema(nb=nb, schema=schema,
desired_field_names=[primary_key_field_name, update_field_name])
client.upsert(collection_name, new_row, partial_update=True)
log.info("Partial update test for all field types passed successfully")
@pytest.mark.tags(CaseLabel.L0)
def test_partial_update_simple_demo(self):
"""
Test simple partial update demo with nullable fields
1. Create collection with explicit schema including nullable fields
2. Insert initial data with some null values
3. Perform partial updates with different field combinations
4. Verify partial update behavior preserves unchanged fields
"""
client = self._client()
dim = 3
collection_name = cf.gen_collection_name_by_testcase_name()
# Create schema with nullable fields
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field("id", DataType.INT64, is_primary=True, auto_id=False)
schema.add_field("vector", DataType.FLOAT_VECTOR, dim=dim)
schema.add_field("name", DataType.VARCHAR, max_length=100, nullable=True)
schema.add_field("price", DataType.FLOAT, nullable=True)
schema.add_field("category", DataType.VARCHAR, max_length=50, nullable=True)
# Create collection
self.create_collection(client, collection_name, schema=schema)
# Create index
index_params = self.prepare_index_params(client)[0]
index_params.add_index("vector", index_type="AUTOINDEX", metric_type="L2")
self.create_index(client, collection_name, index_params=index_params)
# Load collection
self.load_collection(client, collection_name)
# Insert initial data with some null values
initial_data = [
{
"id": 1,
"vector": preprocessing.normalize([np.array([random.random() for j in range(dim)])])[0].tolist(),
"name": "Product A",
"price": 100.0,
"category": "Electronics"
},
{
"id": 2,
"vector": preprocessing.normalize([np.array([random.random() for j in range(dim)])])[0].tolist(),
"name": "Product B",
"price": None, # Null price
"category": "Home"
},
{
"id": 3,
"vector": preprocessing.normalize([np.array([random.random() for j in range(dim)])])[0].tolist(),
"name": "Product C",
"price": None, # Null price
"category": "Books"
}
]
self.upsert(client, collection_name, initial_data, partial_update=False)
log.info("Inserted initial data with null values")
# Verify initial state
results = self.query(client, collection_name, filter="id > 0", output_fields=["*"])[0]
assert len(results) == 3
initial_data_map = {data['id']: data for data in results}
assert initial_data_map[1]['name'] == "Product A"
assert initial_data_map[1]['price'] == 100.0
assert initial_data_map[1]['category'] == "Electronics"
assert initial_data_map[2]['name'] == "Product B"
assert initial_data_map[2]['price'] is None
assert initial_data_map[2]['category'] == "Home"
assert initial_data_map[3]['name'] == "Product C"
assert initial_data_map[3]['price'] is None
assert initial_data_map[3]['category'] == "Books"
log.info("Initial data verification passed")
# First partial update - update all fields
log.info("First partial update - updating all fields...")
first_update_data = [
{
"id": 1,
"vector": preprocessing.normalize([np.array([random.random() for j in range(dim)])])[0].tolist(),
"name": "Product A-Update",
"price": 111.1,
"category": "Electronics-Update"
},
{
"id": 2,
"vector": preprocessing.normalize([np.array([random.random() for j in range(dim)])])[0].tolist(),
"name": "Product B-Update",
"price": 222.2,
"category": "Home-Update"
},
{
"id": 3,
"vector": preprocessing.normalize([np.array([random.random() for j in range(dim)])])[0].tolist(),
"name": "Product C-Update",
"price": None, # Still null
"category": "Books-Update"
}
]
self.upsert(client, collection_name, first_update_data, partial_update=True)
# Verify first update
results = self.query(client, collection_name, filter="id > 0", output_fields=["*"])[0]
assert len(results) == 3
first_update_map = {data['id']: data for data in results}
assert first_update_map[1]['name'] == "Product A-Update"
assert abs(first_update_map[1]['price'] - 111.1) < 0.001
assert first_update_map[1]['category'] == "Electronics-Update"
assert first_update_map[2]['name'] == "Product B-Update"
assert abs(first_update_map[2]['price'] - 222.2) < 0.001
assert first_update_map[2]['category'] == "Home-Update"
assert first_update_map[3]['name'] == "Product C-Update"
assert first_update_map[3]['price'] is None
assert first_update_map[3]['category'] == "Books-Update"
log.info("First partial update verification passed")
# Second partial update - update only specific fields
log.info("Second partial update - updating specific fields...")
second_update_data = [
{
"id": 1,
"name": "Product A-Update-Again",
"price": 1111.1,
"category": "Electronics-Update-Again"
},
{
"id": 2,
"name": "Product B-Update-Again",
"price": None, # Set back to null
"category": "Home-Update-Again"
},
{
"id": 3,
"name": "Product C-Update-Again",
"price": 3333.3, # Set price from null to value
"category": "Books-Update-Again"
}
]
self.upsert(client, collection_name, second_update_data, partial_update=True)
# Verify second update
results = self.query(client, collection_name, filter="id > 0", output_fields=["*"])[0]
assert len(results) == 3
second_update_map = {data['id']: data for data in results}
# Verify ID 1: all fields updated
assert second_update_map[1]['name'] == "Product A-Update-Again"
assert abs(second_update_map[1]['price'] - 1111.1) < 0.001
assert second_update_map[1]['category'] == "Electronics-Update-Again"
# Verify ID 2: all fields updated, price set to null
assert second_update_map[2]['name'] == "Product B-Update-Again"
assert second_update_map[2]['price'] is None
assert second_update_map[2]['category'] == "Home-Update-Again"
# Verify ID 3: all fields updated, price set from null to value
assert second_update_map[3]['name'] == "Product C-Update-Again"
assert abs(second_update_map[3]['price'] - 3333.3) < 0.001
assert second_update_map[3]['category'] == "Books-Update-Again"
# Verify vector fields were preserved from first update (not updated in second update)
# Note: Vector comparison might be complex, so we just verify they exist
assert 'vector' in second_update_map[1]
assert 'vector' in second_update_map[2]
assert 'vector' in second_update_map[3]
log.info("Second partial update verification passed")
log.info("Simple partial update demo test completed successfully")
@pytest.mark.tags(CaseLabel.L0)
def test_milvus_client_partial_update_null_to_null(self):
"""
Target: test PU can successfully update a null to null
Method:
1. Create a collection, enable nullable fields
2. Insert default_nb rows to the collection
3. Partial Update the nullable field with null
4. Query the collection to check the value of nullable field
Expected: query should have correct value and number of entities
"""
# step 1: create collection with nullable fields
client = self._client()
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_int32_field_name, DataType.INT32, nullable=True)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
index_params.add_index(default_int32_field_name, index_type="AUTOINDEX")
collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
self.create_collection(client, collection_name, default_dim, schema=schema,
consistency_level="Strong", index_params=index_params)
# step 2: insert default_nb rows to the collection
rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, skip_field_names=[default_int32_field_name])
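# The nullable int32 field is intentionally omitted here, so it is stored as null before the partial update.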
self.upsert(client, collection_name, rows, partial_update=True)
# step 3: Partial Update the nullable field with null
new_row = cf.gen_partial_row_data_by_schema(
nb=default_nb,
schema=schema,
desired_field_names=[default_primary_key_field_name, default_int32_field_name],
start=0
)
# Set the nullable field to None
for data in new_row:
data[default_int32_field_name] = None
self.upsert(client, collection_name, new_row, partial_update=True)
# step 4: Query the collection to check the value of nullable field
result = self.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
output_fields=[default_int32_field_name],
check_items={exp_res: new_row,
"with_vec": True,
"pk_name": default_primary_key_field_name})[0]
assert len(result) == default_nb
# Verify that all nullable fields are indeed null
for data in result:
assert data[default_int32_field_name] is None, f"Expected null value for {default_int32_field_name}, got {data[default_int32_field_name]}"
log.info("Partial update null to null test completed successfully")
self.drop_collection(client, collection_name)

View File

@ -719,24 +719,6 @@ class TestInsertOperation(TestcaseBase):
assert cf._check_primary_keys(mutation_res.primary_keys, ct.default_nb)
assert collection_w.num_entities == ct.default_nb
@pytest.mark.tags(CaseLabel.L1)
def test_insert_auto_id_true_with_dataframe_values(self, pk_field):
"""
target: test insert with auto_id=True
method: create collection with auto_id=True
expected: 1.verify num entities 2.verify ids
"""
c_name = cf.gen_unique_str(prefix)
schema = cf.gen_default_collection_schema(
primary_field=pk_field, auto_id=True)
collection_w = self.init_collection_wrap(name=c_name, schema=schema)
df = cf.gen_default_dataframe_data(nb=100)
error = {ct.err_code: 999,
ct.err_msg: f"Expect no data for auto_id primary field: {pk_field}"}
collection_w.insert(
data=df, check_task=CheckTasks.err_res, check_items=error)
assert collection_w.is_empty
@pytest.mark.tags(CaseLabel.L2)
def test_insert_auto_id_true_with_list_values(self, pk_field):
"""
@ -888,107 +870,6 @@ class TestInsertOperation(TestcaseBase):
collection_w.insert(data)
assert collection_w.num_entities == ct.default_nb
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_partition_key", [True, False])
@pytest.mark.parametrize("default_value", [[], [None for _ in range(ct.default_nb)]])
def test_insert_multi_fields_using_none_data(self, enable_partition_key, default_value, auto_id):
"""
target: test insert with multi fields include array using none value
method: 1. create a collection with multi fields using default value
2. insert using none value to replace the field value
expected: insert successfully
"""
json_embedded_object = "json_embedded_object"
fields = [
cf.gen_int64_field(is_primary=True),
cf.gen_int32_field(default_value=np.int32(1), nullable=True),
cf.gen_float_field(default_value=np.float32(1.0), nullable=True),
cf.gen_string_field(default_value="abc", enable_partition_key=enable_partition_key, nullable=True),
cf.gen_array_field(name=ct.default_int32_array_field_name, element_type=DataType.INT32, nullable=True),
cf.gen_array_field(name=ct.default_float_array_field_name, element_type=DataType.FLOAT, nullable=True),
cf.gen_array_field(name=ct.default_string_array_field_name, element_type=DataType.VARCHAR, max_length=100, nullable=True),
cf.gen_json_field(name=json_embedded_object, nullable=True),
cf.gen_float_vec_field()
]
schema = cf.gen_collection_schema(fields, auto_id=auto_id)
collection_w = self.init_collection_wrap(schema=schema)
# default value fields, [] or [None]
data = [
[i for i in range(ct.default_nb)],
default_value,
default_value,
default_value,
[[np.int32(j) for j in range(10)] for _ in range(ct.default_nb)],
[[np.float32(j) for j in range(10)] for _ in range(ct.default_nb)],
default_value,
default_value,
cf.gen_vectors(ct.default_nb, ct.default_dim)
]
if auto_id:
del data[0]
collection_w.insert(data=data)
assert collection_w.num_entities == ct.default_nb
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_partition_key", [True, False])
@pytest.mark.parametrize("nullable", [True, False])
def test_insert_multi_fields_by_rows_using_default(self, enable_partition_key, nullable):
"""
target: test insert multi fields by rows with default value
method: 1. create a collection with one field using default value
2. insert using default value to replace the field value
expected: insert successfully
"""
# 1. initialize with data
if enable_partition_key is True and nullable is True:
pytest.skip("partition key field not support nullable")
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(default_value=np.float32(3.14), nullable=nullable),
cf.gen_string_field(default_value="abc", is_partition_key=enable_partition_key, nullable=nullable),
cf.gen_json_field(), cf.gen_float_vec_field()]
schema = cf.gen_collection_schema(fields)
collection_w = self.init_collection_wrap(schema=schema)
collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
collection_w.load()
# 2. insert data
array = cf.gen_default_rows_data()
for i in range(0, ct.default_nb, 2):
array[i][ct.default_string_field_name] = None
collection_w.insert(array)
exp = f"{ct.default_string_field_name} == 'abc'"
res = collection_w.query(exp, output_fields=[ct.default_float_field_name, ct.default_string_field_name])[0]
assert len(res) == ct.default_nb/2
@pytest.mark.tags(CaseLabel.L1)
def test_insert_multi_fields_by_rows_using_none(self):
"""
target: test insert multi fields by rows with none value
method: 1. create a collection with one field using none value
2. insert using none to replace the field value
expected: insert successfully
"""
# 1. initialize with data
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(nullable=True),
cf.gen_string_field(default_value="abc", nullable=True), cf.gen_json_field(), cf.gen_float_vec_field()]
schema = cf.gen_collection_schema(fields)
collection_w = self.init_collection_wrap(schema=schema)
collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
collection_w.load()
# 2. insert data
array = cf.gen_default_rows_data()
for i in range(0, ct.default_nb, 2):
array[i][ct.default_float_field_name] = None
array[i][ct.default_string_field_name] = None
collection_w.insert(array)
exp = f"{ct.default_string_field_name} == 'abc'"
res = collection_w.query(exp, output_fields=[ct.default_float_field_name, ct.default_string_field_name])[0]
assert len(res) == ct.default_nb/2
assert res[0][ct.default_float_field_name] is None
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("enable_partition_key", [True, False])