mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
related issue: https://github.com/milvus-io/milvus/issues/44425 pr: #44801 split insert.py into a few files: upsert.py, insert.py, partial_upsert.py ... add test for allow insert auto id --------- Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
1389 lines
70 KiB
Python
1389 lines
70 KiB
Python
import pytest
|
|
import numpy as np
|
|
|
|
from base.client_v2_base import TestMilvusClientV2Base
|
|
from utils.util_log import test_log as log
|
|
from common import common_func as cf
|
|
from common import common_type as ct
|
|
from common.common_type import CaseLabel, CheckTasks
|
|
from utils.util_pymilvus import *
|
|
|
|
prefix = "client_insert"
|
|
epsilon = ct.epsilon
|
|
default_nb = ct.default_nb
|
|
default_nb_medium = ct.default_nb_medium
|
|
default_nq = ct.default_nq
|
|
default_dim = ct.default_dim
|
|
default_limit = ct.default_limit
|
|
default_search_exp = "id >= 0"
|
|
exp_res = "exp_res"
|
|
default_search_string_exp = "varchar >= \"0\""
|
|
default_search_mix_exp = "int64 >= 0 && varchar >= \"0\""
|
|
default_invaild_string_exp = "varchar >= 0"
|
|
default_json_search_exp = "json_field[\"number\"] >= 0"
|
|
perfix_expr = 'varchar like "0%"'
|
|
default_search_field = ct.default_float_vec_field_name
|
|
default_search_params = ct.default_search_params
|
|
default_primary_key_field_name = "id"
|
|
default_vector_field_name = "vector"
|
|
default_dynamic_field_name = "field_new"
|
|
default_float_field_name = ct.default_float_field_name
|
|
default_bool_field_name = ct.default_bool_field_name
|
|
default_string_field_name = ct.default_string_field_name
|
|
default_int32_array_field_name = ct.default_int32_array_field_name
|
|
default_string_array_field_name = ct.default_string_array_field_name
|
|
default_int32_field_name = ct.default_int32_field_name
|
|
default_int32_value = ct.default_int32_value
|
|
|
|
class TestMilvusClientInsertInvalid(TestMilvusClientV2Base):
    """ Test cases for the insert interface with invalid inputs """

    @pytest.fixture(scope="function", params=[False, True])
    def auto_id(self, request):
        yield request.param

    @pytest.fixture(scope="function", params=["COSINE", "L2"])
    def metric_type(self, request):
        yield request.param

    """
    ******************************************************************
    #  The following are invalid base cases
    ******************************************************************
    """

    @pytest.mark.tags(CaseLabel.L2)
    def test_milvus_client_insert_after_client_closed(self):
        """
        target: test insert after client is closed
        method: insert after client is closed
        expected: raise exception
        """
        client = self._client(alias='my_client')
        collection_name = cf.gen_collection_name_by_testcase_name()
        self.create_collection(client, collection_name, default_dim)
        self.close(client)

        data = cf.gen_default_list_data(10)
        error = {ct.err_code: 999, ct.err_msg: 'should create connection first'}
        self.insert(client, collection_name, data, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_milvus_client_insert_column_data(self):
        """
        target: test insert with column-format data
        method: create collection, then insert data organized as columns instead of rows
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        # 1. create collection
        self.create_collection(client, collection_name, default_dim)
        # 2. insert column-format data (list of columns) which the client API rejects
        vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nb)]
        data = [[i for i in range(default_nb)], vectors]
        error = {ct.err_code: 999,
                 ct.err_msg: "The Input data type is inconsistent with defined schema, please check it."}
        self.insert(client, collection_name, data,
                    check_task=CheckTasks.err_res, check_items=error)
        self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L1)
    def test_milvus_client_insert_empty_collection_name(self):
        """
        target: test insert with an empty collection name
        method: insert rows with collection_name set to ""
        expected: raise exception
        """
        client = self._client()
        collection_name = ""
        rng = np.random.default_rng(seed=19530)
        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
        error = {ct.err_code: 1, ct.err_msg: f"`collection_name` value {collection_name} is illegal"}
        self.insert(client, collection_name, rows,
                    check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("collection_name", ["12-s", "12 s", "(mn)", "中文", "%$#"])
    def test_milvus_client_insert_invalid_collection_name(self, collection_name):
        """
        target: test insert with an invalid collection name
        method: insert rows using collection names containing illegal characters
        expected: raise exception
        """
        client = self._client()
        rng = np.random.default_rng(seed=19530)
        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
        error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection name: {collection_name}. the first character of a "
                                                f"collection name must be an underscore or letter: invalid parameter"}
        self.insert(client, collection_name, rows,
                    check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_milvus_client_insert_collection_name_over_max_length(self):
        """
        target: test insert with a collection name exceeding the max length
        method: insert rows using a collection name longer than 255 characters
        expected: raise exception
        """
        client = self._client()
        collection_name = "a".join("a" for i in range(256))
        rng = np.random.default_rng(seed=19530)
        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
        error = {ct.err_code: 1100, ct.err_msg: f"the length of a collection name must be less than 255 characters"}
        self.insert(client, collection_name, rows,
                    check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_milvus_client_insert_not_exist_collection_name(self):
        """
        target: test insert into a non-existent collection
        method: insert rows using a collection name that was never created
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        rng = np.random.default_rng(seed=19530)
        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
        error = {ct.err_code: 100, ct.err_msg: f"can't find collection[database=default][collection={collection_name}]"}
        self.insert(client, collection_name, rows,
                    check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("data", ["12-s", "中文", "%$#", " ", ""])
    def test_milvus_client_insert_data_invalid_type(self, data):
        """
        target: test insert with data of invalid type
        method: create collection, then insert a plain string instead of dict/list-of-dict
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        # 1. create collection
        self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
        # 2. insert
        error = {ct.err_code: 999,
                 ct.err_msg: "wrong type of argument 'data',expected 'Dict' or list of 'Dict', got 'str'"}
        self.insert(client, collection_name, data,
                    check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    def test_milvus_client_insert_data_vector_field_missing(self):
        """
        target: test insert with the vector field missing from the rows
        method: create collection, then insert rows that omit the vector field
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        # 1. create collection
        self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
        # 2. insert rows without the vector field
        rng = np.random.default_rng(seed=19530)
        rows = [{default_primary_key_field_name: i,
                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
        error = {ct.err_code: 1,
                 ct.err_msg: f"Insert missed an field `vector` to collection "
                             f"without set nullable==true or set default_value"}
        self.insert(client, collection_name, data=rows,
                    check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    def test_milvus_client_insert_data_id_field_missing(self):
        """
        target: test insert with the primary key field missing from the rows
        method: create collection, then insert rows that omit the id field
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        # 1. create collection
        self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
        # 2. insert rows without the primary key field
        rng = np.random.default_rng(seed=19530)
        rows = [{default_vector_field_name: list(rng.random((1, default_dim))[0]),
                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
        error = {ct.err_code: 1,
                 ct.err_msg: f"Insert missed an field `id` to collection without set nullable==true or set default_value"}
        self.insert(client, collection_name, data=rows,
                    check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    def test_milvus_client_insert_data_extra_field(self):
        """
        target: test milvus client: insert extra field than schema
        method: insert extra field than schema when enable_dynamic_field is False
        expected: Raise exception
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        # 1. create collection with dynamic field disabled
        self.create_collection(client, collection_name, default_dim, enable_dynamic_field=False)
        # 2. insert rows carrying a field (`float`) not defined in the schema
        rng = np.random.default_rng(seed=19530)
        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
        error = {ct.err_code: 1,
                 ct.err_msg: f"Attempt to insert an unexpected field `float` to collection without enabling dynamic field"}
        self.insert(client, collection_name, data=rows,
                    check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    def test_milvus_client_insert_data_dim_not_match(self):
        """
        target: test insert with vectors whose dim does not match the schema
        method: create collection with default_dim, insert vectors of default_dim + 1
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        # 1. create collection
        self.create_collection(client, collection_name, default_dim)
        # 2. insert vectors with one extra dimension
        rng = np.random.default_rng(seed=19530)
        rows = [
            {default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim + 1))[0]),
             default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
        error = {ct.err_code: 65536, ct.err_msg: f"of float data should divide the dim({default_dim})"}
        self.insert(client, collection_name, data=rows,
                    check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    def test_milvus_client_insert_not_matched_data(self):
        """
        target: test milvus client: insert not matched data then defined
        method: insert string to int primary field
        expected: Raise exception
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        # 1. create collection
        self.create_collection(client, collection_name, default_dim)
        # 2. insert string values into the int64 primary key field
        rng = np.random.default_rng(seed=19530)
        rows = [
            {default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, default_dim))[0]),
             default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
        error = {ct.err_code: 1,
                 ct.err_msg: f"The Input data type is inconsistent with defined schema, "
                             f"{{id}} field should be a int64"}
        self.insert(client, collection_name, data=rows,
                    check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("partition_name", ["12 s", "(mn)", "中文", "%$#", " "])
    def test_milvus_client_insert_invalid_partition_name(self, partition_name):
        """
        target: test insert with an invalid partition name
        method: insert rows using partition names containing illegal characters
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        # 1. create collection
        self.create_collection(client, collection_name, default_dim)
        # 2. insert into a partition with an illegal name
        rng = np.random.default_rng(seed=19530)
        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
        error = {ct.err_code: 65535, ct.err_msg: f"Invalid partition name: {partition_name}."}
        if partition_name == " ":
            # a whitespace-only name is reported as empty by the server
            error = {ct.err_code: 1, ct.err_msg: f"Invalid partition name: . Partition name should not be empty."}
        self.insert(client, collection_name, data=rows, partition_name=partition_name,
                    check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    def test_milvus_client_insert_not_exist_partition_name(self):
        """
        target: test insert into a non-existent partition
        method: insert rows using a partition name that was never created
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        # 1. create collection
        self.create_collection(client, collection_name, default_dim)
        # 2. insert into a partition that does not exist
        rng = np.random.default_rng(seed=19530)
        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
        partition_name = cf.gen_unique_str("partition_not_exist")
        error = {ct.err_code: 200, ct.err_msg: f"partition not found[partition={partition_name}]"}
        self.insert(client, collection_name, data=rows, partition_name=partition_name,
                    check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_milvus_client_insert_collection_partition_not_match(self):
        """
        target: test insert into a partition that belongs to another collection
        method: create two collections, create a partition on the second, insert into the
                first collection using that partition name
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        another_collection_name = cf.gen_collection_name_by_testcase_name()
        partition_name = cf.gen_unique_str("partition")
        # 1. create two collections; the partition only exists on the second one
        self.create_collection(client, collection_name, default_dim)
        self.create_collection(client, another_collection_name, default_dim)
        self.create_partition(client, another_collection_name, partition_name)
        # 2. insert into the first collection with the other collection's partition
        rng = np.random.default_rng(seed=19530)
        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
        error = {ct.err_code: 200, ct.err_msg: f"partition not found[partition={partition_name}]"}
        self.insert(client, collection_name, data=rows, partition_name=partition_name,
                    check_task=CheckTasks.err_res, check_items=error)
|
|
|
|
|
|
class TestMilvusClientInsertValid(TestMilvusClientV2Base):
    """ Test cases for the insert interface with valid inputs """

    @pytest.fixture(scope="function", params=[False, True])
    def auto_id(self, request):
        yield request.param

    @pytest.fixture(scope="function", params=["COSINE", "L2"])
    def metric_type(self, request):
        yield request.param

    @pytest.fixture(scope="function", params=[True, False])
    def nullable(self, request):
        yield request.param

    @pytest.fixture(scope="function", params=[DataType.FLOAT_VECTOR, DataType.FLOAT16_VECTOR,
                                              DataType.BFLOAT16_VECTOR, DataType.INT8_VECTOR])
    def vector_type(self, request):
        yield request.param

    """
    ******************************************************************
    #  The following are valid base cases
    ******************************************************************
    """

    @pytest.mark.tags(CaseLabel.L0)
    def test_milvus_client_insert_default(self, vector_type, nullable):
        """
        target: test insert (high level api) normal case
        method: create connection, collection, insert, search and query
        expected: insert/search/query successfully
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        dim = 8
        # 1. create collection with an explicit schema covering each vector type
        schema = self.create_schema(client, enable_dynamic_field=False)[0]
        schema.add_field(default_primary_key_field_name, DataType.INT64, max_length=64, is_primary=True, auto_id=False)
        schema.add_field(default_vector_field_name, vector_type, dim=dim)
        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64, is_partition_key=True)
        schema.add_field(default_float_field_name, DataType.FLOAT, nullable=nullable)
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(default_vector_field_name, metric_type="COSINE")
        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
        # 2. insert
        vectors = cf.gen_vectors(default_nb, dim, vector_data_type=vector_type)
        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
        results = self.insert(client, collection_name, rows)[0]
        assert results['insert_count'] == default_nb
        # 3. search
        vectors_to_search = [vectors[0]]
        insert_ids = [i for i in range(default_nb)]
        self.search(client, collection_name, vectors_to_search,
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": len(vectors_to_search),
                                 "ids": insert_ids,
                                 "limit": default_limit,
                                 "pk_name": default_primary_key_field_name})
        # 4. query
        self.query(client, collection_name, filter=default_search_exp,
                   check_task=CheckTasks.check_query_results,
                   check_items={exp_res: rows,
                                "with_vec": True,
                                "pk_name": default_primary_key_field_name,
                                "vector_type": vector_type})
        self.release_collection(client, collection_name)
        self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L2)
    def test_milvus_client_insert_different_fields(self):
        """
        target: test insert rows with differing field sets
        method: create collection with dynamic field enabled, insert rows with the default
                fields, then insert rows carrying a new field name, then search
        expected: both inserts and the search succeed
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        # 1. create collection
        self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
        collections = self.list_collections(client)[0]
        assert collection_name in collections
        self.describe_collection(client, collection_name,
                                 check_task=CheckTasks.check_describe_collection_property,
                                 check_items={"collection_name": collection_name,
                                              "dim": default_dim,
                                              "consistency_level": 0})
        # 2. insert
        rng = np.random.default_rng(seed=19530)
        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
        results = self.insert(client, collection_name, rows)[0]
        assert results['insert_count'] == default_nb
        # 3. insert rows carrying a different (dynamic) string field
        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
                 default_float_field_name: i * 1.0, "new_diff_str_field": str(i)} for i in range(default_nb)]
        results = self.insert(client, collection_name, rows)[0]
        assert results['insert_count'] == default_nb
        # 4. search
        vectors_to_search = rng.random((1, default_dim))
        insert_ids = [i for i in range(default_nb)]
        self.search(client, collection_name, vectors_to_search,
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": len(vectors_to_search),
                                 "ids": insert_ids,
                                 "limit": default_limit,
                                 "pk_name": default_primary_key_field_name})
        self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L2)
    def test_milvus_client_insert_empty_data(self):
        """
        target: test insert with an empty row list
        method: create collection, insert [], then search
        expected: insert count is 0 and search returns no hits
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        # 1. create collection
        self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
        # 2. insert an empty list of rows
        rows = []
        results = self.insert(client, collection_name, rows)[0]
        assert results['insert_count'] == 0
        # 3. search
        rng = np.random.default_rng(seed=19530)
        vectors_to_search = rng.random((1, default_dim))
        self.search(client, collection_name, vectors_to_search,
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": len(vectors_to_search),
                                 "ids": [],
                                 "pk_name": default_primary_key_field_name,
                                 "limit": 0})
        self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L1)
    def test_milvus_client_insert_partition(self):
        """
        target: test insert into a named partition
        method: create collection and partition, insert into the partition, then search
        expected: insert count equals nb and search succeeds
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        partition_name = cf.gen_unique_str('partition')
        # 1. create collection
        self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
        # 2. create partition
        self.create_partition(client, collection_name, partition_name)
        partitions = self.list_partitions(client, collection_name)[0]
        assert partition_name in partitions
        index = self.list_indexes(client, collection_name)[0]
        assert index == ['vector']
        # load_state = self.get_load_state(collection_name)[0]
        # 3. insert into the partition
        rng = np.random.default_rng(seed=19530)
        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
        results = self.insert(client, collection_name, rows, partition_name=partition_name)[0]
        assert results['insert_count'] == default_nb
        # 4. search
        vectors_to_search = rng.random((1, default_dim))
        insert_ids = [i for i in range(default_nb)]
        self.search(client, collection_name, vectors_to_search,
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": len(vectors_to_search),
                                 "ids": insert_ids,
                                 "limit": default_limit,
                                 "pk_name": default_primary_key_field_name})
        # partition_number = self.get_partition_stats(client, collection_name, "_default")[0]
        # assert partition_number == default_nb
        # partition_number = self.get_partition_stats(client, collection_name, partition_name)[0]
        # assert partition_number[0]['value'] == 0
        if self.has_partition(client, collection_name, partition_name)[0]:
            self.release_partitions(client, collection_name, partition_name)
            self.drop_partition(client, collection_name, partition_name)
        if self.has_collection(client, collection_name)[0]:
            self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("default_value", ["a" * 64, "aa"])
    def test_milvus_client_insert_with_added_field(self, default_value):
        """
        target: test insert before and after adding a new field to the schema
        method: create connection, collection, insert, add field, insert and search
        expected: search/query successfully
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        dim = 8
        # 1. create collection
        schema = self.create_schema(client, enable_dynamic_field=False)[0]
        schema.add_field(default_primary_key_field_name, DataType.INT64, max_length=64, is_primary=True, auto_id=False)
        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64, is_partition_key=True)
        schema.add_field(default_float_field_name, DataType.FLOAT, nullable=True)
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(default_vector_field_name, metric_type="COSINE")
        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
        # 2. insert before add field
        vectors = cf.gen_vectors(default_nb * 2, dim, vector_data_type=DataType.FLOAT_VECTOR)
        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
        results = self.insert(client, collection_name, rows)[0]
        assert results['insert_count'] == default_nb
        # 3. add new field
        self.add_collection_field(client, collection_name, field_name="field_new", data_type=DataType.VARCHAR,
                                  nullable=True, default_value=default_value, max_length=64)
        vectors_to_search = [vectors[0]]
        insert_ids = [i for i in range(default_nb)]
        # 4. check old dynamic data search is not impacted after add new field
        self.search(client, collection_name, vectors_to_search,
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": len(vectors_to_search),
                                 "ids": insert_ids,
                                 "pk_name": default_primary_key_field_name,
                                 "limit": default_limit})
        # 5. insert data(old + new field)
        rows_t = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
                   default_float_field_name: i * 1.0, default_string_field_name: str(i),
                   "field_new": "field_new"} for i in range(default_nb, default_nb * 2)]
        results = self.insert(client, collection_name, rows_t)[0]
        assert results['insert_count'] == default_nb
        insert_ids_after_add_field = [i for i in range(default_nb, default_nb * 2)]
        # 6. search filtered with the new field: old rows match the default value,
        #    new rows match the explicitly-inserted value
        self.search(client, collection_name, vectors_to_search,
                    filter=f'field_new=="{default_value}"',
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": len(vectors_to_search),
                                 "ids": insert_ids,
                                 "pk_name": default_primary_key_field_name,
                                 "limit": default_limit})
        self.search(client, collection_name, vectors_to_search,
                    filter=f"field_new=='field_new'",
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": len(vectors_to_search),
                                 "ids": insert_ids_after_add_field,
                                 "pk_name": default_primary_key_field_name,
                                 "limit": default_limit})
        self.release_collection(client, collection_name)
        self.drop_collection(client, collection_name)
|
|
|
|
|
|
class TestInsertOperation(TestMilvusClientV2Base):
|
|
"""
|
|
******************************************************************
|
|
The following cases are used to test insert interface operations
|
|
******************************************************************
|
|
"""
|
|
|
|
@pytest.fixture(scope="function", params=[8, 4096])
|
|
def dim(self, request):
|
|
yield request.param
|
|
|
|
@pytest.fixture(scope="function", params=[False, True])
|
|
def auto_id(self, request):
|
|
yield request.param
|
|
|
|
@pytest.fixture(scope="function", params=[ct.default_int64_field_name, ct.default_string_field_name])
|
|
def pk_field(self, request):
|
|
yield request.param
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
|
|
def test_insert_without_connection(self):
|
|
"""
|
|
target: test insert without connection
|
|
method: insert after remove connection
|
|
expected: raise exception
|
|
"""
|
|
client = self._client()
|
|
collection_name = cf.gen_collection_name_by_testcase_name()
|
|
self.create_collection(client, collection_name, default_dim)
|
|
self.close(client)
|
|
|
|
rng = np.random.default_rng(seed=19530)
|
|
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
|
|
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(10)]
|
|
error = {ct.err_code: 999, ct.err_msg: 'should create connection first'}
|
|
self.insert(client, collection_name, rows, check_task=CheckTasks.err_res, check_items=error)
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
|
|
def test_insert_default_partition(self):
|
|
"""
|
|
target: test insert entities into default partition
|
|
method: create partition and insert info collection
|
|
expected: the collection insert count equals to nb
|
|
"""
|
|
client = self._client()
|
|
collection_name = cf.gen_collection_name_by_testcase_name()
|
|
partition_name = cf.gen_unique_str("partition")
|
|
|
|
self.create_collection(client, collection_name, default_dim)
|
|
self.create_partition(client, collection_name, partition_name)
|
|
|
|
rng = np.random.default_rng(seed=19530)
|
|
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
|
|
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(ct.default_nb)]
|
|
results = self.insert(client, collection_name, rows, partition_name=partition_name)[0]
|
|
assert results['insert_count'] == ct.default_nb
|
|
self.drop_collection(client, collection_name)
|
|
|
|
def test_insert_partition_not_existed(self):
|
|
"""
|
|
target: test insert entities in collection created before
|
|
method: create collection and insert entities in it, with the not existed partition_name param
|
|
expected: error raised
|
|
"""
|
|
client = self._client()
|
|
collection_name = cf.gen_collection_name_by_testcase_name()
|
|
self.create_collection(client, collection_name, default_dim)
|
|
|
|
rng = np.random.default_rng(seed=19530)
|
|
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
|
|
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(10)]
|
|
error = {ct.err_code: 200, ct.err_msg: "partition not found[partition=p]"}
|
|
self.insert(client, collection_name, rows, partition_name="p",
|
|
check_task=CheckTasks.err_res, check_items=error)
|
|
self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_insert_partition_repeatedly(self):
    """
    target: test inserting the same rows into two different partitions
    method: create two partitions and insert the same data into each
    expected: each insert reports insert_count == nb
    """
    client = self._client()
    collection_name = cf.gen_unique_str(prefix)
    partition_name_1 = cf.gen_unique_str("partition1")
    partition_name_2 = cf.gen_unique_str("partition2")
    self.create_collection(client, collection_name, default_dim)
    self.create_partition(client, collection_name, partition_name_1)
    self.create_partition(client, collection_name, partition_name_2)
    rng = np.random.default_rng(seed=19530)
    rows = [{default_primary_key_field_name: i,
             default_vector_field_name: list(rng.random((1, default_dim))[0]),
             default_float_field_name: i * 1.0,
             default_string_field_name: str(i)}
            for i in range(ct.default_nb)]
    # insert the identical batch into both partitions and verify each count
    for target_partition in (partition_name_1, partition_name_2):
        res = self.insert(client, collection_name, rows, partition_name=target_partition)[0]
        assert res['insert_count'] == ct.default_nb
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L0)
def test_insert_partition_with_ids(self):
    """
    target: test insert entities with explicit primary-key values into a partition
    method: create a collection and a partition, insert rows carrying ids
    expected: the returned insert count equals nb
    """
    client = self._client()
    collection_name = cf.gen_unique_str(prefix)
    partition_name = cf.gen_unique_str("partition")
    self.create_collection(client, collection_name, default_dim)
    self.create_partition(client, collection_name, partition_name)
    rng = np.random.default_rng(seed=19530)
    rows = []
    for idx in range(ct.default_nb):
        rows.append({default_primary_key_field_name: idx,
                     default_vector_field_name: list(rng.random((1, default_dim))[0]),
                     default_float_field_name: idx * 1.0,
                     default_string_field_name: str(idx)})
    res = self.insert(client, collection_name, rows, partition_name=partition_name)[0]
    assert res['insert_count'] == ct.default_nb
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_insert_exceed_varchar_limit(self):
    """
    target: test insert with a varchar value longer than the field's max_length
    method: create a collection whose varchar field has max_length=2, insert longer strings
    expected: error raised
    """
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    # schema with one tiny varchar limit and one near-maximum limit
    schema = self.create_schema(client, auto_id=True, enable_dynamic_field=False)[0]
    schema.add_field("id", DataType.INT64, is_primary=True, auto_id=True)
    schema.add_field("vector", DataType.FLOAT_VECTOR, dim=ct.default_dim)
    schema.add_field("small_limit", DataType.VARCHAR, max_length=2)
    schema.add_field("big_limit", DataType.VARCHAR, max_length=65530)
    self.create_collection(client, collection_name, dimension=ct.default_dim, schema=schema)
    # both rows exceed the 2-char limit on small_limit
    rows = [
        {"vector": list(cf.gen_vectors(1, ct.default_dim)[0]), "small_limit": "limit_1___________", "big_limit": "1"},
        {"vector": list(cf.gen_vectors(1, ct.default_dim)[0]), "small_limit": "limit_2___________", "big_limit": "2"}
    ]
    error = {ct.err_code: 999, ct.err_msg: "length of varchar field small_limit exceeds max length"}
    self.insert(client, collection_name, rows, check_task=CheckTasks.err_res, check_items=error)
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_insert_with_no_vector_field_dtype(self):
    """
    target: test insert entities with no vector field
    method: insert rows in which the vector field is missing entirely
    expected: error raised
    """
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    self.create_collection(client, collection_name, default_dim)
    # Generate rows without the vector field.
    # NOTE: no random generator is needed here since no vectors are produced
    # (the original code created an unused np.random.default_rng).
    rows = [{default_primary_key_field_name: i,
             default_float_field_name: i * 1.0,
             default_string_field_name: str(i)} for i in range(10)]
    error = {ct.err_code: 1, ct.err_msg: f"Insert missed an field `vector` to collection"}
    self.insert(client, collection_name, rows, check_task=CheckTasks.err_res, check_items=error)
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_insert_with_vector_field_dismatch_dtype(self):
    """
    target: test insert entities with a mismatched vector field data type
    method: pass a scalar float for the vector field instead of a list of floats
    expected: error raised
    """
    # NOTE: the previous docstring was copy-pasted from the "missing vector
    # field" test and did not describe this case (wrong dtype, not absent field).
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    self.create_collection(client, collection_name, default_dim)
    # scalar 0.0001 where a float vector is required
    rows = [{default_primary_key_field_name: 0, default_vector_field_name: 0.0001,
             default_float_field_name: 0.0, default_string_field_name: "0"}]
    error = {ct.err_code: 1, ct.err_msg: "The Input data type is inconsistent with defined schema"}
    self.insert(client, collection_name, rows, check_task=CheckTasks.err_res, check_items=error)
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_insert_drop_collection(self):
    """
    target: test insert followed by dropping the collection
    method: insert data, drop the collection, then list collections
    expected: the collection is gone after drop
    """
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    self.create_collection(client, collection_name, default_dim)
    # collection must be visible right after creation
    assert collection_name in self.list_collections(client)[0]
    rng = np.random.default_rng(seed=19530)
    rows = []
    for idx in range(ct.default_nb):
        rows.append({default_primary_key_field_name: idx,
                     default_vector_field_name: list(rng.random((1, default_dim))[0]),
                     default_float_field_name: idx * 1.0,
                     default_string_field_name: str(idx)})
    self.insert(client, collection_name, rows)
    self.drop_collection(client, collection_name)
    # and invisible after drop
    assert collection_name not in self.list_collections(client)[0]
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_insert_create_index(self):
    """
    target: test insert then create index
    method: 1. insert data 2. create an AUTOINDEX on the vector field
    expected: row count matches nb and the index is listed
    """
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    self.create_collection(client, collection_name, default_dim)
    rng = np.random.default_rng(seed=19530)
    rows = [{default_primary_key_field_name: i,
             default_vector_field_name: list(rng.random((1, default_dim))[0]),
             default_float_field_name: i * 1.0,
             default_string_field_name: str(i)}
            for i in range(ct.default_nb)]
    self.insert(client, collection_name, rows)
    self.flush(client, collection_name)
    stats = self.get_collection_stats(client, collection_name)[0]
    assert stats.get("row_count", None) == ct.default_nb
    # build the index after data is already persisted
    # (the quick-setup collection already carries a default index)
    index_params = self.prepare_index_params(client)[0]
    index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
    self.create_index(client, collection_name, index_params)
    assert default_vector_field_name in self.list_indexes(client, collection_name)[0]
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_insert_after_create_index(self):
    """
    target: test insert after index creation
    method: 1. create an AUTOINDEX 2. insert data
    expected: index is listed and row count matches nb
    """
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    self.create_collection(client, collection_name, default_dim)
    # index first, before any data exists
    index_params = self.prepare_index_params(client)[0]
    index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
    self.create_index(client, collection_name, index_params)
    assert default_vector_field_name in self.list_indexes(client, collection_name)[0]
    # data second
    rng = np.random.default_rng(seed=19530)
    rows = []
    for idx in range(ct.default_nb):
        rows.append({default_primary_key_field_name: idx,
                     default_vector_field_name: list(rng.random((1, default_dim))[0]),
                     default_float_field_name: idx * 1.0,
                     default_string_field_name: str(idx)})
    self.insert(client, collection_name, rows)
    self.flush(client, collection_name)
    stats = self.get_collection_stats(client, collection_name)[0]
    assert stats.get("row_count", None) == ct.default_nb
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_insert_binary_after_index(self):
    """
    target: test insert binary vectors after index creation
    method: 1. create a binary-vector collection with a BIN_IVF_FLAT index
            2. insert binary data
    expected: index listed and row count equals nb
    """
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    # schema with a BINARY_VECTOR field plus float/varchar scalars
    schema = self.create_schema(client, enable_dynamic_field=True)[0]
    schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
    schema.add_field(ct.default_binary_vec_field_name, DataType.BINARY_VECTOR, dim=default_dim)
    schema.add_field(default_float_field_name, DataType.FLOAT)
    schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=ct.default_length)
    index_params = self.prepare_index_params(client)[0]
    index_params.add_index(ct.default_binary_vec_field_name, index_type="BIN_IVF_FLAT", metric_type="HAMMING")
    self.create_collection(client, collection_name, dimension=default_dim, schema=schema, index_params=index_params)
    assert ct.default_binary_vec_field_name in self.list_indexes(client, collection_name)[0]
    # generate schema-conformant rows and insert
    rows = cf.gen_row_data_by_schema(nb=ct.default_nb, schema=schema)
    self.insert(client, collection_name, rows)
    self.flush(client, collection_name)
    stats = self.get_collection_stats(client, collection_name)[0]
    assert stats.get("row_count", None) == ct.default_nb
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_insert_auto_id_create_index(self):
    """
    target: test index creation on an auto_id=True collection
    method: 1. create an auto_id=True collection with an AUTOINDEX and insert
            2. verify index and row count
    expected: index correct, row count equals nb
    """
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    # auto_id schema: the primary key is generated by the server
    schema = self.create_schema(client, auto_id=True, enable_dynamic_field=True)[0]
    schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=True)
    schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
    schema.add_field(default_float_field_name, DataType.FLOAT)
    schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=ct.default_length)
    index_params = self.prepare_index_params(client)[0]
    index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
    self.create_collection(client, collection_name, dimension=default_dim, schema=schema,
                           index_params=index_params, auto_id=True)
    # rows deliberately omit the primary key
    rng = np.random.default_rng(seed=19530)
    rows = []
    for idx in range(ct.default_nb):
        rows.append({default_vector_field_name: list(rng.random((1, default_dim))[0]),
                     default_float_field_name: idx * 1.0,
                     default_string_field_name: str(idx)})
    insert_res = self.insert(client, collection_name, rows)[0]
    assert insert_res['insert_count'] == ct.default_nb
    self.flush(client, collection_name)
    stats = self.get_collection_stats(client, collection_name)[0]
    assert stats.get("row_count", None) == ct.default_nb
    assert default_vector_field_name in self.list_indexes(client, collection_name)[0]
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_insert_auto_id_true(self, pk_field):
    """
    target: test insert without primary-key values when auto_id=True
    method: 1. create a collection with auto_id=True 2. insert rows without ids
    expected: insert succeeds and row count equals nb
    """
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    # primary key type depends on the pk_field fixture (INT64 or VARCHAR)
    schema = self.create_schema(client, auto_id=True, enable_dynamic_field=True)[0]
    if pk_field == ct.default_int64_field_name:
        schema.add_field(pk_field, DataType.INT64, is_primary=True, auto_id=True)
    else:
        schema.add_field(pk_field, DataType.VARCHAR, max_length=ct.default_length, is_primary=True, auto_id=True)
    schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
    schema.add_field(default_float_field_name, DataType.FLOAT)
    if pk_field != ct.default_string_field_name:
        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=ct.default_length)
    self.create_collection(client, collection_name, dimension=default_dim, schema=schema, auto_id=True)
    # rows carry no primary key; the varchar field is skipped when it IS the pk
    rng = np.random.default_rng(seed=19530)
    rows = []
    for idx in range(ct.default_nb):
        entity = {default_vector_field_name: list(rng.random((1, default_dim))[0]),
                  default_float_field_name: idx * 1.0}
        if pk_field != ct.default_string_field_name:
            entity[default_string_field_name] = str(idx)
        rows.append(entity)
    insert_res = self.insert(client, collection_name, rows)[0]
    assert insert_res['insert_count'] == ct.default_nb
    self.flush(client, collection_name)
    stats = self.get_collection_stats(client, collection_name)[0]
    assert stats.get("row_count", None) == ct.default_nb
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_insert_twice_auto_id_true(self, pk_field):
    """
    target: test inserting the same rows twice when auto_id=True
    method: 1. create a collection with auto_id=True 2. insert the same batch twice
    expected: both inserts succeed; total row count is nb * 2 (ids are unique)
    """
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    nb = 10
    # primary key type depends on the pk_field fixture (INT64 or VARCHAR)
    schema = self.create_schema(client, auto_id=True, enable_dynamic_field=True)[0]
    if pk_field == ct.default_int64_field_name:
        schema.add_field(pk_field, DataType.INT64, is_primary=True, auto_id=True)
    else:
        schema.add_field(pk_field, DataType.VARCHAR, max_length=ct.default_length, is_primary=True, auto_id=True)
    schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
    schema.add_field(default_float_field_name, DataType.FLOAT)
    if pk_field != ct.default_string_field_name:
        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=ct.default_length)
    self.create_collection(client, collection_name, dimension=default_dim, schema=schema, auto_id=True)
    rng = np.random.default_rng(seed=19530)
    rows = []
    for idx in range(nb):
        entity = {default_vector_field_name: list(rng.random((1, default_dim))[0]),
                  default_float_field_name: idx * 1.0}
        if pk_field != ct.default_string_field_name:
            entity[default_string_field_name] = str(idx)
        rows.append(entity)
    # same payload twice; server assigns fresh ids each time
    for _ in range(2):
        res = self.insert(client, collection_name, rows)[0]
        assert res['insert_count'] == nb
    self.flush(client, collection_name)
    stats = self.get_collection_stats(client, collection_name)[0]
    assert stats.get("row_count", None) == nb * 2
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_insert_auto_id_true_list_data(self, pk_field):
    """
    target: test insert of list-of-dict data when auto_id=True
    method: 1. create a collection with auto_id=True 2. insert a list of row dicts
    expected: row count equals nb
    """
    client = self._client()
    collection_name = cf.gen_unique_str(prefix)
    # primary key type depends on the pk_field fixture (INT64 or VARCHAR)
    schema = self.create_schema(client, auto_id=True, enable_dynamic_field=True)[0]
    if pk_field == ct.default_int64_field_name:
        schema.add_field(pk_field, DataType.INT64, is_primary=True, auto_id=True)
    else:
        schema.add_field(pk_field, DataType.VARCHAR, max_length=ct.default_length, is_primary=True, auto_id=True)
    schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
    schema.add_field(default_float_field_name, DataType.FLOAT)
    if pk_field != ct.default_string_field_name:
        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=ct.default_length)
    self.create_collection(client, collection_name, dimension=default_dim, schema=schema, auto_id=True)
    # rows omit the primary key entirely
    rng = np.random.default_rng(seed=19530)
    rows = []
    for idx in range(ct.default_nb):
        entity = {default_vector_field_name: list(rng.random((1, default_dim))[0]),
                  default_float_field_name: idx * 1.0}
        if pk_field != ct.default_string_field_name:
            entity[default_string_field_name] = str(idx)
        rows.append(entity)
    insert_res = self.insert(client, collection_name, rows)[0]
    assert insert_res['insert_count'] == ct.default_nb
    self.flush(client, collection_name)
    stats = self.get_collection_stats(client, collection_name)[0]
    assert stats.get("row_count", None) == ct.default_nb
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_insert_auto_id_true_with_dataframe_values(self, pk_field):
    """
    target: test insert with pandas DataFrame data
    method: create a collection with auto_id=True and insert a DataFrame
    expected: MilvusClient rejects DataFrame input; nothing is inserted
    """
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    # primary key type depends on the pk_field fixture (INT64 or VARCHAR)
    schema = self.create_schema(client, auto_id=True, enable_dynamic_field=True)[0]
    if pk_field == ct.default_int64_field_name:
        schema.add_field(pk_field, DataType.INT64, is_primary=True, auto_id=True)
    else:
        schema.add_field(pk_field, DataType.VARCHAR, max_length=ct.default_length, is_primary=True, auto_id=True)
    schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
    schema.add_field(default_float_field_name, DataType.FLOAT)
    if pk_field != ct.default_string_field_name:
        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=ct.default_length)
    self.create_collection(client, collection_name, dimension=default_dim, schema=schema, auto_id=True)
    # MilvusClient only accepts dict / list-of-dict, so a DataFrame must fail
    df = cf.gen_default_dataframe_data(nb=100, auto_id=True)
    error = {ct.err_code: 999,
             ct.err_msg: f"wrong type of argument 'data',expected 'Dict' or list of 'Dict', got 'DataFrame'"}
    self.insert(client, collection_name, df, check_task=CheckTasks.err_res, check_items=error)
    # the failed insert must leave the collection empty
    self.flush(client, collection_name)
    stats = self.get_collection_stats(client, collection_name)[0]
    assert stats.get("row_count", None) == 0
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_insert_auto_id_true_with_list_values(self, pk_field):
    """
    target: test insert of a small batch with auto_id=True
    method: create a collection with auto_id=True and insert rows without ids
    expected: row count equals nb after flush
    """
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    nb = 100
    # primary key type depends on the pk_field fixture (INT64 or VARCHAR)
    schema = self.create_schema(client, auto_id=True, enable_dynamic_field=True)[0]
    if pk_field == ct.default_int64_field_name:
        schema.add_field(pk_field, DataType.INT64, is_primary=True, auto_id=True)
    else:
        schema.add_field(pk_field, DataType.VARCHAR, max_length=ct.default_length, is_primary=True, auto_id=True)
    schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
    schema.add_field(default_float_field_name, DataType.FLOAT)
    if pk_field != ct.default_string_field_name:
        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=ct.default_length)
    self.create_collection(client, collection_name, dimension=default_dim, schema=schema, auto_id=True)
    rng = np.random.default_rng(seed=19530)
    rows = []
    for idx in range(nb):
        entity = {default_vector_field_name: list(rng.random((1, default_dim))[0]),
                  default_float_field_name: idx * 1.0}
        if pk_field != ct.default_string_field_name:
            entity[default_string_field_name] = str(idx)
        rows.append(entity)
    self.insert(client, collection_name, rows)
    self.flush(client, collection_name)
    stats = self.get_collection_stats(client, collection_name)[0]
    assert stats.get("row_count", None) == nb
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_insert_auto_id_false_same_values(self):
    """
    target: test insert duplicate primary-key values with auto_id=False
    method: 1. create a collection with auto_id=False
            2. insert rows that all share the same int64 primary-key value
    expected: insert succeeds with insert_count == nb (insert does not
              deduplicate primary keys at write time)
    """
    # NOTE: the previous docstring claimed "raise exception", but the test
    # body asserts a SUCCESSFUL insert — duplicate PKs are accepted on insert.
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    nb = 100
    self.create_collection(client, collection_name, default_dim, auto_id=False)
    # every row uses primary key 1
    rng = np.random.default_rng(seed=19530)
    rows = [{default_primary_key_field_name: 1, default_vector_field_name: list(rng.random((1, default_dim))[0]),
             default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(nb)]
    results = self.insert(client, collection_name, rows)[0]
    assert results['insert_count'] == nb
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_insert_auto_id_false_negative_values(self):
    """
    target: test insert negative primary-key values with auto_id=False
    method: insert rows whose primary keys are negative integers
    expected: insert succeeds and row count equals nb
    """
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    nb = 100
    self.create_collection(client, collection_name, default_dim, auto_id=False)
    # primary keys: 0, -1, -2, ...
    rng = np.random.default_rng(seed=19530)
    rows = []
    for idx in range(nb):
        rows.append({default_primary_key_field_name: -idx,
                     default_vector_field_name: list(rng.random((1, default_dim))[0]),
                     default_float_field_name: idx * 1.0,
                     default_string_field_name: str(idx)})
    insert_res = self.insert(client, collection_name, rows)[0]
    assert insert_res['insert_count'] == nb
    self.flush(client, collection_name)
    stats = self.get_collection_stats(client, collection_name)[0]
    assert stats.get("row_count", None) == nb
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
# @pytest.mark.xfail(reason="issue 15416")
def test_insert_multi_threading(self):
    """
    target: test concurrent insert
    method: insert from multiple threads, each with a disjoint primary-key range
    expected: total row count equals nb * thread_num
    """
    import threading

    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    self.create_collection(client, collection_name, default_dim, consistency_level="Strong")

    thread_num = 4
    threads = []
    # Collect worker failures here: an AssertionError raised inside a
    # threading.Thread is swallowed by the thread and would NOT fail the
    # test, so each worker records its exception for the main thread.
    worker_errors = []
    rng = np.random.default_rng(seed=19530)
    rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
             default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(ct.default_nb)]

    def insert(thread_i):
        log.debug(f'In thread-{thread_i}')
        try:
            # shift primary keys so each thread writes a unique id range
            thread_rows = [{default_primary_key_field_name: i + thread_i * ct.default_nb,
                            default_vector_field_name: row[default_vector_field_name],
                            default_float_field_name: row[default_float_field_name],
                            default_string_field_name: row[default_string_field_name]} for i, row in enumerate(rows)]
            results = self.insert(client, collection_name, thread_rows)[0]
            assert results['insert_count'] == ct.default_nb
        except Exception as e:
            worker_errors.append(e)

    for i in range(thread_num):
        x = threading.Thread(target=insert, args=(i,))
        threads.append(x)
        x.start()
    for t in threads:
        t.join()
    # surface any failure that happened inside a worker thread
    assert not worker_errors, f"insert thread(s) failed: {worker_errors}"

    self.flush(client, collection_name)
    num_entities = self.get_collection_stats(client, collection_name)[0]
    assert num_entities.get("row_count", None) == ct.default_nb * thread_num
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_insert_multi_times(self, dim):
    """
    target: test insert in many small batches
    method: insert nb rows in batches of `step`, with contiguous primary keys
    expected: total row count equals nb
    """
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    step = 120
    nb = 12000
    self.create_collection(client, collection_name, dim, auto_id=False)
    rng = np.random.default_rng(seed=19530)
    # batch boundaries: [0, step), [step, 2*step), ...
    for batch_start in range(0, nb, step):
        batch = [{default_primary_key_field_name: pk,
                  default_vector_field_name: list(rng.random((1, dim))[0]),
                  default_float_field_name: pk * 1.0,
                  default_string_field_name: str(pk)}
                 for pk in range(batch_start, batch_start + step)]
        res = self.insert(client, collection_name, batch)[0]
        assert res['insert_count'] == step
    self.flush(client, collection_name)
    stats = self.get_collection_stats(client, collection_name)[0]
    assert stats.get("row_count", None) == nb
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_insert_all_datatype_collection(self):
    """
    target: test insert into a collection containing all datatype fields
    method: 1. create an all-datatype collection 2. insert data
    expected: verify num entities
    """
    # Skipped: MilvusClient has no construct_from_dataframe equivalent.
    # TODO: reimplement this case with an explicit schema covering all data types.
    pytest.skip("MilvusClient doesn't support construct_from_dataframe")
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_insert_equal_to_resource_limit(self):
    """
    target: test insert of data sized close to the 64MB (67108864 bytes) RPC limit
    method: insert a batch whose nb was calculated to stay at the critical size
    expected: insert succeeds and row count equals nb
    """
    # NOTE: the previous docstring claimed "raise exception", but the test body
    # asserts a SUCCESSFUL insert of all nb rows at the boundary size.
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    # nb = 127583 without json field
    nb = 108993
    self.create_collection(client, collection_name, default_dim, auto_id=False)
    rng = np.random.default_rng(seed=19530)
    rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
             default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(nb)]
    self.insert(client, collection_name, rows)
    self.flush(client, collection_name)
    num_entities = self.get_collection_stats(client, collection_name)[0]
    assert num_entities.get("row_count", None) == nb
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("nullable", [True, False])
@pytest.mark.parametrize("default_value_type", ["empty", "none"])
def test_insert_one_field_using_default_value(self, default_value_type, nullable, auto_id):
    """
    target: test insert where one field relies on its schema default value
    method: 1. create a collection with a varchar field carrying default_value="abc"
            2. insert rows that either set the field to None or omit it entirely
    expected: insert succeeds and row count equals nb
    """
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    # schema: pk (auto or explicit), float, varchar with a default value, vector
    schema = self.create_schema(client, auto_id=auto_id, enable_dynamic_field=False)[0]
    if not auto_id:
        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
    else:
        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=True)
    schema.add_field(default_float_field_name, DataType.FLOAT)
    schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=ct.default_length,
                     default_value="abc", nullable=nullable)
    schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
    self.create_collection(client, collection_name, dimension=default_dim, schema=schema, auto_id=auto_id)
    rng = np.random.default_rng(seed=19530)
    rows = []
    for idx in range(ct.default_nb):
        entity = {default_float_field_name: float(idx),
                  default_vector_field_name: list(rng.random((1, default_dim))[0])}
        if not auto_id:
            entity[default_primary_key_field_name] = idx
        # "none": pass None explicitly so the default kicks in;
        # "empty": leave the field out of the row altogether
        if default_value_type == "none":
            entity[default_string_field_name] = None
        rows.append(entity)
    self.insert(client, collection_name, rows)
    self.flush(client, collection_name)
    stats = self.get_collection_stats(client, collection_name)[0]
    assert stats.get("row_count", None) == ct.default_nb
    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
|
|
def test_insert_multi_fields_none_with_default_value(self):
|
|
"""
|
|
target: test insert with multi fields include array using none value
|
|
method: 1. create a collection with multi fields using default value
|
|
2. insert using none value to replace the field value
|
|
expected: insert successfully
|
|
"""
|
|
client = self._client()
|
|
collection_name = cf.gen_collection_name_by_testcase_name()
|
|
schema = self.create_schema(client)[0]
|
|
dim = 16
|
|
nb = 100
|
|
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True)
|
|
schema.add_field(default_int32_field_name, DataType.INT32, default_value=np.int32(1), nullable=True)
|
|
schema.add_field(default_float_field_name, DataType.FLOAT, default_value=np.float32(1.0), nullable=True)
|
|
schema.add_field(default_string_field_name, DataType.VARCHAR, default_value="abc", max_length=100, nullable=True)
|
|
schema.add_field('int32_array', datatype=DataType.ARRAY, element_type=DataType.INT32, max_capacity=20, nullable=True)
|
|
schema.add_field('float_array', datatype=DataType.ARRAY, element_type=DataType.FLOAT, max_capacity=20, nullable=True)
|
|
schema.add_field('string_array', datatype=DataType.ARRAY, element_type=DataType.VARCHAR, max_capacity=20, max_length=100, nullable=True)
|
|
schema.add_field('json', DataType.JSON, nullable=True)
|
|
schema.add_field(default_float_vec_field_name, DataType.FLOAT_VECTOR, dim=dim)
|
|
self.create_collection(client, collection_name, schema=schema)
|
|
|
|
rows = [{
|
|
default_primary_key_field_name: i,
|
|
default_int32_field_name: None,
|
|
default_float_field_name: None,
|
|
default_string_field_name: None,
|
|
'int32_array': None,
|
|
'float_array': None,
|
|
'string_array': None,
|
|
'json': None,
|
|
default_float_vec_field_name: cf.gen_vectors(1, dim=dim)[0]
|
|
} for i in range(nb)]
|
|
self.insert(client, collection_name, rows)
|
|
self.flush(client, collection_name)
|
|
num_entities = self.get_collection_stats(client, collection_name)[0]
|
|
assert num_entities.get("row_count", None) == nb
|
|
|
|
# build index and load
|
|
index_params = self.prepare_index_params(client)[0]
|
|
index_params.add_index(default_float_vec_field_name, metric_type="L2")
|
|
self.create_index(client, collection_name, index_params)
|
|
self.load_collection(client, collection_name)
|
|
|
|
# try to query None value entities, should be empty
|
|
res, _ = self.query(client, collection_name, filter=f"{default_string_field_name} is null")
|
|
assert len(res) == 0
|
|
|
|
# try to query default value entities, should be not empty
|
|
res, _ = self.query(client, collection_name, filter=f"{default_string_field_name}=='abc'")
|
|
assert len(res) == nb
|
|
|
|
# try to query None value entities on json field, should not be empty
|
|
res, _ = self.query(client, collection_name, filter=f"json is null")
|
|
assert len(res) == nb
|
|
|
|
res, _ = self.query(client, collection_name, filter=f"int32_array is null")
|
|
assert len(res) == nb
|
|
|
|
self.drop_collection(client, collection_name)
|