mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
issue: #42942
This PR includes the following changes:
1. Added checks to the index checker in querycoord to generate drop-index tasks.
2. Added a drop-index interface to querynode.
3. To avoid search failures after dropping an index, querynode allows lazy mode (warmup=disable) to load raw data even when indexes contain raw data.
4. In segcore, loading an index no longer deletes raw data; instead, it evicts it.
5. In expr, the index is pinned to prevent concurrent errors.
---------
Signed-off-by: sunby <sunbingyi1992@gmail.com>
1694 lines
100 KiB
Python
1694 lines
100 KiB
Python
import pytest
|
|
|
|
from base.client_v2_base import TestMilvusClientV2Base
|
|
from utils.util_log import test_log as log
|
|
from common import common_func as cf
|
|
from common import common_type as ct
|
|
from common.common_type import CaseLabel, CheckTasks
|
|
from utils.util_pymilvus import *
|
|
from pymilvus import DataType
|
|
|
|
# Prefix passed to cf.gen_unique_str() when the tests name their collections.
prefix = "client_index"

# --- sizes and tolerances taken from common.common_type ----------------------
epsilon = ct.epsilon
default_nb = ct.default_nb
default_nb_medium = ct.default_nb_medium
default_nq = ct.default_nq
default_dim = ct.default_dim
default_limit = ct.default_limit

# --- search defaults ---------------------------------------------------------
default_search_field = ct.default_float_vec_field_name
default_search_params = ct.default_search_params

# --- filter expressions ------------------------------------------------------
default_search_exp = "id >= 0"
default_search_string_exp = "varchar >= \"0\""
default_search_mix_exp = "int64 >= 0 && varchar >= \"0\""
# NOTE: the two names below are misspelled ("invaild", "perfix"); they are kept
# unchanged because code outside this view may reference them.
default_invaild_string_exp = "varchar >= 0"
default_json_search_exp = "json_field[\"number\"] >= 0"
perfix_expr = 'varchar like "0%"'

# Key used in check_items to carry the expected query result.
exp_res = "exp_res"

# --- field names -------------------------------------------------------------
default_primary_key_field_name = "id"
default_vector_field_name = "vector"
default_multiple_vector_field_name = "vector_new"
default_float_field_name = ct.default_float_field_name
default_bool_field_name = ct.default_bool_field_name
default_string_field_name = ct.default_string_field_name
default_int32_array_field_name = ct.default_int32_array_field_name
default_string_array_field_name = ct.default_string_array_field_name
|
|
|
|
|
|
class TestMilvusClientIndexInvalid(TestMilvusClientV2Base):
    """Negative test cases for the MilvusClient index interface."""

    # Names that the invalid-name tests feed to the server.
    _INVALID_NAMES = ["12-s", "12 s", "(mn)", "中文", "%$#"]
    # 511 'a' characters. This is exactly what the former expression
    # "a".join("a" for i in range(256)) evaluated to (256 items + 255 separators).
    _OVERLONG_NAME = "a" * 511

    @pytest.fixture(scope="function", params=[False, True])
    def auto_id(self, request):
        """Parametrize the requesting test over auto_id False/True."""
        return request.param

    @pytest.fixture(scope="function", params=["COSINE", "L2"])
    def metric_type(self, request):
        """Parametrize the requesting test over the COSINE and L2 metric types."""
        return request.param

    # ------------------------------------------------------------------
    #  private helpers (not collected by pytest: no "test" prefix)
    # ------------------------------------------------------------------

    def _setup_unindexed_default_collection(self, client, collection_name):
        """Create a collection with ``default_dim``, release it and drop the index on "vector"."""
        self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
        self.release_collection(client, collection_name)
        self.drop_index(client, collection_name, "vector")

    def _setup_unindexed_int8_collection(self, client, collection_name, dim=128):
        """Create a VARCHAR-pk / INT8_VECTOR collection, release it and drop the "embeddings" index."""
        schema = self.create_schema(client, enable_dynamic_field=False)[0]
        schema.add_field("id_string", DataType.VARCHAR, max_length=64, is_primary=True, auto_id=False)
        schema.add_field("embeddings", DataType.INT8_VECTOR, dim=dim)
        initial_index = self.prepare_index_params(client)[0]
        initial_index.add_index("embeddings", metric_type="COSINE")
        self.create_collection(client, collection_name, dimension=dim, schema=schema,
                               index_params=initial_index)
        self.release_collection(client, collection_name)
        self.drop_index(client, collection_name, "embeddings")

    # ******************************************************************
    #  The following are invalid base cases
    # ******************************************************************

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("name", _INVALID_NAMES)
    def test_milvus_client_index_invalid_collection_name(self, name):
        """
        target: test index abnormal case
        method: create index on invalid collection name
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create a real collection and remove its vector index
        self._setup_unindexed_default_collection(client, collection_name)
        # 2. prepare index params
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name="vector")
        # 3. create index against the invalid name
        # NOTE(review): unlike the sibling tests, this expected message has no
        # closing "]" -- confirm whether that is intentional.
        error = {ct.err_code: 100, ct.err_msg: f"collection not found[database=default][collection={name}"}
        self.create_index(client, name, index_params,
                          check_task=CheckTasks.err_res, check_items=error)
        self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("name", [_OVERLONG_NAME])
    def test_milvus_client_index_collection_name_over_max_length(self, name):
        """
        target: test index abnormal case
        method: create index on collection name over max length
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create a real collection and remove its vector index
        self._setup_unindexed_default_collection(client, collection_name)
        # 2. prepare index params
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name="vector")
        # 3. create index against the over-long name
        error = {ct.err_code: 100, ct.err_msg: f"collection not found[database=default][collection={name}]"}
        self.create_index(client, name, index_params,
                          check_task=CheckTasks.err_res, check_items=error)
        self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L1)
    def test_milvus_client_index_not_exist_collection_name(self):
        """
        target: test index abnormal case
        method: create index on not exist collection name
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        not_existed_collection_name = cf.gen_unique_str("not_existed_collection")
        # 1. create a real collection and remove its vector index
        self._setup_unindexed_default_collection(client, collection_name)
        # 2. prepare index params
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name="vector")
        # 3. create index against a name that was never created
        error = {ct.err_code: 100,
                 ct.err_msg: f"collection not found[database=default][collection={not_existed_collection_name}]"}
        self.create_index(client, not_existed_collection_name, index_params,
                          check_task=CheckTasks.err_res, check_items=error)
        self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.skip(reason="pymilvus issue 1885")
    @pytest.mark.parametrize("index", _INVALID_NAMES + [_OVERLONG_NAME])
    def test_milvus_client_index_invalid_index_type(self, index):
        """
        target: test index abnormal case
        method: create index on invalid index type
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create a real collection and remove its vector index
        self._setup_unindexed_default_collection(client, collection_name)
        # 2. prepare index params with the invalid index type
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name="vector", index_type=index)
        # 3. create index
        # NOTE(review): this expectation describes a missing collection, not an
        # invalid index type; it looks copy-pasted. Revisit when the test is un-skipped.
        error = {ct.err_code: 100, ct.err_msg: "can't find collection collection not "
                                               "found[database=default][collection=not_existed]"}
        self.create_index(client, collection_name, index_params,
                          check_task=CheckTasks.err_res, check_items=error)
        self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.skip(reason="pymilvus issue 1885")
    @pytest.mark.parametrize("metric", _INVALID_NAMES + [_OVERLONG_NAME])
    def test_milvus_client_index_invalid_metric_type(self, metric):
        """
        target: test index abnormal case
        method: create index on invalid metric type
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create a real collection and remove its vector index
        self._setup_unindexed_default_collection(client, collection_name)
        # 2. prepare index params with the invalid metric type
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name="vector", metric_type=metric)
        # 3. create index
        # NOTE(review): this expectation describes a missing collection, not an
        # invalid metric type; it looks copy-pasted. Revisit when the test is un-skipped.
        error = {ct.err_code: 100, ct.err_msg: "can't find collection collection not "
                                               "found[database=default][collection=not_existed]"}
        self.create_index(client, collection_name, index_params,
                          check_task=CheckTasks.err_res, check_items=error)
        self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L1)
    def test_milvus_client_index_drop_index_before_release(self):
        """
        target: test index abnormal case
        method: drop index before release
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create collection; deliberately no release before the drop
        self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
        # 2. dropping the vector index now is expected to be rejected
        error = {ct.err_code: 1100, ct.err_msg: "vector index cannot be dropped on loaded collection"}
        self.drop_index(client, collection_name, "vector",
                        check_task=CheckTasks.err_res, check_items=error)
        self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L1)
    def test_milvus_client_create_multiple_diff_index_without_release(self):
        """
        target: test index abnormal case
        method: create different index on one field without release
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create collection; the existing index on "vector" is left in place
        self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
        # 2. prepare index params for a different index on the same field
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name="vector", index_type="IVF_FLAT", metric_type="L2")
        # 3. create another index
        error = {ct.err_code: 65535,
                 ct.err_msg: "CreateIndex failed: at most one distinct index is allowed per field"}
        self.create_index(client, collection_name, index_params,
                          check_task=CheckTasks.err_res, check_items=error)
        self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("not_supported_index", ct.all_index_types[:-2])
    def test_milvus_client_int8_vector_create_not_supported_cpu_index(self, not_supported_index):
        """
        target: test create non-supported index on int8 vector
        method: create non-supported index on int8 vector
        expected: raise exception
        """
        if not_supported_index in ct.int8_vector_index:
            pytest.skip("This index is supported by int8 vector")
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create the int8 collection and remove its initial index
        self._setup_unindexed_int8_collection(client, collection_name, dim=128)
        # 2. prepare index params with the unsupported index type
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name="embeddings", index_type=not_supported_index, metric_type="L2")
        # 3. create another index
        error = {ct.err_code: 1100,
                 ct.err_msg: f"data type Int8Vector can't build with this index {not_supported_index}: "
                             f"invalid parameter[expected=valid index params][actual=invalid index params]"}
        self.create_index(client, collection_name, index_params,
                          check_task=CheckTasks.err_res, check_items=error)
        self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("not_supported_index", ct.all_index_types[-2:])
    def test_milvus_client_int8_vector_create_not_supported_GPU_index(self, not_supported_index):
        """
        target: test create non-supported index on int8 vector
        method: create non-supported index on int8 vector
        expected: raise exception
        """
        if not_supported_index in ct.int8_vector_index:
            pytest.skip("This index is supported by int8 vector")
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create the int8 collection and remove its initial index
        self._setup_unindexed_int8_collection(client, collection_name, dim=128)
        # 2. prepare index params with the unsupported index type
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name="embeddings", index_type=not_supported_index, metric_type="L2")
        # 3. create another index
        error = {ct.err_code: 1100, ct.err_msg: f"invalid parameter[expected=valid index][actual=invalid "
                                                f"index type: {not_supported_index}"}
        self.create_index(client, collection_name, index_params,
                          check_task=CheckTasks.err_res, check_items=error)
        self.drop_collection(client, collection_name)
|
|
|
|
|
|
class TestMilvusClientIndexValid(TestMilvusClientV2Base):
|
|
""" Test case of index interface """
|
|
|
|
@pytest.fixture(scope="function", params=[False, True])
def auto_id(self, request):
    """Parametrize the requesting test over auto_id False/True."""
    return request.param
|
|
|
|
@pytest.fixture(scope="function", params=["COSINE", "L2", "IP"])
def metric_type(self, request):
    """Parametrize the requesting test over the COSINE, L2 and IP metric types."""
    return request.param
|
|
|
|
@pytest.fixture(scope="function", params=["TRIE", "STL_SORT", "INVERTED", "AUTOINDEX"])
def scalar_index(self, request):
    """Parametrize the requesting test over the listed scalar index type names."""
    return request.param
|
|
|
|
@pytest.fixture(scope="function", params=["TRIE", "INVERTED", "AUTOINDEX", ""])
def varchar_index(self, request):
    """Parametrize the requesting test over index type names used for VARCHAR fields ("" included)."""
    return request.param
|
|
|
|
@pytest.fixture(scope="function", params=["STL_SORT", "INVERTED", "AUTOINDEX", ""])
def numeric_index(self, request):
    """Parametrize the requesting test over index type names used for numeric fields ("" included)."""
    return request.param
|
|
|
|
"""
|
|
******************************************************************
|
|
# The following are valid base cases
|
|
******************************************************************
|
|
"""
|
|
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("index", ct.all_index_types[:8])
def test_milvus_client_index_with_params(self, index, metric_type):
    """
    target: test index with user defined params
    method: create collection, replace its vector index with the index under test,
            then insert, load, search and query
    expected: index/search/query successfully
    """
    client = self._client()
    coll = cf.gen_unique_str(prefix)

    # 1. create collection, release it and drop the index on "vector"
    self.create_collection(client, coll, default_dim, consistency_level="Strong")
    self.release_collection(client, coll)
    self.drop_index(client, coll, "vector")
    remaining_indexes = self.list_indexes(client, coll)[0]
    assert remaining_indexes == []

    # 2. prepare index params for the parametrized index type
    build_params = cf.get_index_params_params(index_type=index)
    index_params = self.prepare_index_params(client)[0]
    index_params.add_index(field_name="vector", index_type=index,
                           params=build_params, metric_type=metric_type)

    # 3. create index
    self.create_index(client, coll, index_params)

    # 4. insert default_nb rows generated from a seeded RNG
    rng = np.random.default_rng(seed=19530)
    rows = []
    for pk in range(default_nb):
        rows.append({
            default_primary_key_field_name: pk,
            default_vector_field_name: list(rng.random((1, default_dim))[0]),
            default_float_field_name: pk * 1.0,
            default_string_field_name: str(pk),
        })
    self.insert(client, coll, rows)

    # 5. load collection
    self.load_collection(client, coll)

    # 6. search with one random query vector
    vectors_to_search = rng.random((1, default_dim))
    expected_search = {
        "enable_milvus_client_api": True,
        "nq": len(vectors_to_search),
        "ids": list(range(default_nb)),
        "limit": default_limit,
        "pk_name": default_primary_key_field_name,
    }
    self.search(client, coll, vectors_to_search,
                check_task=CheckTasks.check_search_results,
                check_items=expected_search)

    # 7. query every row back and compare with what was inserted
    expected_query = {
        exp_res: rows,
        "with_vec": True,
        "pk_name": default_primary_key_field_name,
    }
    self.query(client, coll, filter=default_search_exp,
               check_task=CheckTasks.check_query_results,
               check_items=expected_query)

    self.drop_collection(client, coll)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("index", ct.all_index_types[:8])
def test_milvus_client_index_after_insert(self, index, metric_type):
    """
    target: test index after insert
    method: create collection, drop its vector index, insert data, then build the
            index under test, load, search and query
    expected: index/search/query successfully
    """
    client = self._client()
    coll = cf.gen_unique_str(prefix)

    # 1. create collection, release it and drop the index on "vector"
    self.create_collection(client, coll, default_dim, consistency_level="Strong")
    self.release_collection(client, coll)
    self.drop_index(client, coll, "vector")

    # 2. insert default_nb rows generated from a seeded RNG
    rng = np.random.default_rng(seed=19530)
    rows = []
    for pk in range(default_nb):
        rows.append({
            default_primary_key_field_name: pk,
            default_vector_field_name: list(rng.random((1, default_dim))[0]),
            default_float_field_name: pk * 1.0,
            default_string_field_name: str(pk),
        })
    self.insert(client, coll, rows)

    # 3. prepare index params for the parametrized index type
    build_params = cf.get_index_params_params(index)
    index_params = self.prepare_index_params(client)[0]
    index_params.add_index(field_name="vector", index_type=index,
                           metric_type=metric_type, params=build_params)

    # 4. create index
    self.create_index(client, coll, index_params)

    # 5. load collection
    self.load_collection(client, coll)

    # 6. search with one random query vector
    vectors_to_search = rng.random((1, default_dim))
    expected_search = {
        "enable_milvus_client_api": True,
        "nq": len(vectors_to_search),
        "ids": list(range(default_nb)),
        "limit": default_limit,
        "pk_name": default_primary_key_field_name,
    }
    self.search(client, coll, vectors_to_search,
                check_task=CheckTasks.check_search_results,
                check_items=expected_search)

    # 7. query every row back and compare with what was inserted
    expected_query = {
        exp_res: rows,
        "with_vec": True,
        "pk_name": default_primary_key_field_name,
    }
    self.query(client, coll, filter=default_search_exp,
               check_task=CheckTasks.check_query_results,
               check_items=expected_query)

    self.drop_collection(client, coll)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("add_field", [True, False])
def test_milvus_client_index_auto_index(self, numeric_index, varchar_index, metric_type, add_field):
    """
    target: test index with autoindex on both scalar and vector field
    method: create collection (optionally adding two nullable fields afterwards),
            create indexes on every field, drop them all, create them again,
            then insert, load, search and query
    expected: index/search/query successfully
    """
    client = self._client()
    coll = cf.gen_unique_str(prefix)

    # 1. create collection from an explicit schema
    schema = self.create_schema(client)[0]
    schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True)
    schema.add_field(ct.default_int32_field_name, DataType.INT32)
    schema.add_field(ct.default_int16_field_name, DataType.INT16)
    schema.add_field(ct.default_int8_field_name, DataType.INT8)
    schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
    schema.add_field(default_float_field_name, DataType.FLOAT)
    schema.add_field(ct.default_double_field_name, DataType.DOUBLE)
    schema.add_field(ct.default_bool_field_name, DataType.BOOL)
    schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
    self.create_collection(client, coll, schema=schema, consistency_level="Strong")
    if add_field:
        # extend the already-created collection with two nullable scalar fields
        self.add_collection_field(client, coll, field_name="field_int",
                                  data_type=DataType.INT32, nullable=True)
        self.add_collection_field(client, coll, field_name="field_varchar",
                                  data_type=DataType.VARCHAR, nullable=True, max_length=64)
    self.release_collection(client, coll)
    self.drop_index(client, coll, "vector")
    remaining_indexes = self.list_indexes(client, coll)[0]
    assert remaining_indexes == []

    # 2. prepare index params: one (field, index type) entry per field, in the
    #    order the indexes are registered and later dropped
    index_plan = [
        (default_vector_field_name, "AUTOINDEX"),
        (ct.default_int32_field_name, numeric_index),
        (ct.default_int16_field_name, numeric_index),
        (ct.default_int8_field_name, numeric_index),
        (default_float_field_name, numeric_index),
        (ct.default_double_field_name, numeric_index),
        (ct.default_bool_field_name, ""),
        (default_string_field_name, varchar_index),
        (default_primary_key_field_name, numeric_index),
    ]
    if add_field:
        index_plan.append(("field_int", numeric_index))
        index_plan.append(("field_varchar", varchar_index))
    index_params = self.prepare_index_params(client)[0]
    for field, index_type in index_plan:
        index_params.add_index(field_name=field, index_type=index_type, metric_type=metric_type)

    # 3. create index
    self.create_index(client, coll, index_params)

    # 4. drop index on every field, in the same order
    for field, _ in index_plan:
        self.drop_index(client, coll, field)

    # 5. create index again from the same params
    self.create_index(client, coll, index_params)

    # 6. insert default_nb rows generated from a seeded RNG
    rng = np.random.default_rng(seed=19530)
    added_values = {"field_int": 10, "field_varchar": "default"} if add_field else {}
    rows = []
    for pk in range(default_nb):
        rows.append({
            default_primary_key_field_name: pk,
            default_vector_field_name: list(rng.random((1, default_dim))[0]),
            ct.default_int32_field_name: np.int32(pk),
            ct.default_int16_field_name: np.int16(pk),
            ct.default_int8_field_name: np.int8(pk),
            default_float_field_name: pk * 1.0,
            ct.default_double_field_name: np.double(pk),
            ct.default_bool_field_name: np.bool_(pk),
            default_string_field_name: str(pk),
            **added_values,
        })
    self.insert(client, coll, rows)

    # 7. load collection
    self.load_collection(client, coll)

    # 8. search with one random query vector
    vectors_to_search = rng.random((1, default_dim))
    expected_search = {
        "enable_milvus_client_api": True,
        "nq": len(vectors_to_search),
        "ids": list(range(default_nb)),
        "limit": default_limit,
        "pk_name": default_primary_key_field_name,
    }
    self.search(client, coll, vectors_to_search,
                check_task=CheckTasks.check_search_results,
                check_items=expected_search)

    # 9. query every row back and compare with what was inserted
    expected_query = {
        exp_res: rows,
        "with_vec": True,
        "pk_name": default_primary_key_field_name,
    }
    self.query(client, coll, filter=default_search_exp,
               check_task=CheckTasks.check_query_results,
               check_items=expected_query)

    self.drop_collection(client, coll)
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_scalar_hybrid_index_small_distinct_before_insert(self, metric_type):
    """
    target: test index with autoindex on int/varchar with small distinct value (<=100)
    method: create collection, create AUTOINDEX on the vector, int64 and varchar
            fields, then insert, load, search and query
    expected: index/search/query successfully (autoindex is bitmap index indeed)
    """
    client = self._client()
    coll = cf.gen_unique_str(prefix)
    int64_field_name = "int"

    # 1. create collection from an explicit schema
    schema = self.create_schema(client)[0]
    schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True)
    schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
    schema.add_field(int64_field_name, DataType.INT64)
    schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
    self.create_collection(client, coll, schema=schema, consistency_level="Strong")
    self.release_collection(client, coll)
    self.drop_index(client, coll, "vector")
    remaining_indexes = self.list_indexes(client, coll)[0]
    assert remaining_indexes == []

    # 2. prepare index params: AUTOINDEX on the vector and both scalar fields
    index_params = self.prepare_index_params(client)[0]
    for field in (default_vector_field_name, int64_field_name, default_string_field_name):
        index_params.add_index(field_name=field, index_type="AUTOINDEX", metric_type=metric_type)

    # 3. create index
    self.create_index(client, coll, index_params)

    # 4. insert: vectors come from the seeded RNG, scalar values from the
    #    global np.random state, drawn from [0, 99)
    rng = np.random.default_rng(seed=19530)
    rows = []
    for pk in range(default_nb):
        rows.append({
            default_primary_key_field_name: pk,
            default_vector_field_name: list(rng.random((1, default_dim))[0]),
            int64_field_name: np.random.randint(0, 99),
            default_string_field_name: str(np.random.randint(0, 99)),
        })
    self.insert(client, coll, rows)

    # 5. load collection
    self.load_collection(client, coll)

    # 6. search with one random query vector
    vectors_to_search = rng.random((1, default_dim))
    expected_search = {
        "enable_milvus_client_api": True,
        "nq": len(vectors_to_search),
        "ids": list(range(default_nb)),
        "limit": default_limit,
        "pk_name": default_primary_key_field_name,
    }
    self.search(client, coll, vectors_to_search,
                check_task=CheckTasks.check_search_results,
                check_items=expected_search)

    # 7. query every row back and compare with what was inserted
    expected_query = {
        exp_res: rows,
        "with_vec": True,
        "pk_name": default_primary_key_field_name,
    }
    self.query(client, coll, filter=default_search_exp,
               check_task=CheckTasks.check_query_results,
               check_items=expected_query)

    self.drop_collection(client, coll)
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_scalar_hybrid_index_small_to_large_distinct_after_insert(self, metric_type):
    """
    target: test index with autoindex on int/varchar with small distinct value (<=100) first and
            insert to large distinct (2000+) later
    method: create collection, insert low-cardinality scalar data, create indexes,
            load, search and query; then insert a second batch whose int64/varchar
            values are all distinct, flush and search again
    expected: index/search/query successfully
    """
    client = self._client()
    collection_name = cf.gen_unique_str(prefix)
    int64_field_name = "int"

    # 1. create collection from an explicit schema
    schema = self.create_schema(client)[0]
    schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True)
    schema.add_field(ct.default_int32_field_name, DataType.INT32)
    schema.add_field(ct.default_int16_field_name, DataType.INT16)
    schema.add_field(ct.default_int8_field_name, DataType.INT8)
    schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
    schema.add_field(int64_field_name, DataType.INT64)
    schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
    self.create_collection(client, collection_name, schema=schema, consistency_level="Strong")
    self.release_collection(client, collection_name)
    self.drop_index(client, collection_name, "vector")
    res = self.list_indexes(client, collection_name)[0]
    assert res == []

    # 2. insert the first batch: int64/varchar values are drawn from [0, 99),
    #    so those two fields start with fewer than 100 distinct values
    # NOTE(review): np.int8(i) / np.int16(i) receive values beyond the type's
    # range once i is large enough; how that is handled depends on the NumPy
    # version -- confirm against the pinned NumPy.
    rng = np.random.default_rng(seed=19530)
    rows = [{default_primary_key_field_name: i,
             default_vector_field_name: list(rng.random((1, default_dim))[0]),
             int64_field_name: np.random.randint(0, 99),
             ct.default_int32_field_name: np.int32(i),
             ct.default_int16_field_name: np.int16(i),
             ct.default_int8_field_name: np.int8(i),
             default_string_field_name: str(np.random.randint(0, 99))}
            for i in range(default_nb)]
    self.insert(client, collection_name, rows)

    # 3. prepare index params
    index = "AUTOINDEX"
    index_params = self.prepare_index_params(client)[0]
    index_params.add_index(field_name=default_vector_field_name, index_type=index, metric_type=metric_type)
    index_params.add_index(field_name=int64_field_name, index_type=index, metric_type=metric_type)
    index_params.add_index(field_name=ct.default_int32_field_name, index_type="", metric_type=metric_type)
    # the int16 field deliberately passes no index_type at all
    index_params.add_index(field_name=ct.default_int16_field_name, metric_type=metric_type)
    index_params.add_index(field_name=ct.default_int8_field_name, index_type=index, metric_type=metric_type)
    index_params.add_index(field_name=default_string_field_name, index_type=index, metric_type=metric_type)

    # 4. create index
    self.create_index(client, collection_name, index_params)

    # 5. load collection
    self.load_collection(client, collection_name)

    # 6. search
    vectors_to_search = rng.random((1, default_dim))
    insert_ids = list(range(default_nb))
    self.search(client, collection_name, vectors_to_search,
                check_task=CheckTasks.check_search_results,
                check_items={"enable_milvus_client_api": True,
                             "nq": len(vectors_to_search),
                             "ids": insert_ids,
                             "limit": default_limit,
                             "pk_name": default_primary_key_field_name})

    # 7. query
    self.query(client, collection_name, filter=default_search_exp,
               check_task=CheckTasks.check_query_results,
               check_items={exp_res: rows,
                            "with_vec": True,
                            "pk_name": default_primary_key_field_name})

    # 8. insert more distinct values to the scalar fields to make the autoindex change.
    #    The int64 and varchar fields now receive the row id itself, so every new
    #    row adds a new distinct value. Previously this batch was also drawn from
    #    [0, 99), which never raised the cardinality the test is meant to exercise.
    rng = np.random.default_rng(seed=19530)
    rows = [{default_primary_key_field_name: i,
             default_vector_field_name: list(rng.random((1, default_dim))[0]),
             int64_field_name: i,
             ct.default_int32_field_name: np.int32(i),
             ct.default_int16_field_name: np.int16(i),
             ct.default_int8_field_name: np.int8(i),
             default_string_field_name: str(i)}
            for i in range(default_nb, 2 * default_nb)]
    self.insert(client, collection_name, rows)
    self.flush(client, collection_name)

    # 9. search again; ids from both batches are acceptable hits
    vectors_to_search = rng.random((1, default_dim))
    insert_ids = list(range(2 * default_nb))
    self.search(client, collection_name, vectors_to_search,
                check_task=CheckTasks.check_search_results,
                check_items={"enable_milvus_client_api": True,
                             "nq": len(vectors_to_search),
                             "ids": insert_ids,
                             "limit": default_limit,
                             "pk_name": default_primary_key_field_name})

    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_index_multiple_vectors(self, numeric_index, metric_type):
    """
    target: test index creation on the vector field together with a scalar index on the primary key
    method: 1. create a collection, release it and drop the index named "vector"
            2. create an AUTOINDEX on "vector" and a `numeric_index` index on "id"
            3. insert rows that also carry a second vector value, load, search and query
    expected: index/search/query successfully
    """
    client = self._client()
    collection_name = cf.gen_unique_str(prefix)

    # 1. create collection and make sure no index is left on it
    self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
    self.release_collection(client, collection_name)
    self.drop_index(client, collection_name, "vector")
    remaining_indexes = self.list_indexes(client, collection_name)[0]
    assert remaining_indexes == []

    # 2. prepare index params: one vector index and one scalar index on the primary key
    index_params = self.prepare_index_params(client)[0]
    index_params.add_index(field_name="vector", index_type="AUTOINDEX", metric_type=metric_type)
    index_params.add_index(field_name="id", index_type=numeric_index, metric_type=metric_type)

    # 3. create index
    self.create_index(client, collection_name, index_params)

    # 4. insert; two vectors are drawn from the generator per row, in dict-key order
    rng = np.random.default_rng(seed=19530)
    rows = []
    for pk in range(default_nb):
        first_vector = list(rng.random((1, default_dim))[0])
        second_vector = list(rng.random((1, default_dim))[0])
        rows.append({default_primary_key_field_name: pk,
                     default_vector_field_name: first_vector,
                     default_float_field_name: pk * 1.0,
                     default_string_field_name: str(pk),
                     default_multiple_vector_field_name: second_vector})
    self.insert(client, collection_name, rows)

    # 5. load collection
    self.load_collection(client, collection_name)

    # 6. search
    vectors_to_search = rng.random((1, default_dim))
    search_checks = {"enable_milvus_client_api": True,
                     "nq": len(vectors_to_search),
                     "ids": list(range(default_nb)),
                     "limit": default_limit,
                     "pk_name": default_primary_key_field_name}
    self.search(client, collection_name, vectors_to_search,
                check_task=CheckTasks.check_search_results,
                check_items=search_checks)

    # 7. query
    query_checks = {exp_res: rows,
                    "with_vec": True,
                    "pk_name": default_primary_key_field_name}
    self.query(client, collection_name, filter=default_search_exp,
               check_task=CheckTasks.check_query_results,
               check_items=query_checks)

    self.drop_collection(client, collection_name)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_index_drop_create_same_index(self):
    """
    target: test index after dropping it and creating the very same index again
    method: create collection, create/drop/create the same HNSW index, then insert, load, search and query
    expected: index create/drop and search/query successfully
    """
    client = self._client()
    collection_name = cf.gen_unique_str(prefix)

    # 1. create collection and make sure no index is left on it
    self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
    self.release_collection(client, collection_name)
    self.drop_index(client, collection_name, "vector")
    remaining_indexes = self.list_indexes(client, collection_name)[0]
    assert remaining_indexes == []

    # 2. prepare index params
    index_params = self.prepare_index_params(client)[0]
    index_params.add_index(field_name="vector", index_type="HNSW", metric_type="L2")

    # 3. create index
    self.create_index(client, collection_name, index_params)

    # 4. drop index
    self.drop_index(client, collection_name, "vector")

    # 5. create the same index a second time
    self.create_index(client, collection_name, index_params)

    # 6. insert
    rng = np.random.default_rng(seed=19530)
    rows = []
    for pk in range(default_nb):
        rows.append({default_primary_key_field_name: pk,
                     default_vector_field_name: list(rng.random((1, default_dim))[0]),
                     default_float_field_name: pk * 1.0,
                     default_string_field_name: str(pk)})
    self.insert(client, collection_name, rows)

    # 7. load collection
    self.load_collection(client, collection_name)

    # 8. search
    vectors_to_search = rng.random((1, default_dim))
    search_checks = {"enable_milvus_client_api": True,
                     "nq": len(vectors_to_search),
                     "ids": list(range(default_nb)),
                     "limit": default_limit,
                     "pk_name": default_primary_key_field_name}
    self.search(client, collection_name, vectors_to_search,
                check_task=CheckTasks.check_search_results,
                check_items=search_checks)

    # 9. query
    query_checks = {exp_res: rows,
                    "with_vec": True,
                    "pk_name": default_primary_key_field_name}
    self.query(client, collection_name, filter=default_search_exp,
               check_task=CheckTasks.check_query_results,
               check_items=query_checks)

    self.drop_collection(client, collection_name)
|
|
|
|
|
|
class TestMilvusClientJsonPathIndexInvalid(TestMilvusClientV2Base):
    """ Test case of json path index interface: invalid parameters and unsupported usages """

    @pytest.fixture(scope="function", params=["TRIE", "STL_SORT", "BITMAP"])
    def not_supported_varchar_scalar_index(self, request):
        """Index types the tests below expect to be rejected for a json path index."""
        yield request.param

    @pytest.fixture(scope="function", params=["INVERTED"])
    def supported_varchar_scalar_index(self, request):
        """Index type the tests below use as a valid json path index type."""
        yield request.param

    @pytest.fixture(scope="function", params=[DataType.INT8.name, DataType.INT16.name, DataType.INT32.name,
                                              DataType.INT64.name, DataType.FLOAT.name,
                                              DataType.ARRAY.name, DataType.FLOAT_VECTOR.name,
                                              DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name,
                                              DataType.BINARY_VECTOR.name,
                                              DataType.SPARSE_FLOAT_VECTOR.name, DataType.INT8_VECTOR.name])
    def not_supported_json_cast_type(self, request):
        """Data type names the tests below expect to be rejected as json_cast_type."""
        yield request.param

    @pytest.fixture(scope="function", params=["Json", "BOOL", "double", "varchar"])
    def supported_json_cast_type(self, request):
        """json_cast_type values used by the tests below as accepted cast types."""
        yield request.param

    def _create_collection_with_json_field(self, client, collection_name, enable_dynamic_field,
                                           json_field_name="my_json"):
        """
        Create a collection from an explicit schema (int64 primary key, float vector, varchar)
        with a COSINE index on the vector field.

        When `enable_dynamic_field` is False the JSON field `json_field_name` is added to the
        schema explicitly; when it is True the field is left out of the schema.

        The schema and the index params are passed to create_collection so that the
        `enable_dynamic_field` parametrization of the calling test actually takes effect.
        """
        schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
        if not enable_dynamic_field:
            schema.add_field(json_field_name, DataType.JSON)
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(default_vector_field_name, metric_type="COSINE")
        self.create_collection(client, collection_name, schema=schema, index_params=index_params)

    # ******************************************************************
    #  The following are invalid base cases
    # ******************************************************************

    @pytest.mark.tags(CaseLabel.L1)
    def test_milvus_client_json_path_index_no_index_params(self):
        """
        target: test json path index with:
                1. neither json_cast_type nor json_path
                2. no json_cast_type
                3. no json_path
        method: create a json path index with each of the incomplete parameter sets
        expected: cases 1 and 2 raise an exception because json_cast_type is missing;
                  case 3 is issued without an expected error
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create collection
        self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
        self.release_collection(client, collection_name)
        self.drop_index(client, collection_name, "vector")
        # the same error is expected whenever json_cast_type is absent
        missing_cast_type_error = {ct.err_code: 1100,
                                   ct.err_msg: "json index must specify cast type: missing parameter"
                                               "[missing_param=json_cast_type]: invalid parameter"
                                               "[expected=valid index params][actual=invalid index params]"}
        # 2. prepare index params with no json params at all
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
        index_params.add_index(field_name="my_json", index_type="INVERTED")
        # 3. create index
        self.create_index(client, collection_name, index_params,
                          check_task=CheckTasks.err_res, check_items=missing_cast_type_error)
        # 4. prepare index params with no json_cast_type
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
        index_params.add_index(field_name="my_json", index_type="INVERTED", params={"json_path": "my_json['a']['b']"})
        # 5. create index
        self.create_index(client, collection_name, index_params,
                          check_task=CheckTasks.err_res, check_items=missing_cast_type_error)
        # 6. prepare index params with no json_path
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
        index_params.add_index(field_name="my_json", index_type="INVERTED", params={"json_cast_type": "varchar"})
        # 7. create index (no error check is attached to this call)
        self.create_index(client, collection_name, index_params)
        self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("invalid_index_type", ["12-s", "12 s", "(mn)", "中文", "%$#"])
    def test_milvus_client_json_path_index_invalid_index_type(self, invalid_index_type):
        """
        target: test json path index with invalid index type
        method: create json path index with invalid index type
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create collection
        self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
        self.release_collection(client, collection_name)
        self.drop_index(client, collection_name, "vector")
        # 2. prepare index params with invalid index type
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
        index_params.add_index(field_name="my_json", index_type=invalid_index_type,
                               params={"json_cast_type": "double", "json_path": "my_json['a']['b']"})
        # 3. create index
        error = {ct.err_code: 1100, ct.err_msg: "invalid parameter[expected=valid index]"
                                                f"[actual=invalid index type: {invalid_index_type}]"}
        self.create_index(client, collection_name, index_params,
                          check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
    def test_milvus_client_json_path_index_not_support_index_type(self, enable_dynamic_field,
                                                                  not_supported_varchar_scalar_index):
        """
        target: test json path index with not supported index type
        method: create json path index with not supported index type
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create collection
        json_field_name = "my_json"
        self._create_collection_with_json_field(client, collection_name, enable_dynamic_field, json_field_name)
        # 2. prepare index params with not supported json index type
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
        index_params.add_index(field_name=json_field_name, index_type=not_supported_varchar_scalar_index,
                               params={"json_cast_type": "DOUBLE", "json_path": "my_json['a']['b']"})
        # 3. create index
        # index type -> (name used in the error message, field types named in the error message)
        expected_message_parts = {
            "TRIE": ("TRIE", "varchar"),
            "STL_SORT": ("STL_SORT", "numeric"),
            "BITMAP": ("bitmap index", "bool, int, string and array"),
        }
        index_label, supported_field_type = expected_message_parts[not_supported_varchar_scalar_index]
        error = {ct.err_code: 1100, ct.err_msg: f"{index_label} are only supported on "
                                                f"{supported_field_type} field: invalid parameter[expected=valid "
                                                "index params][actual=invalid index params]"}
        self.create_index(client, collection_name, index_params,
                          check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
    @pytest.mark.parametrize("invalid_json_cast_type", ["12-s", "12 s", "(mn)", "中文", "%$#", 1, 1.0])
    def test_milvus_client_json_path_index_invalid_json_cast_type(self, enable_dynamic_field, invalid_json_cast_type,
                                                                  supported_varchar_scalar_index):
        """
        target: test json path index with invalid json_cast_type
        method: create json path index with invalid json_cast_type
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create collection
        json_field_name = "my_json"
        self._create_collection_with_json_field(client, collection_name, enable_dynamic_field, json_field_name)
        # 2. prepare index params with invalid json_cast_type
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
        index_params.add_index(field_name=json_field_name, index_name="json_index",
                               index_type=supported_varchar_scalar_index,
                               params={"json_cast_type": invalid_json_cast_type,
                                       "json_path": f"{json_field_name}['a']['b']"})
        # 3. create index
        error = {ct.err_code: 1100, ct.err_msg: "index params][actual=invalid index params]"}
        self.create_index(client, collection_name, index_params,
                          check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
    def test_milvus_client_json_path_index_not_supported_json_cast_type(self, enable_dynamic_field,
                                                                        not_supported_json_cast_type,
                                                                        supported_varchar_scalar_index):
        """
        target: test json path index with not supported json_cast_type
        method: create json path index with not supported json_cast_type
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create collection
        json_field_name = "my_json"
        self._create_collection_with_json_field(client, collection_name, enable_dynamic_field, json_field_name)
        # 2. prepare index params with not supported json_cast_type
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
        index_params.add_index(field_name=json_field_name, index_name="json_index",
                               index_type=supported_varchar_scalar_index,
                               params={"json_cast_type": not_supported_json_cast_type,
                                       "json_path": f"{json_field_name}['a']['b']"})
        # 3. create index
        error = {ct.err_code: 1100, ct.err_msg: "index params][actual=invalid index params]"}
        self.create_index(client, collection_name, index_params,
                          check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
    @pytest.mark.parametrize("invalid_json_path", [1, 1.0, '/'])
    def test_milvus_client_json_path_index_invalid_json_path(self, enable_dynamic_field, invalid_json_path,
                                                             supported_varchar_scalar_index):
        """
        target: test json path index with invalid json_path
        method: create json path index with invalid json_path
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create collection
        json_field_name = "my_json"
        self._create_collection_with_json_field(client, collection_name, enable_dynamic_field, json_field_name)
        # 2. prepare index params with invalid json_path
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
        index_params.add_index(field_name=json_field_name, index_name="json_index",
                               index_type=supported_varchar_scalar_index,
                               params={"json_cast_type": "Double", "json_path": invalid_json_path})
        # 3. create index
        error = {ct.err_code: 65535, ct.err_msg: f"cannot parse identifier: {invalid_json_path}"}
        self.create_index(client, collection_name, index_params,
                          check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    def test_milvus_client_json_path_index_not_exist_field_non_dynamic(self, supported_varchar_scalar_index):
        """
        target: test json path index with not exist field in non dynamic field scenario
        method: create json path index with not exist field with enable_dynamic_field disabled
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. build a schema that deliberately has no JSON field and no dynamic field
        json_field_name = "my_json"
        schema = self.create_schema(client, enable_dynamic_field=False)[0]
        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
        # 2. prepare index params that reference the missing field
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
        index_params.add_index(field_name=json_field_name, index_type=supported_varchar_scalar_index,
                               params={"json_cast_type": "double", "json_path": f"{json_field_name}['a']"})
        # 3. create collection together with the index params
        error = {ct.err_code: 65535, ct.err_msg: f"cannot create index on non-exist field: {json_field_name}"}
        self.create_collection(client, collection_name, schema=schema, index_params=index_params,
                               check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
    def test_milvus_client_different_index_same_json_path(self, enable_dynamic_field, supported_varchar_scalar_index):
        """
        target: test create different index with different json_cast_type on the same json path of the same field
        method: create different index with different json_cast_type on the same
                json path of the same field (same index name)
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create collection
        json_field_name = "my_json"
        self._create_collection_with_json_field(client, collection_name, enable_dynamic_field, json_field_name)
        # 2. prepare index params
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
        index_params.add_index(field_name=json_field_name, index_name="json_index",
                               index_type=supported_varchar_scalar_index,
                               params={"json_cast_type": "double", "json_path": f"{json_field_name}['a']"})
        # 3. create index
        self.create_index(client, collection_name, index_params)
        # 4. prepare another index params: same name and path, different json_cast_type
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=json_field_name, index_name="json_index",
                               index_type=supported_varchar_scalar_index,
                               params={"json_cast_type": "varchar", "json_path": f"{json_field_name}['a']"})
        # 5. create index
        error = {ct.err_code: 65535,
                 ct.err_msg: "CreateIndex failed: at most one distinct index is allowed per field"}
        self.create_index(client, collection_name, index_params,
                          check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
    def test_milvus_client_different_index_name_same_json_path(self, enable_dynamic_field,
                                                               supported_varchar_scalar_index):
        """
        target: test json path index with different index name but with same json path
        method: create json path index with different index name but with same json path
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create collection
        json_field_name = "my_json"
        self._create_collection_with_json_field(client, collection_name, enable_dynamic_field, json_field_name)
        # 2. prepare index params
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
        index_params.add_index(field_name=json_field_name, index_name="json_index_1",
                               index_type=supported_varchar_scalar_index,
                               params={"json_cast_type": "varchar", "json_path": f"{json_field_name}['a']"})
        # 3. create index
        self.create_index(client, collection_name, index_params)
        # 4. prepare another index params: same path and cast type, different index name
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=json_field_name, index_name="json_index_2",
                               index_type=supported_varchar_scalar_index,
                               params={"json_cast_type": "varchar", "json_path": f"{json_field_name}['a']"})
        # 5. create index
        error = {ct.err_code: 65535, ct.err_msg: "CreateIndex failed: creating multiple "
                                                 "indexes on same field is not supported"}
        self.create_index(client, collection_name, index_params,
                          check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
    def test_milvus_client_different_json_path_index_same_field_same_index_name(self, enable_dynamic_field,
                                                                                supported_json_cast_type,
                                                                                supported_varchar_scalar_index):
        """
        target: test different json path index with same index name at the same time
        method: test different json path index with same index name at the same index_params object
        expected: raise exception
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create collection
        json_field_name = "my_json"
        self._create_collection_with_json_field(client, collection_name, enable_dynamic_field, json_field_name)
        # 2. insert
        vectors = cf.gen_vectors(default_nb, default_dim)
        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
                 default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in range(default_nb)]
        self.insert(client, collection_name, rows)
        # 3. prepare index params: three different json paths that all share one index name
        index_name = "json_index"
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
        for json_path in (f"{json_field_name}['a']['b']", f"{json_field_name}['a']", f"{json_field_name}"):
            index_params.add_index(field_name=json_field_name, index_name=index_name,
                                   index_type=supported_varchar_scalar_index,
                                   params={"json_cast_type": supported_json_cast_type,
                                           "json_path": json_path})
        # 4. create index
        error = {ct.err_code: 65535,
                 ct.err_msg: "CreateIndex failed: at most one distinct index is allowed per field"}
        self.create_index(client, collection_name, index_params,
                          check_task=CheckTasks.err_res, check_items=error)
|
|
|
|
|
|
class TestMilvusClientJsonPathIndexValid(TestMilvusClientV2Base):
|
|
""" Test case of search interface """
|
|
|
|
@pytest.fixture(scope="function", params=["TRIE", "STL_SORT", "BITMAP"])
def not_supported_varchar_scalar_index(self, request):
    """Provide, one value per test run, each index type listed in `params`."""
    # nothing has to be cleaned up afterwards, so a plain return is enough
    return request.param
|
|
|
|
@pytest.fixture(scope="function", params=["INVERTED"])
def supported_varchar_scalar_index(self, request):
    """Provide the index type the tests of this class use for json path indexes."""
    # nothing has to be cleaned up afterwards, so a plain return is enough
    return request.param
|
|
|
|
@pytest.fixture(scope="function", params=["DOUBLE", "VARCHAR", "BOOL", "double", "varchar", "bool"])
def supported_json_cast_type(self, request):
    """Provide, one value per test run, each json_cast_type spelling listed in `params`."""
    # nothing has to be cleaned up afterwards, so a plain return is enough
    return request.param
|
|
|
|
"""
|
|
******************************************************************
|
|
# The following are valid base cases
|
|
******************************************************************
|
|
"""
|
|
|
|
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_json_path_index_default(self, enable_dynamic_field, supported_json_cast_type,
                                               supported_varchar_scalar_index):
    """
    target: test json path index with default parameter
    method: 1. create a collection from an explicit schema (the JSON field is only part of the
               schema when enable_dynamic_field is False)
            2. insert six batches whose JSON values have different shapes
            3. create json path indexes on five different paths without giving index names
            4. describe the indexes, create the same indexes again and describe all of them
    expected: create json path index successfully
    """
    client = self._client()
    collection_name = cf.gen_unique_str(prefix)

    # 1. create collection
    json_field_name = "my_json"
    schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
    schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
    schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
    schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
    if not enable_dynamic_field:
        schema.add_field(json_field_name, DataType.JSON)
    index_params = self.prepare_index_params(client)[0]
    index_params.add_index(default_vector_field_name, metric_type="COSINE")
    self.create_collection(client, collection_name, schema=schema, index_params=index_params)

    # 2. insert with different data distribution
    # Each batch owns its own primary-key range; together they use all default_nb + 50 vectors.
    # (The ranges of the last two batches used to overlap the fourth one, which inserted
    # duplicate primary keys and left the final ten vectors unused.)
    vectors = cf.gen_vectors(default_nb + 50, default_dim)
    json_batches = [
        (0, default_nb, lambda i: {'a': {"b": i}}),
        (default_nb, default_nb + 10, lambda i: i),
        (default_nb + 10, default_nb + 20, lambda i: {}),
        (default_nb + 20, default_nb + 30, lambda i: {'a': [1, 2, 3]}),
        (default_nb + 30, default_nb + 40, lambda i: {'a': [{'b': 1}, 2, 3]}),
        (default_nb + 40, default_nb + 50, lambda i: {'a': [{'b': None}, 2, 3]}),
    ]
    for start, stop, make_json_value in json_batches:
        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
                 default_string_field_name: str(i), json_field_name: make_json_value(i)}
                for i in range(start, stop)]
        self.insert(client, collection_name, rows)

    # 3. prepare index params
    # (suffix of the index name checked below, json_path the index is created on)
    indexed_paths = [
        ("/a/b", f"{json_field_name}['a']['b']"),
        ("/a", f"{json_field_name}['a']"),
        ("", f"{json_field_name}"),
        ("/a/0/b", f"{json_field_name}['a'][0]['b']"),
        ("/a/0", f"{json_field_name}['a'][0]"),
    ]
    index_params = self.prepare_index_params(client)[0]
    index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
    for _, json_path in indexed_paths:
        index_params.add_index(field_name=json_field_name,
                               index_type=supported_varchar_scalar_index,
                               params={"json_cast_type": supported_json_cast_type,
                                       "json_path": json_path})

    # 4. create index
    self.create_index(client, collection_name, index_params)
    self.list_indexes(client, collection_name)

    # index names checked below start with the field name, prefixed by "$meta/" for a dynamic field
    index_name = json_field_name
    if enable_dynamic_field:
        index_name = "$meta/" + json_field_name

    def check_described_index(name_suffix, json_path):
        """Describe the index `index_name + name_suffix` and check its reported properties."""
        self.describe_index(client, collection_name, index_name + name_suffix,
                            check_task=CheckTasks.check_describe_index_property,
                            check_items={
                                "json_cast_type": supported_json_cast_type,
                                "json_path": json_path,
                                "index_type": supported_varchar_scalar_index,
                                "field_name": json_field_name,
                                "index_name": index_name + name_suffix})

    # 5. describe the first json path index
    check_described_index(*indexed_paths[0])

    # 6. create same json index twice, then describe every json path index
    self.create_index(client, collection_name, index_params)
    for name_suffix, json_path in indexed_paths:
        check_described_index(name_suffix, json_path)
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_json_path_index_default_index_name(
        self, enable_dynamic_field, supported_json_cast_type, supported_varchar_scalar_index):
    """
    target: test the index name used for a json path index when no index_name is passed
    method: 1. create a collection; "my_json" is a declared JSON field when
               enable_dynamic_field is False, otherwise it is only a key of the inserted rows
            2. insert default_nb rows
            3. add a json path index on my_json['a']['b'] without index_name and create it
            4. describe the index by "my_json/a/b" ("$meta/my_json/a/b" when
               enable_dynamic_field is True)
    expected: describe_index is checked against the json_cast_type, json_path, index_type,
              field_name and index_name passed in check_items
    """
    client = self._client()
    collection_name = cf.gen_unique_str(prefix)
    json_field_name = "my_json"
    target_path = f"{json_field_name}['a']['b']"

    # 1. create collection (the JSON field is only declared when dynamic fields are off)
    schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
    schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
    schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
    schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
    if not enable_dynamic_field:
        schema.add_field(json_field_name, DataType.JSON)
    collection_index_params = self.prepare_index_params(client)[0]
    collection_index_params.add_index(default_vector_field_name, metric_type="COSINE")
    self.create_collection(client, collection_name, schema=schema, index_params=collection_index_params)

    # 2. insert default_nb rows whose json value is {'a': {'b': <pk>}}
    vectors = cf.gen_vectors(default_nb, default_dim)
    rows = []
    for pk in range(default_nb):
        rows.append({
            default_primary_key_field_name: pk,
            default_vector_field_name: vectors[pk],
            default_string_field_name: str(pk),
            json_field_name: {'a': {"b": pk}},
        })
    self.insert(client, collection_name, rows)

    # 3. prepare index params; note that no index_name is given for the json path index
    json_index_params = self.prepare_index_params(client)[0]
    json_index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX",
                                metric_type="COSINE")
    json_index_params.add_index(field_name=json_field_name, index_type=supported_varchar_scalar_index,
                                params={"json_cast_type": supported_json_cast_type,
                                        "json_path": target_path})

    # 4. create the index and describe it by the name this test expects
    expected_name = ("$meta/" + json_field_name + '/a/b') if enable_dynamic_field \
        else (json_field_name + '/a/b')
    self.create_index(client, collection_name, json_index_params)
    expected_properties = {
        "json_cast_type": supported_json_cast_type,
        "json_path": target_path,
        "index_type": supported_varchar_scalar_index,
        "field_name": json_field_name,
        "index_name": expected_name,
    }
    self.describe_index(client, collection_name, expected_name,
                        check_task=CheckTasks.check_describe_index_property,
                        check_items=expected_properties)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip(reason="issue #40636")
def test_milvus_client_json_path_index_on_non_json_field(
        self, supported_json_cast_type, supported_varchar_scalar_index):
    """
    target: test json path index with "json_cast_type" and "json_path" parameters on non json field
    method: create an index carrying "json_cast_type" and "json_path" parameters on a VARCHAR field
    steps: 1. create schema with id, vector and varchar fields
           2. prepare index parameters with default vector index
           3. create collection with the above defined schema and index params
           4. insert default_nb numbers of data
           5. prepare index params with "json_cast_type" and "json_path" params on the varchar field
           6. create index with the new index params
           7. describe the index (named after the varchar field) and check it against check_items
    expected: create the original inverted index successfully
    NOTE(review): the earlier description of step 7 said describe_index should NOT contain
    "json_cast_type" / "json_path", yet check_items passes both keys - confirm the intended
    expectation when this test is un-skipped (issue #40636).
    """
    client = self._client()
    collection_name = cf.gen_unique_str(prefix)
    indexed_field = default_string_field_name
    requested_path = f"{default_string_field_name}['a']['b']"

    # 1-3. create a collection without any JSON field and without dynamic fields
    schema = self.create_schema(client, enable_dynamic_field=False)[0]
    schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
    schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
    schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
    collection_index_params = self.prepare_index_params(client)[0]
    collection_index_params.add_index(default_vector_field_name, metric_type="COSINE")
    self.create_collection(client, collection_name, schema=schema, index_params=collection_index_params)

    # 4. insert
    vectors = cf.gen_vectors(default_nb, default_dim)
    rows = []
    for pk in range(default_nb):
        rows.append({
            default_primary_key_field_name: pk,
            default_vector_field_name: vectors[pk],
            default_string_field_name: str(pk),
        })
    self.insert(client, collection_name, rows)

    # 5. prepare index params: json-specific params attached to the varchar field
    scalar_index_params = self.prepare_index_params(client)[0]
    scalar_index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX",
                                  metric_type="COSINE")
    scalar_index_params.add_index(field_name=default_string_field_name,
                                  index_type=supported_varchar_scalar_index,
                                  params={"json_cast_type": supported_json_cast_type,
                                          "json_path": requested_path})

    # 6-7. create the index, then describe it under the field's own name
    index_name = indexed_field
    self.create_index(client, collection_name, scalar_index_params)
    expected_properties = {
        "json_cast_type": supported_json_cast_type,
        "json_path": requested_path,
        "index_type": supported_varchar_scalar_index,
        "field_name": default_string_field_name,
        "index_name": index_name,
    }
    self.describe_index(client, collection_name, index_name,
                        check_task=CheckTasks.check_describe_index_property,
                        check_items=expected_properties)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_different_json_path_index_same_field_different_index_name(
        self, enable_dynamic_field, supported_json_cast_type, supported_varchar_scalar_index):
    """
    target: test several json path indexes on the same json field, each with its own
            explicit index name, created at the same time
    method: add three json path indexes ("json_index1" on my_json['a']['b'], "json_index2" on
            my_json['a'], "json_index3" on my_json) to one index_params object and create
            them with a single create_index call
    expected: each of the three index names can be described and is checked against its own
              json_path, json_cast_type, index_type and field_name
    """
    client = self._client()
    collection_name = cf.gen_unique_str(prefix)
    json_field_name = "my_json"

    # 1. create collection (the JSON field is only declared when dynamic fields are off)
    schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
    schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
    schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
    schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
    if not enable_dynamic_field:
        schema.add_field(json_field_name, DataType.JSON)
    collection_index_params = self.prepare_index_params(client)[0]
    collection_index_params.add_index(default_vector_field_name, metric_type="COSINE")
    self.create_collection(client, collection_name, schema=schema, index_params=collection_index_params)

    # 2. insert
    vectors = cf.gen_vectors(default_nb, default_dim)
    rows = []
    for pk in range(default_nb):
        rows.append({
            default_primary_key_field_name: pk,
            default_vector_field_name: vectors[pk],
            default_string_field_name: str(pk),
            json_field_name: {'a': {"b": pk}},
        })
    self.insert(client, collection_name, rows)

    # 3. prepare index params: one explicitly named index per json path
    index_name = "json_index"
    named_paths = [
        (index_name + "1", f"{json_field_name}['a']['b']"),
        (index_name + "2", f"{json_field_name}['a']"),
        (index_name + "3", f"{json_field_name}"),
    ]
    json_index_params = self.prepare_index_params(client)[0]
    json_index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX",
                                metric_type="COSINE")
    for name, json_path in named_paths:
        json_index_params.add_index(field_name=json_field_name, index_name=name,
                                    index_type=supported_varchar_scalar_index,
                                    params={"json_cast_type": supported_json_cast_type,
                                            "json_path": json_path})

    # 4. create all indexes in one call, then describe each of them by name
    self.create_index(client, collection_name, json_index_params)
    for name, json_path in named_paths:
        self.describe_index(client, collection_name, name,
                            check_task=CheckTasks.check_describe_index_property,
                            check_items={
                                "json_cast_type": supported_json_cast_type,
                                "json_path": json_path,
                                "index_type": supported_varchar_scalar_index,
                                "field_name": json_field_name,
                                "index_name": name})
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_diff_index_same_field_diff_index_name_diff_index_params(self, enable_dynamic_field,
                                                                               supported_json_cast_type,
                                                                               supported_varchar_scalar_index):
    """
    target: test several json path indexes on the same json field, each created through its
            own index_params object and without an explicit index name
    method: 1. create a collection, load it and insert default_nb rows
            2. for each of my_json['a']['b'], my_json['a'] and my_json, prepare a fresh
               index_params object holding only that json path index and call create_index
            3. describe the indexes by "my_json/a/b", "my_json/a" and "my_json"
               (each prefixed with "$meta/" when enable_dynamic_field is True)
    expected: create index successfully with all the indexes created
    """
    client = self._client()
    collection_name = cf.gen_unique_str(prefix)
    json_field_name = "my_json"

    # 1. create collection (the JSON field is only declared when dynamic fields are off)
    schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
    schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
    schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
    schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
    if not enable_dynamic_field:
        schema.add_field(json_field_name, DataType.JSON)
    index_params = self.prepare_index_params(client)[0]
    index_params.add_index(default_vector_field_name, metric_type="COSINE")
    self.create_collection(client, collection_name, schema=schema, index_params=index_params)
    # the json path indexes below are created after this explicit load call
    self.load_collection(client, collection_name)

    # 2. insert
    vectors = cf.gen_vectors(default_nb, default_dim)
    rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
             default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in range(default_nb)]
    self.insert(client, collection_name, rows)

    # (json_path, index name expected for that path); both are derived from json_field_name
    # so the expectations cannot drift from the field that is actually indexed
    path_cases = [
        (f"{json_field_name}['a']['b']", f"{json_field_name}/a/b"),
        (f"{json_field_name}['a']", f"{json_field_name}/a"),
        (f"{json_field_name}", f"{json_field_name}"),
    ]

    # 3. create every json path index through its own index_params object / create_index call
    for json_path, _ in path_cases:
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=json_field_name, index_type=supported_varchar_scalar_index,
                               params={"json_cast_type": supported_json_cast_type, "json_path": json_path})
        self.create_index(client, collection_name, index_params)

    # 4. describe each index by the name this test expects for it
    for json_path, index_name in path_cases:
        if enable_dynamic_field:
            index_name = "$meta/" + index_name
        self.describe_index(client, collection_name, index_name,
                            check_task=CheckTasks.check_describe_index_property,
                            check_items={
                                "json_cast_type": supported_json_cast_type,
                                "json_path": json_path,
                                "index_type": supported_varchar_scalar_index,
                                "field_name": json_field_name,
                                "index_name": index_name})
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_json_index_same_json_path_diff_field(
        self, enable_dynamic_field, supported_json_cast_type, supported_varchar_scalar_index):
    """
    target: test json path indexes that use the same path (['a']['b']) on two different
            json fields
    method: 1. create a collection; "my_json" and "my_json1" are declared JSON fields when
               enable_dynamic_field is False, otherwise they are only keys of the inserted rows
            2. insert default_nb rows carrying {'a': {'b': <pk>}} under both keys
            3. create one json path index per field, each through its own index_params
               object and without an explicit index name
            4. describe both indexes by "<field>/a/b" ("$meta/<field>/a/b" when
               enable_dynamic_field is True)
    expected: both indexes can be described and are checked against their own field_name
              and json_path
    """
    client = self._client()
    collection_name = cf.gen_unique_str(prefix)
    json_field_name = "my_json"

    # 1. create collection (both JSON fields are only declared when dynamic fields are off)
    schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
    schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
    schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
    schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
    if not enable_dynamic_field:
        schema.add_field(json_field_name, DataType.JSON)
        schema.add_field(json_field_name + "1", DataType.JSON)
    collection_index_params = self.prepare_index_params(client)[0]
    collection_index_params.add_index(default_vector_field_name, metric_type="COSINE")
    self.create_collection(client, collection_name, schema=schema, index_params=collection_index_params)

    # 2. insert
    vectors = cf.gen_vectors(default_nb, default_dim)
    rows = []
    for pk in range(default_nb):
        rows.append({
            default_primary_key_field_name: pk,
            default_vector_field_name: vectors[pk],
            default_string_field_name: str(pk),
            json_field_name: {'a': {'b': pk}},
            json_field_name + "1": {'a': {'b': pk}},
        })
    self.insert(client, collection_name, rows)

    # (field, json_path, index name expected before any "$meta/" prefix)
    field_cases = [
        (json_field_name, f"{json_field_name}['a']['b']", f"{json_field_name}/a/b"),
        (json_field_name + "1", f"{json_field_name}1['a']['b']", f"{json_field_name}1/a/b"),
    ]

    # 3. create one json path index per field, each with a fresh index_params object
    for field, json_path, _ in field_cases:
        per_field_params = self.prepare_index_params(client)[0]
        per_field_params.add_index(field_name=field,
                                   index_type=supported_varchar_scalar_index,
                                   params={"json_cast_type": supported_json_cast_type,
                                           "json_path": json_path})
        self.create_index(client, collection_name, per_field_params)

    # 4. describe both indexes by the names this test expects
    for field, json_path, base_name in field_cases:
        index_name = ("$meta/" + base_name) if enable_dynamic_field else base_name
        self.describe_index(client, collection_name, index_name,
                            check_task=CheckTasks.check_describe_index_property,
                            check_items={
                                "json_cast_type": supported_json_cast_type,
                                "json_path": json_path,
                                "index_type": supported_varchar_scalar_index,
                                "field_name": field,
                                "index_name": index_name})
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_json_path_index_before_load(self, enable_dynamic_field, supported_json_cast_type,
                                                   supported_varchar_scalar_index):
    """
    target: test creating json path indexes while the collection is released
    method: 1. create a collection and release it
            2. insert rows whose json value has different shapes (nested object, plain
               number, empty object, array of numbers, array starting with an object,
               array starting with an object holding None)
            3. add five named json path indexes (on ['a']['b'], ['a'], the whole field,
               ['a'][0]['b'] and ['a'][0]) to one index_params object and create them
            4. repeat the identical create_index call twice more
            5. describe every index by name
    expected: every index can be described and is checked against its json_path, index_type,
              field_name and index_name (json_cast_type is not checked, see issue 40426)
    """
    client = self._client()
    collection_name = cf.gen_unique_str(prefix)

    # 1. create collection (the JSON field is only declared when dynamic fields are off)
    json_field_name = "my_json"
    schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
    schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
    schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
    schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
    if not enable_dynamic_field:
        schema.add_field(json_field_name, DataType.JSON)
    index_params = self.prepare_index_params(client)[0]
    index_params.add_index(default_vector_field_name, metric_type="COSINE")
    self.create_collection(client, collection_name, schema=schema, index_params=index_params)

    # 2. release collection so that everything below happens before any (re)load
    self.release_collection(client, collection_name)

    # 3. insert with different data distribution.
    # Each batch is (first pk, end pk (exclusive), factory for the json value of row i).
    # The five extra batches use disjoint 10-id windows, so primary keys never repeat and
    # all default_nb + 50 generated vectors are consumed.
    vectors = cf.gen_vectors(default_nb + 50, default_dim)
    batches = [
        (0, default_nb, lambda i: {'a': {"b": i}}),
        (default_nb, default_nb + 10, lambda i: i),
        (default_nb + 10, default_nb + 20, lambda i: {}),
        (default_nb + 20, default_nb + 30, lambda i: {'a': [1, 2, 3]}),
        (default_nb + 30, default_nb + 40, lambda i: {'a': [{'b': 1}, 2, 3]}),
        (default_nb + 40, default_nb + 50, lambda i: {'a': [{'b': None}, 2, 3]}),
    ]
    for start, stop, make_json in batches:
        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
                 default_string_field_name: str(i), json_field_name: make_json(i)}
                for i in range(start, stop)]
        self.insert(client, collection_name, rows)

    # 4. prepare index params: five named json path indexes on the same field
    index_name = "json_index"
    json_indexes = [
        (index_name, f"{json_field_name}['a']['b']"),
        (index_name + '1', f"{json_field_name}['a']"),
        (index_name + '2', f"{json_field_name}"),
        (index_name + '3', f"{json_field_name}['a'][0]['b']"),
        (index_name + '4', f"{json_field_name}['a'][0]"),
    ]
    index_params = self.prepare_index_params(client)[0]
    index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
    for name, json_path in json_indexes:
        index_params.add_index(field_name=json_field_name, index_name=name,
                               index_type=supported_varchar_scalar_index,
                               params={"json_cast_type": supported_json_cast_type, "json_path": json_path})

    def check_described(name, json_path):
        # "json_cast_type" is intentionally left out of check_items, see issue 40426
        self.describe_index(client, collection_name, name,
                            check_task=CheckTasks.check_describe_index_property,
                            check_items={
                                "json_path": json_path,
                                "index_type": supported_varchar_scalar_index,
                                "field_name": json_field_name,
                                "index_name": name})

    # 5. create index and describe the first json path index
    self.create_index(client, collection_name, index_params)
    check_described(*json_indexes[0])

    # 6. repeat the identical create_index request twice; the describe checks below
    #    must still hold afterwards
    self.create_index(client, collection_name, index_params)
    self.create_index(client, collection_name, index_params)

    # 7. describe every json path index by name
    for name, json_path in json_indexes:
        check_described(name, json_path)
|
|
|