milvus/tests/python_client/milvus_client/test_milvus_client_index.py
Bingyi Sun 0c0630cc38
feat: support dropping index without releasing collection (#42941)
issue: #42942

This PR includes the following changes:
1. Added checks to the index checker in querycoord so that it generates drop-index tasks.
2. Added a drop-index interface to querynode.
3. To avoid search failures after dropping an index, querynode allows lazy mode (warmup=disable) to load raw data even when the index contains raw data.
4. In segcore, loading an index no longer deletes the raw data; the raw data is evicted instead.
5. In expr, the index is pinned to prevent concurrency errors.

---------

Signed-off-by: sunby <sunbingyi1992@gmail.com>
2025-09-02 16:17:52 +08:00
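
As context for the tests in this file, here is a minimal, hedged sketch of the drop/recreate-index flow they exercise, written against the pymilvus MilvusClient API; the endpoint URI, collection name, and index parameter values below are illustrative placeholders, not values taken from this file:

from pymilvus import MilvusClient

client = MilvusClient("http://localhost:19530")   # hypothetical endpoint
client.release_collection("demo_collection")      # per #42941, scalar indexes may be droppable without this step
client.drop_index("demo_collection", "vector")    # drop the index named "vector"
index_params = client.prepare_index_params()
index_params.add_index(field_name="vector", index_type="HNSW", metric_type="COSINE",
                       params={"M": 16, "efConstruction": 200})   # illustrative build params
client.create_index("demo_collection", index_params)
client.load_collection("demo_collection")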


import pytest
from base.client_v2_base import TestMilvusClientV2Base
from utils.util_log import test_log as log
from common import common_func as cf
from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from utils.util_pymilvus import *
from pymilvus import DataType
prefix = "client_index"
epsilon = ct.epsilon
default_nb = ct.default_nb
default_nb_medium = ct.default_nb_medium
default_nq = ct.default_nq
default_dim = ct.default_dim
default_limit = ct.default_limit
default_search_exp = "id >= 0"
exp_res = "exp_res"
default_search_string_exp = "varchar >= \"0\""
default_search_mix_exp = "int64 >= 0 && varchar >= \"0\""
default_invaild_string_exp = "varchar >= 0"
default_json_search_exp = "json_field[\"number\"] >= 0"
perfix_expr = 'varchar like "0%"'
default_search_field = ct.default_float_vec_field_name
default_search_params = ct.default_search_params
default_primary_key_field_name = "id"
default_vector_field_name = "vector"
default_multiple_vector_field_name = "vector_new"
default_float_field_name = ct.default_float_field_name
default_bool_field_name = ct.default_bool_field_name
default_string_field_name = ct.default_string_field_name
default_int32_array_field_name = ct.default_int32_array_field_name
default_string_array_field_name = ct.default_string_array_field_name
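# Note on the helpers used below: create_collection / create_index / drop_index / search / query etc. are
# thin wrappers provided by TestMilvusClientV2Base around the corresponding MilvusClient calls. Each wrapper
# returns a (result, check_status) pair, which is why calls such as self.prepare_index_params(client)[0]
# take the first element. Passing check_task=CheckTasks.err_res with check_items={err_code, err_msg}
# asserts that the call fails with the expected error, while CheckTasks.check_search_results /
# check_query_results validate successful responses. (A summary of how these wrappers are used in this
# file, not a full description of the base class.)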
class TestMilvusClientIndexInvalid(TestMilvusClientV2Base):
""" Test case of search interface """
@pytest.fixture(scope="function", params=[False, True])
def auto_id(self, request):
yield request.param
@pytest.fixture(scope="function", params=["COSINE", "L2"])
def metric_type(self, request):
yield request.param
"""
******************************************************************
# The following are invalid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("name", ["12-s", "12 s", "(mn)", "中文", "%$#"])
def test_milvus_client_index_invalid_collection_name(self, name):
"""
target: test index abnormal case
method: create index on invalid collection name
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, "vector")
# 2. prepare index params
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name="vector")
# 3. create index
error = {ct.err_code: 100, ct.err_msg: f"collection not found[database=default][collection={name}"}
self.create_index(client, name, index_params,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("name", ["a".join("a" for i in range(256))])
def test_milvus_client_index_collection_name_over_max_length(self, name):
"""
target: test index abnormal case
method: create index on collection name over max length
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, "vector")
# 2. prepare index params
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name="vector")
# 3. create index
error = {ct.err_code: 100, ct.err_msg: f"collection not found[database=default][collection={name}]"}
self.create_index(client, name, index_params,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_index_not_exist_collection_name(self):
"""
target: test index abnormal case
method: create index on a non-existent collection name
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
not_existed_collection_name = cf.gen_unique_str("not_existed_collection")
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, "vector")
# 2. prepare index params
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name="vector")
# 3. create index
error = {ct.err_code: 100,
ct.err_msg: f"collection not found[database=default][collection={not_existed_collection_name}]"}
self.create_index(client, not_existed_collection_name, index_params,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip(reason="pymilvus issue 1885")
@pytest.mark.parametrize("index", ["12-s", "12 s", "(mn)", "中文", "%$#", "a".join("a" for i in range(256))])
def test_milvus_client_index_invalid_index_type(self, index):
"""
target: test index abnormal case
method: create index with an invalid index type
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, "vector")
# 2. prepare index params
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name="vector", index_type=index)
# 3. create index
error = {ct.err_code: 100, ct.err_msg: f"can't find collection collection not "
f"found[database=default][collection=not_existed]"}
self.create_index(client, collection_name, index_params,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip(reason="pymilvus issue 1885")
@pytest.mark.parametrize("metric", ["12-s", "12 s", "(mn)", "中文", "%$#", "a".join("a" for i in range(256))])
def test_milvus_client_index_invalid_metric_type(self, metric):
"""
target: test index abnormal case
method: create index with an invalid metric type
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, "vector")
# 2. prepare index params
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name="vector", metric_type=metric)
# 3. create index
error = {ct.err_code: 100, ct.err_msg: f"can't find collection collection not "
f"found[database=default][collection=not_existed]"}
self.create_index(client, collection_name, index_params,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_index_drop_index_before_release(self):
"""
target: test index abnormal case
method: drop the vector index on a loaded collection without releasing it first
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
error = {ct.err_code: 1100, ct.err_msg: f"vector index cannot be dropped on loaded collection"}
self.drop_index(client, collection_name, "vector",
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
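# Note: as exercised above, dropping a vector index on a loaded collection is still rejected with
# err_code 1100. The drop-without-release support added in #42941 is understood to apply to scalar
# indexes, so vector-index drops continue to require release_collection first (an interpretation of
# the PR description, not a statement of the full server contract).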
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_create_multiple_diff_index_without_release(self):
"""
target: test index abnormal case
method: create a different index on an already-indexed field without releasing the collection
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. prepare index params
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name="vector", index_type="IVF_FLAT", metric_type="L2")
# 3. create another index
error = {ct.err_code: 65535, ct.err_msg: "CreateIndex failed: at most one distinct index is allowed per field"}
self.create_index(client, collection_name, index_params,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("not_supported_index", ct.all_index_types[:-2])
def test_milvus_client_int8_vector_create_not_supported_cpu_index(self, not_supported_index):
"""
target: test creating an unsupported CPU index type on an int8 vector field
method: create the unsupported index on an int8 vector field
expected: raise exception
"""
if not_supported_index in ct.int8_vector_index:
pytest.skip("This index is supported by int8 vector")
client = self._client()
collection_name = cf.gen_unique_str(prefix)
dim = 128
# 1. create collection
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field("id_string", DataType.VARCHAR, max_length=64, is_primary=True, auto_id=False)
schema.add_field("embeddings", DataType.INT8_VECTOR, dim=dim)
index_params = self.prepare_index_params(client)[0]
index_params.add_index("embeddings", metric_type="COSINE")
# 2. create the collection with the prepared schema and index params
self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, "embeddings")
# 3. prepare index params
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name="embeddings", index_type=not_supported_index, metric_type="L2")
# 4. create another index
error = {ct.err_code: 1100, ct.err_msg: f"data type Int8Vector can't build with this index {not_supported_index}: "
f"invalid parameter[expected=valid index params][actual=invalid index params]"}
self.create_index(client, collection_name, index_params,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
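# Note: the parametrization of this pair of tests assumes ct.all_index_types keeps the GPU index types
# as its last two entries, so the [:-2] slice above covers the CPU index types and the [-2:] slice in
# the next test covers the GPU ones; pytest.skip filters out the index types that int8 vectors do
# support (ct.int8_vector_index).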
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("not_supported_index", ct.all_index_types[-2:])
def test_milvus_client_int8_vector_create_not_supported_GPU_index(self, not_supported_index):
"""
target: test creating an unsupported GPU index type on an int8 vector field
method: create the unsupported index on an int8 vector field
expected: raise exception
"""
if not_supported_index in ct.int8_vector_index:
pytest.skip("This index is supported by int8 vector")
client = self._client()
collection_name = cf.gen_unique_str(prefix)
dim = 128
# 1. create collection
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field("id_string", DataType.VARCHAR, max_length=64, is_primary=True, auto_id=False)
schema.add_field("embeddings", DataType.INT8_VECTOR, dim=dim)
index_params = self.prepare_index_params(client)[0]
index_params.add_index("embeddings", metric_type="COSINE")
# 2. create the collection with the prepared schema and index params
self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, "embeddings")
# 3. prepare index params
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name="embeddings", index_type=not_supported_index, metric_type="L2")
# 4. create another index
error = {ct.err_code: 1100, ct.err_msg: f"invalid parameter[expected=valid index][actual=invalid "
f"index type: {not_supported_index}"}
self.create_index(client, collection_name, index_params,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
class TestMilvusClientIndexValid(TestMilvusClientV2Base):
""" Test case of index interface """
@pytest.fixture(scope="function", params=[False, True])
def auto_id(self, request):
yield request.param
@pytest.fixture(scope="function", params=["COSINE", "L2", "IP"])
def metric_type(self, request):
yield request.param
@pytest.fixture(scope="function", params=["TRIE", "STL_SORT", "INVERTED", "AUTOINDEX"])
def scalar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=["TRIE", "INVERTED", "AUTOINDEX", ""])
def varchar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=["STL_SORT", "INVERTED", "AUTOINDEX", ""])
def numeric_index(self, request):
yield request.param
"""
******************************************************************
# The following are valid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("index", ct.all_index_types[:8])
def test_milvus_client_index_with_params(self, index, metric_type):
"""
target: test index with user defined params
method: create connection, collection, index, insert and search
expected: index/search/query successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, "vector")
res = self.list_indexes(client, collection_name)[0]
assert res == []
# 2. prepare index params
index_params = self.prepare_index_params(client)[0]
params = cf.get_index_params_params(index_type=index)
index_params.add_index(field_name="vector", index_type=index, params=params, metric_type=metric_type)
# 3. create index
self.create_index(client, collection_name, index_params)
# 4. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
# 5. load collection
self.load_collection(client, collection_name)
# 6. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
self.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit,
"pk_name": default_primary_key_field_name})
# 7. query
self.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows,
"with_vec": True,
"pk_name": default_primary_key_field_name})
self.drop_collection(client, collection_name)
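# Note: cf.get_index_params_params(index_type) used above returns the build-time params these tests
# commonly pair with each index type -- for example, something like {"nlist": 128} for IVF_FLAT or
# {"M": 16, "efConstruction": 200} for HNSW (illustrative values; the actual defaults live in
# common/common_func.py).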
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("index", ct.all_index_types[:8])
def test_milvus_client_index_after_insert(self, index, metric_type):
"""
target: test index after insert
method: create connection, collection, insert, index and search
expected: index/search/query successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, "vector")
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
# 3. prepare index params
index_params = self.prepare_index_params(client)[0]
params = cf.get_index_params_params(index)
index_params.add_index(field_name="vector", index_type=index, metric_type=metric_type, params=params)
# 4. create index
self.create_index(client, collection_name, index_params)
# 5. load collection
self.load_collection(client, collection_name)
# 6. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
self.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit,
"pk_name": default_primary_key_field_name})
# 7. query
self.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows,
"with_vec": True,
"pk_name": default_primary_key_field_name})
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("add_field", [True, False])
def test_milvus_client_index_auto_index(self, numeric_index, varchar_index, metric_type, add_field):
"""
target: test index with autoindex on both scalar and vector field
method: create connection, collection, insert and search
expected: index/search/query successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
schema = self.create_schema(client)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True)
schema.add_field(ct.default_int32_field_name, DataType.INT32)
schema.add_field(ct.default_int16_field_name, DataType.INT16)
schema.add_field(ct.default_int8_field_name, DataType.INT8)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
schema.add_field(default_float_field_name, DataType.FLOAT)
schema.add_field(ct.default_double_field_name, DataType.DOUBLE)
schema.add_field(ct.default_bool_field_name, DataType.BOOL)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
self.create_collection(client, collection_name, schema=schema, consistency_level="Strong")
if add_field:
self.add_collection_field(client, collection_name, field_name="field_int", data_type=DataType.INT32,
nullable=True)
self.add_collection_field(client, collection_name, field_name="field_varchar", data_type=DataType.VARCHAR,
nullable=True, max_length=64)
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, "vector")
res = self.list_indexes(client, collection_name)[0]
assert res == []
# 2. prepare index params
index = "AUTOINDEX"
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type=index, metric_type=metric_type)
index_params.add_index(field_name=ct.default_int32_field_name, index_type=numeric_index, metric_type=metric_type)
index_params.add_index(field_name=ct.default_int16_field_name, index_type=numeric_index, metric_type=metric_type)
index_params.add_index(field_name=ct.default_int8_field_name, index_type=numeric_index, metric_type=metric_type)
index_params.add_index(field_name=default_float_field_name, index_type=numeric_index, metric_type=metric_type)
index_params.add_index(field_name=ct.default_double_field_name, index_type=numeric_index, metric_type=metric_type)
index_params.add_index(field_name=ct.default_bool_field_name, index_type="", metric_type=metric_type)
index_params.add_index(field_name=default_string_field_name, index_type=varchar_index, metric_type=metric_type)
index_params.add_index(field_name=default_primary_key_field_name, index_type=numeric_index, metric_type=metric_type)
if add_field:
index_params.add_index(field_name="field_int", index_type=numeric_index, metric_type=metric_type)
index_params.add_index(field_name="field_varchar", index_type=varchar_index, metric_type=metric_type)
# 3. create index
self.create_index(client, collection_name, index_params)
# 4. drop index
self.drop_index(client, collection_name, default_vector_field_name)
self.drop_index(client, collection_name, ct.default_int32_field_name)
self.drop_index(client, collection_name, ct.default_int16_field_name)
self.drop_index(client, collection_name, ct.default_int8_field_name)
self.drop_index(client, collection_name, default_float_field_name)
self.drop_index(client, collection_name, ct.default_double_field_name)
self.drop_index(client, collection_name, ct.default_bool_field_name)
self.drop_index(client, collection_name, default_string_field_name)
self.drop_index(client, collection_name, default_primary_key_field_name)
if add_field:
self.drop_index(client, collection_name, "field_int")
self.drop_index(client, collection_name, "field_varchar")
# 5. create index
self.create_index(client, collection_name, index_params)
# 6. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
ct.default_int32_field_name: np.int32(i), ct.default_int16_field_name: np.int16(i),
ct.default_int8_field_name: np.int8(i), default_float_field_name: i * 1.0,
ct.default_double_field_name: np.double(i), ct.default_bool_field_name: np.bool_(i),
default_string_field_name: str(i),
**({"field_int": 10} if add_field else {}),
**({"field_varchar": "default"} if add_field else {})
} for i in range(default_nb)]
self.insert(client, collection_name, rows)
# 7. load collection
self.load_collection(client, collection_name)
# 8. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
self.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit,
"pk_name": default_primary_key_field_name})
# 9. query
self.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows,
"with_vec": True,
"pk_name": default_primary_key_field_name})
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_scalar_hybrid_index_small_distinct_before_insert(self, metric_type):
"""
target: test index with autoindex on int/varchar fields with a small number of distinct values (<=100)
method: create connection, collection, insert and search
expected: index/search/query successfully (the autoindex is effectively a bitmap index in this case)
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
int64_field_name = "int"
schema = self.create_schema(client)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
schema.add_field(int64_field_name, DataType.INT64)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
self.create_collection(client, collection_name, schema=schema, consistency_level="Strong")
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, "vector")
res = self.list_indexes(client, collection_name)[0]
assert res == []
# 2. prepare index params
index = "AUTOINDEX"
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type=index, metric_type=metric_type)
index_params.add_index(field_name=int64_field_name, index_type=index, metric_type=metric_type)
index_params.add_index(field_name=default_string_field_name, index_type=index, metric_type=metric_type)
# 3. create index
self.create_index(client, collection_name, index_params)
# 4. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
int64_field_name: np.random.randint(0, 99), default_string_field_name: str(np.random.randint(0, 99))}
for i in range(default_nb)]
self.insert(client, collection_name, rows)
# 5. load collection
self.load_collection(client, collection_name)
# 6. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
self.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit,
"pk_name": default_primary_key_field_name})
# 7. query
self.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows,
"with_vec": True,
"pk_name": default_primary_key_field_name})
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_scalar_hybrid_index_small_to_large_distinct_after_insert(self, metric_type):
"""
target: test index with autoindex on int/varchar fields that have a small number of distinct values (<=100) first,
then insert data with a large number of distinct values (2000+)
method: create connection, collection, insert and search
expected: index/search/query successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
int64_field_name = "int"
schema = self.create_schema(client)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True)
schema.add_field(ct.default_int32_field_name, DataType.INT32)
schema.add_field(ct.default_int16_field_name, DataType.INT16)
schema.add_field(ct.default_int8_field_name, DataType.INT8)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
schema.add_field(int64_field_name, DataType.INT64)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
self.create_collection(client, collection_name, schema=schema, consistency_level="Strong")
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, "vector")
res = self.list_indexes(client, collection_name)[0]
assert res == []
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
int64_field_name: np.random.randint(0, 99), ct.default_int32_field_name: np.int32(i),
ct.default_int16_field_name: np.int16(i), ct.default_int8_field_name: np.int8(i),
default_string_field_name: str(np.random.randint(0, 99))} for i in range(default_nb)]
self.insert(client, collection_name, rows)
# 3. prepare index params
index = "AUTOINDEX"
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type=index, metric_type=metric_type)
index_params.add_index(field_name=int64_field_name, index_type=index, metric_type=metric_type)
index_params.add_index(field_name=ct.default_int32_field_name, index_type="", metric_type=metric_type)
index_params.add_index(field_name=ct.default_int16_field_name, metric_type=metric_type)
index_params.add_index(field_name=ct.default_int8_field_name, index_type=index, metric_type=metric_type)
index_params.add_index(field_name=default_string_field_name, index_type=index, metric_type=metric_type)
# 4. create index
self.create_index(client, collection_name, index_params)
# 5. load collection
self.load_collection(client, collection_name)
# 6. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
self.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit,
"pk_name": default_primary_key_field_name})
# 7. query
self.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows,
"with_vec": True,
"pk_name": default_primary_key_field_name})
# 8. insert more distinct value to the scalar field to make the autoindex change
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
int64_field_name: np.random.randint(0, 99), ct.default_int32_field_name: np.int32(i),
ct.default_int16_field_name: np.int16(i), ct.default_int8_field_name: np.int8(i),
default_string_field_name: str(np.random.randint(0, 99))} for i in range(default_nb, 2*default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 9. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(2*default_nb)]
self.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit,
"pk_name": default_primary_key_field_name})
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_index_multiple_vectors(self, numeric_index, metric_type):
"""
target: test index for multiple vectors
method: create connection, collection, index, insert and search
expected: index/search/query successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, "vector")
res = self.list_indexes(client, collection_name)[0]
assert res == []
# 2. prepare index params
index = "AUTOINDEX"
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name="vector", index_type=index, metric_type=metric_type)
index_params.add_index(field_name="id", index_type=numeric_index, metric_type=metric_type)
# 3. create index
self.create_index(client, collection_name, index_params)
# 4. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i),
default_multiple_vector_field_name: list(rng.random((1, default_dim))[0])} for i in range(default_nb)]
self.insert(client, collection_name, rows)
# 5. load collection
self.load_collection(client, collection_name)
# 6. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
self.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit,
"pk_name": default_primary_key_field_name})
# 7. query
self.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows,
"with_vec": True,
"pk_name": default_primary_key_field_name})
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_index_drop_create_same_index(self):
"""
target: test index after drop and create same index twice
method: create connection, collection, create/drop/create index, insert and search
expected: index create/drop and search/query successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, "vector")
res = self.list_indexes(client, collection_name)[0]
assert res == []
# 2. prepare index params
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name="vector", index_type="HNSW", metric_type="L2")
# 3. create index
self.create_index(client, collection_name, index_params)
# 4. drop index
self.drop_index(client, collection_name, "vector")
# 5. create the same index again
self.create_index(client, collection_name, index_params)
# 6. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
# 7. load collection
self.load_collection(client, collection_name)
# 8. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
self.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit,
"pk_name": default_primary_key_field_name})
# 9. query
self.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows,
"with_vec": True,
"pk_name": default_primary_key_field_name})
self.drop_collection(client, collection_name)
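# The JSON path index tests below use the same add_index call with two extra params: "json_path"
# selects the element inside the JSON field to index, and "json_cast_type" tells the server which
# scalar type to cast it to before building the inverted index. A hedged sketch of the shape these
# tests use (values are illustrative, not defaults taken from this file):
#
#     index_params = client.prepare_index_params()
#     index_params.add_index(field_name="my_json", index_type="INVERTED",
#                            params={"json_cast_type": "double",
#                                    "json_path": "my_json['a']['b']"})
#     client.create_index(collection_name, index_params)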
class TestMilvusClientJsonPathIndexInvalid(TestMilvusClientV2Base):
""" Test case of search interface """
@pytest.fixture(scope="function", params=["TRIE", "STL_SORT", "BITMAP"])
def not_supported_varchar_scalar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=["INVERTED"])
def supported_varchar_scalar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=[DataType.INT8.name, DataType.INT16.name, DataType.INT32.name,
DataType.INT64.name, DataType.FLOAT.name,
DataType.ARRAY.name, DataType.FLOAT_VECTOR.name,
DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name, DataType.BINARY_VECTOR.name,
DataType.SPARSE_FLOAT_VECTOR.name, DataType.INT8_VECTOR.name])
def not_supported_json_cast_type(self, request):
yield request.param
@pytest.fixture(scope="function", params=["Json", "BOOL", "double", "varchar"])
def supported_json_cast_type(self, request):
yield request.param
"""
******************************************************************
# The following are invalid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_json_path_index_no_index_params(self):
"""
target: test json path index with:
1. no extra index params at all
2. no json_cast_type
3. no json_path
method: create json path index with the params above missing
expected: raise exception when json_cast_type is missing; json_path is optional, so the last case succeeds
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, "vector")
# 2. prepare index params with no index params
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name="my_json", index_type="INVERTED")
# 3. create index
error = {ct.err_code: 1100, ct.err_msg: "json index must specify cast type: missing parameter"
"[missing_param=json_cast_type]: invalid parameter"
"[expected=valid index params][actual=invalid index params]"}
self.create_index(client, collection_name, index_params,
check_task=CheckTasks.err_res, check_items=error)
# 4. prepare index params with no json_cast_type
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name="my_json", index_type="INVERTED", params={"json_path": "my_json['a']['b']"})
# 5. create index
error = {ct.err_code: 1100, ct.err_msg: "json index must specify cast type: missing parameter"
"[missing_param=json_cast_type]: invalid parameter"
"[expected=valid index params][actual=invalid index params]"}
self.create_index(client, collection_name, index_params,
check_task=CheckTasks.err_res, check_items=error)
# 6. prepare index params with no json_path
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name="my_json", index_type="INVERTED", params={"json_cast_type": "varchar"})
# 7. create index
self.create_index(client, collection_name, index_params)
self.drop_collection(client, collection_name)
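# Note: per the case above, json_cast_type is the only mandatory extra param for a json path index
# (the first two attempts fail with missing_param=json_cast_type), while json_path may be omitted,
# in which case the index appears to cover the JSON field itself (the last create_index succeeds).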
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("invalid_index_type", ["12-s", "12 s", "(mn)", "中文", "%$#"])
def test_milvus_client_json_path_index_invalid_index_type(self, invalid_index_type):
"""
target: test json path index with invalid index type
method: create json path index with invalid index type
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, "vector")
# 2. prepare index params with invalid index type
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name="my_json", index_type=invalid_index_type, params={"json_cast_type": "double",
"json_path": "my_json['a']['b']"})
# 3. create index
error = {ct.err_code: 1100, ct.err_msg: f"invalid parameter[expected=valid index]"
f"[actual=invalid index type: {invalid_index_type}]"}
self.create_index(client, collection_name, index_params,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_json_path_index_not_support_index_type(self, enable_dynamic_field, not_supported_varchar_scalar_index):
"""
target: test json path index with not supported index type
method: create json path index with not supported index type
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, default_dim)
# 2. prepare index params with invalid json index type
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=json_field_name, index_type=not_supported_varchar_scalar_index,
params={"json_cast_type": "DOUBLE", "json_path": "my_json['a']['b']"})
# 3. create index
if not_supported_varchar_scalar_index == "TRIE":
supported_field_type = "varchar"
if not_supported_varchar_scalar_index == "STL_SORT":
supported_field_type = "numeric"
if not_supported_varchar_scalar_index == "BITMAP":
supported_field_type = "bool, int, string and array"
not_supported_varchar_scalar_index = "bitmap index"
error = {ct.err_code: 1100, ct.err_msg: f"{not_supported_varchar_scalar_index} are only supported on "
f"{supported_field_type} field: invalid parameter[expected=valid "
f"index params][actual=invalid index params]"}
self.create_index(client, collection_name, index_params,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
@pytest.mark.parametrize("invalid_json_cast_type", ["12-s", "12 s", "(mn)", "中文", "%$#", 1, 1.0])
def test_milvus_client_json_path_index_invalid_json_cast_type(self, enable_dynamic_field, invalid_json_cast_type,
supported_varchar_scalar_index):
"""
target: test json path index with invalid json_cast_type
method: create json path index with invalid json_cast_type
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, default_dim)
# 2. prepare index params with invalid json index type
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=json_field_name, index_name="json_index", index_type=supported_varchar_scalar_index,
params={"json_cast_type": invalid_json_cast_type, "json_path": f"{json_field_name}['a']['b']"})
# 3. create index
error = {ct.err_code: 1100, ct.err_msg: f"index params][actual=invalid index params]"}
self.create_index(client, collection_name, index_params,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_json_path_index_not_supported_json_cast_type(self, enable_dynamic_field, not_supported_json_cast_type,
supported_varchar_scalar_index):
"""
target: test json path index with not supported json_cast_type
method: create json path index with not supported json_cast_type
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, default_dim)
# 2. prepare index params with invalid json index type
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=json_field_name, index_name="json_index", index_type=supported_varchar_scalar_index,
params={"json_cast_type": not_supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"})
# 3. create index
error = {ct.err_code: 1100, ct.err_msg: f"index params][actual=invalid index params]"}
self.create_index(client, collection_name, index_params,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
@pytest.mark.parametrize("invalid_json_path", [1, 1.0, '/'])
def test_milvus_client_json_path_index_invalid_json_path(self, enable_dynamic_field, invalid_json_path,
supported_varchar_scalar_index):
"""
target: test json path index with invalid json_path
method: create json path index with invalid json_path
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, default_dim)
# 2. prepare index params with invalid json index type
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=json_field_name, index_name="json_index",
index_type=supported_varchar_scalar_index,
params={"json_cast_type": "Double", "json_path": invalid_json_path})
# 3. create index
error = {ct.err_code: 65535, ct.err_msg: f"cannot parse identifier: {invalid_json_path}"}
self.create_index(client, collection_name, index_params,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_json_path_index_not_exist_field_non_dynamic(self, supported_varchar_scalar_index):
"""
target: test json path index on a non-existent field when dynamic field is disabled
method: create json path index on a non-existent field with enable_dynamic_field disabled
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=json_field_name, index_type=supported_varchar_scalar_index,
params={"json_cast_type": "double", "json_path": f"{json_field_name}['a']"})
error = {ct.err_code: 65535, ct.err_msg: f"cannot create index on non-exist field: {json_field_name}"}
self.create_collection(client, collection_name, schema=schema, index_params=index_params,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_different_index_same_json_path(self, enable_dynamic_field, supported_varchar_scalar_index):
"""
target: test creating indexes with different json_cast_type on the same json path of the same field
method: create a second index with a different json_cast_type on the same json path
of the same field, reusing the same index name
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, default_dim)
# 2. prepare index params
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=json_field_name, index_name="json_index",
index_type=supported_varchar_scalar_index,
params={"json_cast_type": "double", "json_path": f"{json_field_name}['a']"})
self.create_index(client, collection_name, index_params)
# 4. prepare another index params
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=json_field_name, index_name="json_index",
index_type=supported_varchar_scalar_index,
params={"json_cast_type": "varchar", "json_path": f"{json_field_name}['a']"})
# 5. create index
error = {ct.err_code: 65535, ct.err_msg: "CreateIndex failed: at most one distinct index is allowed per field"}
self.create_index(client, collection_name, index_params,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_different_index_name_same_json_path(self, enable_dynamic_field, supported_varchar_scalar_index):
"""
target: test json path indexes with different index names but the same json path
method: create json path indexes with different index names but the same json path
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, default_dim)
# 2. prepare index params
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=json_field_name, index_name="json_index_1",
index_type=supported_varchar_scalar_index,
params={"json_cast_type": "varchar", "json_path": f"{json_field_name}['a']"})
self.create_index(client, collection_name, index_params)
# 4. prepare another index params
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=json_field_name, index_name="json_index_2",
index_type=supported_varchar_scalar_index,
params={"json_cast_type": "varchar", "json_path": f"{json_field_name}['a']"})
# 5. create index
error = {ct.err_code: 65535, ct.err_msg: "CreateIndex failed: creating multiple "
"indexes on same field is not supported"}
self.create_index(client, collection_name, index_params,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_different_json_path_index_same_field_same_index_name(self, enable_dynamic_field, supported_json_cast_type,
supported_varchar_scalar_index):
"""
target: test creating different json path indexes under the same index name at the same time
method: add several json path indexes with the same index name to one index_params object
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, schema=schema, index_params=index_params)
# 2. insert
vectors = cf.gen_vectors(default_nb, default_dim)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in range(default_nb)]
self.insert(client, collection_name, rows)
# 3. prepare index params
index_name = "json_index"
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=json_field_name, index_name=index_name, index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"})
index_params.add_index(field_name=json_field_name, index_name=index_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']"})
index_params.add_index(field_name=json_field_name, index_name=index_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}"})
# 4. create index
error = {ct.err_code: 65535, ct.err_msg: "CreateIndex failed: at most one distinct index is allowed per field"}
self.create_index(client, collection_name, index_params,
check_task=CheckTasks.err_res, check_items=error)
class TestMilvusClientJsonPathIndexValid(TestMilvusClientV2Base):
""" Test case of search interface """
@pytest.fixture(scope="function", params=["TRIE", "STL_SORT", "BITMAP"])
def not_supported_varchar_scalar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=["INVERTED"])
def supported_varchar_scalar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=["DOUBLE", "VARCHAR", "BOOL", "double", "varchar", "bool"])
def supported_json_cast_type(self, request):
yield request.param
"""
******************************************************************
# The following are valid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_json_path_index_default(self, enable_dynamic_field, supported_json_cast_type,
supported_varchar_scalar_index):
"""
target: test json path index with default parameter
method: create json path index with default parameter
expected: create json path index successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, schema=schema, index_params=index_params)
# 2. insert with different data distribution
vectors = cf.gen_vectors(default_nb+50, default_dim)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in
range(default_nb)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: i} for i in
range(default_nb, default_nb+10)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {}} for i in
range(default_nb+10, default_nb+20)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': [1, 2, 3]}} for i in
range(default_nb + 20, default_nb + 30)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': [{'b': 1}, 2, 3]}} for i in
range(default_nb + 20, default_nb + 30)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': [{'b': None}, 2, 3]}} for i in
range(default_nb + 30, default_nb + 40)]
self.insert(client, collection_name, rows)
# 3. prepare index params
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=json_field_name, index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]['b']"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]"})
        # 4. create index and check the auto-generated index names
self.create_index(client, collection_name, index_params)
self.list_indexes(client, collection_name)
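        # default index names join the field name and json path tokens with "/"; with dynamic fields
        # the json keys live under the reserved $meta field, so the name is prefixed with "$meta/"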
index_name = json_field_name
if enable_dynamic_field:
index_name = "$meta/" + json_field_name
self.describe_index(client, collection_name, index_name + "/a/b",
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']['b']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + "/a/b"})
# 5. create same json index twice
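        # re-creating existing indexes with identical parameters is expected to succeed without changes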
self.create_index(client, collection_name, index_params)
self.describe_index(client, collection_name, index_name + "/a/b",
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']['b']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + "/a/b"})
self.describe_index(client, collection_name, index_name + '/a',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '/a'})
self.describe_index(client, collection_name, index_name,
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name})
self.describe_index(client, collection_name, index_name + '/a/0/b',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]['b']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '/a/0/b'})
self.describe_index(client, collection_name, index_name + '/a/0',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '/a/0'})
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_json_path_index_default_index_name(self, enable_dynamic_field, supported_json_cast_type,
supported_varchar_scalar_index):
"""
        target: test json path index with the default auto-generated index name
        method: create a json path index without specifying index_name
        expected: create the index successfully and describe it by its default name
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, schema=schema, index_params=index_params)
# 2. insert
vectors = cf.gen_vectors(default_nb, default_dim)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in range(default_nb)]
self.insert(client, collection_name, rows)
        # 3. prepare index params
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=json_field_name, index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"})
        # 4. derive the expected default index name and create the index
if enable_dynamic_field:
index_name = "$meta/" + json_field_name + '/a/b'
else:
index_name = json_field_name + '/a/b'
self.create_index(client, collection_name, index_params)
self.describe_index(client, collection_name, index_name,
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']['b']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip(reason="issue #40636")
def test_milvus_client_json_path_index_on_non_json_field(self, supported_json_cast_type,
supported_varchar_scalar_index):
"""
target: test json path index with "json_cast_type" and "json_path" parameters on non json field
method: create json path index with "json_cast_type" and "json_path" parameters on int64 field
steps: 1. create schema with id, vector and varchar fields
2. prepare index parameters with default vector index
3. create collection with the above defined schema and index params
4. insert default_nb numbers of data
5. prepare index params with "json_cast_type" and "json_path" params on int64 field
6. create index with the new index params
7. check that the results of describe_index interface does not contain the "json_cast_type" and "json_path" parameters
expected: create the original inverted index successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, schema=schema, index_params=index_params)
# 2. insert
vectors = cf.gen_vectors(default_nb, default_dim)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
# 3. prepare index params
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=default_string_field_name, index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type, "json_path": f"{default_string_field_name}['a']['b']"})
# 4. create index
index_name = default_string_field_name
self.create_index(client, collection_name, index_params)
self.describe_index(client, collection_name, index_name,
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{default_string_field_name}['a']['b']",
"index_type": supported_varchar_scalar_index,
"field_name": default_string_field_name,
"index_name": index_name})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_different_json_path_index_same_field_different_index_name(self, enable_dynamic_field, supported_json_cast_type,
supported_varchar_scalar_index):
"""
        target: test different json path indexes on the same field with different index names
        method: add several json path indexes with explicit index names to the same index_params object
        expected: create all indexes successfully, one per json path
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, schema=schema, index_params=index_params)
# 2. insert
vectors = cf.gen_vectors(default_nb, default_dim)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in range(default_nb)]
self.insert(client, collection_name, rows)
        # 3. prepare index params with explicit index names for each json path
index_name = "json_index"
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=json_field_name, index_name=index_name + "1", index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"})
index_params.add_index(field_name=json_field_name, index_name=index_name + "2",
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']"})
index_params.add_index(field_name=json_field_name, index_name=index_name + "3",
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}"})
        # 4. create index and verify each json path index by its name
self.create_index(client, collection_name, index_params)
self.describe_index(client, collection_name, index_name + '1',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']['b']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '1'})
self.describe_index(client, collection_name, index_name + '2',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '2'})
self.describe_index(client, collection_name, index_name + '3',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '3'})
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_diff_index_same_field_diff_index_name_diff_index_params(self, enable_dynamic_field,
supported_json_cast_type,
supported_varchar_scalar_index):
"""
        target: test different json path indexes on the same field with default index names
        method: create each json path index on the same field with its own index_params object
        expected: create all indexes successfully with the expected default index names
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, schema=schema, index_params=index_params)
self.load_collection(client, collection_name)
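        # keep the collection loaded so the json path indexes below are created on a loaded collection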
# 2. insert
vectors = cf.gen_vectors(default_nb, default_dim)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in range(default_nb)]
self.insert(client, collection_name, rows)
        # 3. prepare index params and create each json path index separately
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=json_field_name, index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"})
self.create_index(client, collection_name, index_params)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']"})
self.create_index(client, collection_name, index_params)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}"})
self.create_index(client, collection_name, index_params)
        # 4. verify each index via describe_index
index_name = f"{json_field_name}/a/b"
if enable_dynamic_field:
index_name = "$meta/" + index_name
self.describe_index(client, collection_name, index_name,
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": "my_json['a']['b']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name})
index_name = f"{json_field_name}/a"
if enable_dynamic_field:
index_name = "$meta/" + index_name
self.describe_index(client, collection_name, index_name,
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": "my_json['a']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name})
index_name = f"{json_field_name}"
if enable_dynamic_field:
index_name = "$meta/" + index_name
self.describe_index(client, collection_name, index_name,
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": "my_json",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name
})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_json_index_same_json_path_diff_field(self, enable_dynamic_field, supported_json_cast_type,
supported_varchar_scalar_index):
"""
        target: test json path indexes with the same json path on different json fields
        method: create json path indexes with the same json path on two different json fields
        expected: create both indexes successfully with the expected default index names
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON)
schema.add_field(json_field_name + "1", DataType.JSON)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, schema=schema, index_params=index_params)
# 2. insert
vectors = cf.gen_vectors(default_nb, default_dim)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': {'b': i}},
json_field_name + "1": {'a': {'b': i}}} for i in range(default_nb)]
self.insert(client, collection_name, rows)
        # 3. create the same json path index on each json field
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=json_field_name, index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']['b']"})
self.create_index(client, collection_name, index_params)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=json_field_name + "1",
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}1['a']['b']"})
self.create_index(client, collection_name, index_params)
        # 4. verify both indexes via describe_index
index_name = f"{json_field_name}/a/b"
if enable_dynamic_field:
index_name = "$meta/" + index_name
self.describe_index(client, collection_name, index_name,
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']['b']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name})
index_name = f"{json_field_name}1/a/b"
if enable_dynamic_field:
index_name = "$meta/" + index_name
self.describe_index(client, collection_name, index_name,
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}1['a']['b']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name + "1",
"index_name": index_name})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_json_path_index_before_load(self, enable_dynamic_field, supported_json_cast_type,
supported_varchar_scalar_index):
"""
        target: test json path index created before the collection is loaded
        method: release the collection, insert data, then create json path indexes
        expected: create json path indexes successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, schema=schema, index_params=index_params)
# 2. release collection
self.release_collection(client, collection_name)
        # 3. insert rows with different json value shapes (nested object, scalar, empty object, arrays, null leaf)
vectors = cf.gen_vectors(default_nb+50, default_dim)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in
range(default_nb)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: i} for i in
range(default_nb, default_nb+10)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {}} for i in
range(default_nb+10, default_nb+20)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': [1, 2, 3]}} for i in
range(default_nb + 20, default_nb + 30)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': [{'b': 1}, 2, 3]}} for i in
                 range(default_nb + 30, default_nb + 40)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': [{'b': None}, 2, 3]}} for i in
                 range(default_nb + 40, default_nb + 50)]
self.insert(client, collection_name, rows)
# 4. prepare index params
index_name = "json_index"
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=json_field_name, index_name=index_name, index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '1',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '2',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '3',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]['b']"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '4',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]"})
# 5. create index
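        # all five json path indexes are created while the collection is still released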
self.create_index(client, collection_name, index_params)
self.describe_index(client, collection_name, index_name,
check_task=CheckTasks.check_describe_index_property,
check_items={
#"json_cast_type": supported_json_cast_type, # issue 40426
"json_path": f"{json_field_name}['a']['b']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name})
        # 6. create the same json path indexes again with the same index params
self.create_index(client, collection_name, index_params)
# 7. create same json index twice
self.create_index(client, collection_name, index_params)
self.describe_index(client, collection_name, index_name,
check_task=CheckTasks.check_describe_index_property,
check_items={
# "json_cast_type": supported_json_cast_type, # issue 40426
"json_path": f"{json_field_name}['a']['b']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name})
self.describe_index(client, collection_name, index_name + '1',
check_task=CheckTasks.check_describe_index_property,
check_items={
# "json_cast_type": supported_json_cast_type, # issue 40426
"json_path": f"{json_field_name}['a']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '1'})
        self.describe_index(client, collection_name, index_name + '2',
check_task=CheckTasks.check_describe_index_property,
check_items={
# "json_cast_type": supported_json_cast_type, # issue 40426
"json_path": f"{json_field_name}",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '2'})
self.describe_index(client, collection_name, index_name + '3',
check_task=CheckTasks.check_describe_index_property,
check_items={
# "json_cast_type": supported_json_cast_type, # issue 40426
"json_path": f"{json_field_name}['a'][0]['b']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '3'})
self.describe_index(client, collection_name, index_name + '4',
check_task=CheckTasks.check_describe_index_property,
check_items={
# "json_cast_type": supported_json_cast_type, # issue 40426
"json_path": f"{json_field_name}['a'][0]",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '4'})