From 76959244ef466c9370a3ac8809c2b772d0401929 Mon Sep 17 00:00:00 2001 From: binbin <83755740+binbinlv@users.noreply.github.com> Date: Tue, 18 Feb 2025 14:14:50 +0800 Subject: [PATCH] test: Enable more test cases for auto index (#39939) Signed-off-by: binbin lv --- .../milvus_client/test_milvus_client_index.py | 321 +++++++++++++----- 1 file changed, 229 insertions(+), 92 deletions(-) diff --git a/tests/python_client/milvus_client/test_milvus_client_index.py b/tests/python_client/milvus_client/test_milvus_client_index.py index bc19b92e79..073c2cbbdf 100644 --- a/tests/python_client/milvus_client/test_milvus_client_index.py +++ b/tests/python_client/milvus_client/test_milvus_client_index.py @@ -54,9 +54,9 @@ class TestMilvusClientIndexInvalid(TestMilvusClientV2Base): @pytest.mark.parametrize("name", ["12-s", "12 s", "(mn)", "中文", "%$#"]) def test_milvus_client_index_invalid_collection_name(self, name): """ - target: test search (high level api) normal case - method: create connection, collection, insert and search - expected: search/query successfully + target: test index abnormal case + method: create index on invalid collection name + expected: raise exception """ client = self._client() collection_name = cf.gen_unique_str(prefix) @@ -78,9 +78,9 @@ class TestMilvusClientIndexInvalid(TestMilvusClientV2Base): @pytest.mark.parametrize("name", ["a".join("a" for i in range(256))]) def test_milvus_client_index_collection_name_over_max_length(self, name): """ - target: test search (high level api) normal case - method: create connection, collection, insert and search - expected: search/query successfully + target: test index abnormal case + method: create index on collection name over max length + expected: raise exception """ client = self._client() collection_name = cf.gen_unique_str(prefix) @@ -101,9 +101,9 @@ class TestMilvusClientIndexInvalid(TestMilvusClientV2Base): @pytest.mark.tags(CaseLabel.L1) def test_milvus_client_index_not_exist_collection_name(self): """ - 
target: test search (high level api) normal case - method: create connection, collection, insert and search - expected: search/query successfully + target: test index abnormal case + method: create index on a non-existent collection name + expected: raise exception """ client = self._client() collection_name = cf.gen_unique_str(prefix) @@ -127,9 +127,9 @@ class TestMilvusClientIndexInvalid(TestMilvusClientV2Base): @pytest.mark.parametrize("index", ["12-s", "12 s", "(mn)", "中文", "%$#", "a".join("a" for i in range(256))]) def test_milvus_client_index_invalid_index_type(self, index): """ - target: test search (high level api) normal case - method: create connection, collection, insert and search - expected: search/query successfully + target: test index abnormal case + method: create index on invalid index type + expected: raise exception """ client = self._client() collection_name = cf.gen_unique_str(prefix) @@ -152,9 +152,9 @@ class TestMilvusClientIndexInvalid(TestMilvusClientV2Base): @pytest.mark.parametrize("metric", ["12-s", "12 s", "(mn)", "中文", "%$#", "a".join("a" for i in range(256))]) def test_milvus_client_index_invalid_metric_type(self, metric): """ - target: test search (high level api) normal case - method: create connection, collection, insert and search - expected: search/query successfully + target: test index abnormal case + method: create index on invalid metric type + expected: raise exception """ client = self._client() collection_name = cf.gen_unique_str(prefix) @@ -175,9 +175,9 @@ class TestMilvusClientIndexInvalid(TestMilvusClientV2Base): @pytest.mark.tags(CaseLabel.L1) def test_milvus_client_index_drop_index_before_release(self): """ - target: test search (high level api) normal case - method: create connection, collection, insert and search - expected: search/query successfully + target: test index abnormal case + method: drop index before release + expected: raise exception """ client = self._client() collection_name = cf.gen_unique_str(prefix) 
@@ -190,37 +190,10 @@ class TestMilvusClientIndexInvalid(TestMilvusClientV2Base): self.drop_collection(client, collection_name) @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason="pymilvus issue 1886") - def test_milvus_client_index_multiple_indexes_one_field(self): + def test_milvus_client_create_multiple_diff_index_without_release(self): """ - target: test search (high level api) normal case - method: create connection, collection, insert and search - expected: search/query successfully - """ - client = self._client() - collection_name = cf.gen_unique_str(prefix) - # 1. create collection - self.create_collection(client, collection_name, default_dim, consistency_level="Strong") - # 2. prepare index params - index_params = self.prepare_index_params(client)[0] - index_params.add_index(field_name="vector", index_type="HNSW", metric_type="IP") - # 3. create index - self.create_index(client, collection_name, index_params) - # 4. prepare index params - index_params = self.prepare_index_params(client)[0] - index_params.add_index(field_name="vector", index_type="IVF_FLAT", metric_type="L2") - error = {ct.err_code: 1100, ct.err_msg: f""} - # 5. create another index - self.create_index(client, collection_name, index_params, - check_task=CheckTasks.err_res, check_items=error) - self.drop_collection(client, collection_name) - - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason="pymilvus issue 1886") - def test_milvus_client_create_diff_index_without_release(self): - """ - target: test search (high level api) normal case - method: create connection, collection, insert and search + target: test index abnormal case + method: create different index on one field without release expected: raise exception """ client = self._client() @@ -229,14 +202,16 @@ class TestMilvusClientIndexInvalid(TestMilvusClientV2Base): self.create_collection(client, collection_name, default_dim, consistency_level="Strong") # 2. 
prepare index params index_params = self.prepare_index_params(client)[0] - index_params.add_index(field_name="vector", index_type="HNSW", metric_type="L2") - # 3. create index - self.create_index(client, collection_name, index_params) + index_params.add_index(field_name="vector", index_type="IVF_FLAT", metric_type="L2") + # 3. create another index + error = {ct.err_code: 65535, ct.err_msg: "CreateIndex failed: at most one distinct index is allowed per field"} + self.create_index(client, collection_name, index_params, + check_task=CheckTasks.err_res, check_items=error) self.drop_collection(client, collection_name) class TestMilvusClientIndexValid(TestMilvusClientV2Base): - """ Test case of search interface """ + """ Test case of index interface """ @pytest.fixture(scope="function", params=[False, True]) def auto_id(self, request): @@ -246,10 +221,18 @@ class TestMilvusClientIndexValid(TestMilvusClientV2Base): def metric_type(self, request): yield request.param - @pytest.fixture(scope="function", params=["TRIE", "STL_SORT", "AUTOINDEX"]) + @pytest.fixture(scope="function", params=["TRIE", "STL_SORT", "INVERTED", "AUTOINDEX"]) def scalar_index(self, request): yield request.param + @pytest.fixture(scope="function", params=["TRIE", "INVERTED", "AUTOINDEX", ""]) + def varchar_index(self, request): + yield request.param + + @pytest.fixture(scope="function", params=["STL_SORT", "INVERTED", "AUTOINDEX", ""]) + def numeric_index(self, request): + yield request.param + """ ****************************************************************** # The following are valid base cases @@ -263,9 +246,9 @@ class TestMilvusClientIndexValid(TestMilvusClientV2Base): ct.default_all_indexes_params[:7])) def test_milvus_client_index_default(self, index, params, metric_type): """ - target: test search (high level api) normal case - method: create connection, collection, insert and search - expected: search/query successfully + target: test index normal case + method: create connection, 
collection, create index, insert and search + expected: index/search/query successfully """ client = self._client() collection_name = cf.gen_unique_str(prefix) @@ -307,15 +290,14 @@ class TestMilvusClientIndexValid(TestMilvusClientV2Base): self.drop_collection(client, collection_name) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip(reason="pymilvus issue 1884") @pytest.mark.parametrize("index, params", zip(ct.all_index_types[:7], ct.default_all_indexes_params[:7])) def test_milvus_client_index_with_params(self, index, params, metric_type): """ - target: test search (high level api) normal case - method: create connection, collection, insert and search - expected: search/query successfully + target: test index with user defined params + method: create connection, collection, index, insert and search + expected: index/search/query successfully """ client = self._client() collection_name = cf.gen_unique_str(prefix) @@ -355,15 +337,14 @@ class TestMilvusClientIndexValid(TestMilvusClientV2Base): self.drop_collection(client, collection_name) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip("wait for modification") @pytest.mark.parametrize("index, params", zip(ct.all_index_types[:7], ct.default_all_indexes_params[:7])) def test_milvus_client_index_after_insert(self, index, params, metric_type): """ - target: test search (high level api) normal case - method: create connection, collection, insert and search - expected: search/query successfully + target: test index after insert + method: create connection, collection, insert, index and search + expected: index/search/query successfully """ client = self._client() collection_name = cf.gen_unique_str(prefix) @@ -378,7 +359,7 @@ class TestMilvusClientIndexValid(TestMilvusClientV2Base): self.insert(client, collection_name, rows) # 3. 
prepare index params index_params = self.prepare_index_params(client)[0] - index_params.add_index(field_name="vector", index_type=index, metric_type=metric_type) + index_params.add_index(field_name="vector", index_type=index, metric_type=metric_type, params=params) # 4. create index self.create_index(client, collection_name, index_params) # 5. load collection @@ -400,18 +381,27 @@ class TestMilvusClientIndexValid(TestMilvusClientV2Base): "primary_field": default_primary_key_field_name}) self.drop_collection(client, collection_name) - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip("wait for modification") - def test_milvus_client_index_auto_index(self, scalar_index, metric_type): + @pytest.mark.tags(CaseLabel.L2) + def test_milvus_client_index_auto_index(self, numeric_index, varchar_index, metric_type): """ - target: test search (high level api) normal case + target: test index with autoindex on both scalar and vector field method: create connection, collection, insert and search - expected: search/query successfully + expected: index/search/query successfully """ client = self._client() collection_name = cf.gen_unique_str(prefix) # 1. 
create collection - self.create_collection(client, collection_name, default_dim, consistency_level="Strong") + schema = self.create_schema(client)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True) + schema.add_field(ct.default_int32_field_name, DataType.INT32) + schema.add_field(ct.default_int16_field_name, DataType.INT16) + schema.add_field(ct.default_int8_field_name, DataType.INT8) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64) + schema.add_field(default_float_field_name, DataType.FLOAT) + schema.add_field(ct.default_double_field_name, DataType.DOUBLE) + schema.add_field(ct.default_bool_field_name, DataType.BOOL) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + self.create_collection(client, collection_name, schema=schema, consistency_level="Strong") self.release_collection(client, collection_name) self.drop_index(client, collection_name, "vector") res = self.list_indexes(client, collection_name)[0] @@ -419,19 +409,36 @@ class TestMilvusClientIndexValid(TestMilvusClientV2Base): # 2. 
prepare index params index = "AUTOINDEX" index_params = self.prepare_index_params(client)[0] - index_params.add_index(field_name="vector", index_type=index, metric_type=metric_type) - index_params.add_index(field_name="id", index_type=scalar_index, metric_type=metric_type) + index_params.add_index(field_name=default_vector_field_name, index_type=index, metric_type=metric_type) + index_params.add_index(field_name=ct.default_int32_field_name, index_type=numeric_index, metric_type=metric_type) + index_params.add_index(field_name=ct.default_int16_field_name, index_type=numeric_index, metric_type=metric_type) + index_params.add_index(field_name=ct.default_int8_field_name, index_type=numeric_index, metric_type=metric_type) + index_params.add_index(field_name=default_float_field_name, index_type=numeric_index, metric_type=metric_type) + index_params.add_index(field_name=ct.default_double_field_name, index_type=numeric_index, metric_type=metric_type) + index_params.add_index(field_name=ct.default_bool_field_name, index_type="", metric_type=metric_type) + index_params.add_index(field_name=default_string_field_name, index_type=varchar_index, metric_type=metric_type) + index_params.add_index(field_name=default_primary_key_field_name, index_type=numeric_index, metric_type=metric_type) # 3. create index self.create_index(client, collection_name, index_params) # 4. 
drop index - self.drop_index(client, collection_name, "vector") - self.drop_index(client, collection_name, "id") + self.drop_index(client, collection_name, default_vector_field_name) + self.drop_index(client, collection_name, ct.default_int32_field_name) + self.drop_index(client, collection_name, ct.default_int16_field_name) + self.drop_index(client, collection_name, ct.default_int8_field_name) + self.drop_index(client, collection_name, default_float_field_name) + self.drop_index(client, collection_name, ct.default_double_field_name) + self.drop_index(client, collection_name, ct.default_bool_field_name) + self.drop_index(client, collection_name, default_string_field_name) + self.drop_index(client, collection_name, default_primary_key_field_name) # 5. create index self.create_index(client, collection_name, index_params) # 6. insert rng = np.random.default_rng(seed=19530) rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] + ct.default_int32_field_name: np.int32(i), ct.default_int16_field_name: np.int16(i), + ct.default_int8_field_name: np.int8(i), default_float_field_name: i * 1.0, + ct.default_double_field_name: np.double(i), ct.default_bool_field_name: np.bool_(i), + default_string_field_name: str(i)} for i in range(default_nb)] self.insert(client, collection_name, rows) # 7. 
load collection self.load_collection(client, collection_name) @@ -452,13 +459,145 @@ class TestMilvusClientIndexValid(TestMilvusClientV2Base): "primary_field": default_primary_key_field_name}) self.drop_collection(client, collection_name) - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip("wait for modification") - def test_milvus_client_index_multiple_vectors(self, scalar_index, metric_type): + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_scalar_hybrid_index_small_distinct_before_insert(self, metric_type): """ - target: test search (high level api) normal case + target: test index with autoindex on int/varchar with small distinct value (<=100) method: create connection, collection, insert and search - expected: search/query successfully + expected: index/search/query successfully (autoindex is bitmap index indeed) + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. create collection + int64_field_name = "int" + schema = self.create_schema(client)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64) + schema.add_field(int64_field_name, DataType.INT64) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + self.create_collection(client, collection_name, schema=schema, consistency_level="Strong") + self.release_collection(client, collection_name) + self.drop_index(client, collection_name, "vector") + res = self.list_indexes(client, collection_name)[0] + assert res == [] + # 2. 
prepare index params + index = "AUTOINDEX" + index_params = self.prepare_index_params(client)[0] + index_params.add_index(field_name=default_vector_field_name, index_type=index, metric_type=metric_type) + index_params.add_index(field_name=int64_field_name, index_type=index, metric_type=metric_type) + index_params.add_index(field_name=default_string_field_name, index_type=index, metric_type=metric_type) + # 3. create index + self.create_index(client, collection_name, index_params) + # 4. insert + rng = np.random.default_rng(seed=19530) + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + int64_field_name: np.random.randint(0, 99), default_string_field_name: str(np.random.randint(0, 99))} + for i in range(default_nb)] + self.insert(client, collection_name, rows) + # 5. load collection + self.load_collection(client, collection_name) + # 6. search + vectors_to_search = rng.random((1, default_dim)) + insert_ids = [i for i in range(default_nb)] + self.search(client, collection_name, vectors_to_search, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": len(vectors_to_search), + "ids": insert_ids, + "limit": default_limit}) + # 7. query + self.query(client, collection_name, filter=default_search_exp, + check_task=CheckTasks.check_query_results, + check_items={exp_res: rows, + "with_vec": True, + "primary_field": default_primary_key_field_name}) + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_scalar_hybrid_index_small_to_large_distinct_after_insert(self, metric_type): + """ + target: test index with autoindex on int/varchar with small distinct value (<=100) first and + insert to large distinct (2000+) later + method: create connection, collection, insert and search + expected: index/search/query successfully + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. 
create collection + int64_field_name = "int" + schema = self.create_schema(client)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True) + schema.add_field(ct.default_int32_field_name, DataType.INT32) + schema.add_field(ct.default_int16_field_name, DataType.INT16) + schema.add_field(ct.default_int8_field_name, DataType.INT8) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64) + schema.add_field(int64_field_name, DataType.INT64) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + self.create_collection(client, collection_name, schema=schema, consistency_level="Strong") + self.release_collection(client, collection_name) + self.drop_index(client, collection_name, "vector") + res = self.list_indexes(client, collection_name)[0] + assert res == [] + # 2. insert + rng = np.random.default_rng(seed=19530) + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + int64_field_name: np.random.randint(0, 99), ct.default_int32_field_name: np.int32(i), + ct.default_int16_field_name: np.int16(i), ct.default_int8_field_name: np.int8(i), + default_string_field_name: str(np.random.randint(0, 99))} for i in range(default_nb)] + self.insert(client, collection_name, rows) + # 3. 
prepare index params + index = "AUTOINDEX" + index_params = self.prepare_index_params(client)[0] + index_params.add_index(field_name=default_vector_field_name, index_type=index, metric_type=metric_type) + index_params.add_index(field_name=int64_field_name, index_type=index, metric_type=metric_type) + index_params.add_index(field_name=ct.default_int32_field_name, index_type="", metric_type=metric_type) + index_params.add_index(field_name=ct.default_int16_field_name, metric_type=metric_type) + index_params.add_index(field_name=ct.default_int8_field_name, index_type=index, metric_type=metric_type) + index_params.add_index(field_name=default_string_field_name, index_type=index, metric_type=metric_type) + # 4. create index + self.create_index(client, collection_name, index_params) + # 5. load collection + self.load_collection(client, collection_name) + # 6. search + vectors_to_search = rng.random((1, default_dim)) + insert_ids = [i for i in range(default_nb)] + self.search(client, collection_name, vectors_to_search, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": len(vectors_to_search), + "ids": insert_ids, + "limit": default_limit}) + # 7. query + self.query(client, collection_name, filter=default_search_exp, + check_task=CheckTasks.check_query_results, + check_items={exp_res: rows, + "with_vec": True, + "primary_field": default_primary_key_field_name}) + # 8. 
insert more distinct values into the scalar fields to make the autoindex change + rng = np.random.default_rng(seed=19530) + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + int64_field_name: np.random.randint(0, 99), ct.default_int32_field_name: np.int32(i), + ct.default_int16_field_name: np.int16(i), ct.default_int8_field_name: np.int8(i), + default_string_field_name: str(np.random.randint(0, 99))} for i in range(default_nb, 2*default_nb)] + self.insert(client, collection_name, rows) + self.flush(client, collection_name) + # 9. search + vectors_to_search = rng.random((1, default_dim)) + insert_ids = [i for i in range(2*default_nb)] + self.search(client, collection_name, vectors_to_search, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": len(vectors_to_search), + "ids": insert_ids, + "limit": default_limit}) + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L2) + def test_milvus_client_index_multiple_vectors(self, numeric_index, metric_type): + """ + target: test index for multiple vectors + method: create connection, collection, index, insert and search + expected: index/search/query successfully """ client = self._client() collection_name = cf.gen_unique_str(prefix) @@ -472,7 +611,7 @@ class TestMilvusClientIndexValid(TestMilvusClientV2Base): index = "AUTOINDEX" index_params = self.prepare_index_params(client)[0] index_params.add_index(field_name="vector", index_type=index, metric_type=metric_type) - index_params.add_index(field_name="id", index_type=scalar_index, metric_type=metric_type) + index_params.add_index(field_name="id", index_type=numeric_index, metric_type=metric_type) # 3. create index self.create_index(client, collection_name, index_params) # 4. 
insert @@ -501,15 +640,14 @@ class TestMilvusClientIndexValid(TestMilvusClientV2Base): self.drop_collection(client, collection_name) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip("wait for modification") @pytest.mark.parametrize("index, params", zip(ct.all_index_types[:7], ct.default_all_indexes_params[:7])) def test_milvus_client_index_drop_create_same_index(self, index, params, metric_type): """ - target: test search (high level api) normal case - method: create connection, collection, insert and search - expected: search/query successfully + target: test index after drop and create same index twice + method: create connection, collection, create/drop/create index, insert and search + expected: index create/drop and search/query successfully """ client = self._client() collection_name = cf.gen_unique_str(prefix) @@ -521,7 +659,7 @@ class TestMilvusClientIndexValid(TestMilvusClientV2Base): assert res == [] # 2. prepare index params index_params = self.prepare_index_params(client)[0] - index_params.add_index(field_name="vector", index_type=index, metric_type=metric_type) + index_params.add_index(field_name="vector", index_type=index, params=params, metric_type=metric_type) # 3. create index self.create_index(client, collection_name, index_params) # 4. 
drop index @@ -553,15 +691,14 @@ class TestMilvusClientIndexValid(TestMilvusClientV2Base): self.drop_collection(client, collection_name) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip("wait for modification") @pytest.mark.parametrize("index, params", zip(ct.all_index_types[:7], ct.default_all_indexes_params[:7])) def test_milvus_client_index_drop_create_different_index(self, index, params, metric_type): """ - target: test search (high level api) normal case - method: create connection, collection, insert and search - expected: search/query successfully + target: test index after drop and create different index twice + method: create connection, collection, create/drop/create index, insert and search + expected: index create/drop and search/query successfully """ client = self._client() collection_name = cf.gen_unique_str(prefix) @@ -579,7 +716,7 @@ class TestMilvusClientIndexValid(TestMilvusClientV2Base): # 4. drop index self.drop_index(client, collection_name, "vector") # 4. create different index - index_params.add_index(field_name="vector", index_type=index, metric_type=metric_type) + index_params.add_index(field_name="vector", index_type=index, params=params, metric_type=metric_type) self.create_index(client, collection_name, index_params) # 5. insert rng = np.random.default_rng(seed=19530)