def _create_structure_collection(request, connect, metric_type):
    """Create a uniquely named collection for *metric_type* and register cleanup.

    The name prefix and the dimension are read from the requesting test
    module (``collection_id`` / ``dim``) when it defines them.

    :param request: pytest ``request`` object of the calling fixture
    :param connect: connected client (the ``connect`` fixture)
    :param metric_type: ``MetricType`` member stored in the create params
    :return: name of the created collection
    """
    name_prefix = getattr(request.module, "collection_id", "test")
    collection_name = gen_unique_str(name_prefix)
    # The fallback used to be the string "128", which would have been sent
    # as the 'dimension' value whenever a test module defines no `dim`.
    dimension = getattr(request.module, "dim", 128)
    param = {'collection_name': collection_name,
             'dimension': dimension,
             'index_file_size': index_file_size,
             'metric_type': metric_type}
    status = connect.create_collection(param)
    if not status.OK():
        pytest.exit("collection can not be created, exit pytest ...")

    def teardown():
        # Cleanup drops every collection reported by the server, not only
        # the one created above. The loop variable is named differently so
        # that it no longer shadows the fixture's own `collection_name`.
        status, existing_names = connect.show_collections()
        for existing_name in existing_names:
            connect.drop_collection(existing_name)

    request.addfinalizer(teardown)
    return collection_name


@pytest.fixture(scope="function")
def substructure_collection(request, connect):
    """Function-scoped collection created with ``MetricType.SUBSTRUCTURE``.

    Returns the collection name; exits the pytest run if creation fails.
    """
    return _create_structure_collection(request, connect, MetricType.SUBSTRUCTURE)


@pytest.fixture(scope="function")
def superstructure_collection(request, connect):
    """Function-scoped collection created with ``MetricType.SUPERSTRUCTURE``.

    Returns the collection name; exits the pytest run if creation fails.
    """
    return _create_structure_collection(request, connect, MetricType.SUPERSTRUCTURE)
class TestCollection:
    # Substructure / superstructure cases of TestCollection; the class has
    # further members that are not part of this chunk.

    def test_create_collection_substructure(self, connect):
        '''
        target: test create collection with the SUBSTRUCTURE metric type
        method: create collection with correct params
        expected: create status return ok
        '''
        collection_name = gen_unique_str("test_collection")
        param = {'collection_name': collection_name,
                 'dimension': dim,
                 'index_file_size': index_file_size,
                 'metric_type': MetricType.SUBSTRUCTURE}
        status = connect.create_collection(param)
        assert status.OK()

    def test_create_collection_superstructure(self, connect):
        '''
        target: test create collection with the SUPERSTRUCTURE metric type
        method: create collection with correct params
        expected: create status return ok
        '''
        collection_name = gen_unique_str("test_collection")
        param = {'collection_name': collection_name,
                 'dimension': dim,
                 'index_file_size': index_file_size,
                 'metric_type': MetricType.SUPERSTRUCTURE}
        status = connect.create_collection(param)
        assert status.OK()

    def test_collection_describe_collection_name_substructure(self, connect):
        '''
        target: test describe collection created with correct params
        method: create collection, assert the values returned by describe method
        expected: collection_name and metric_type equal the values used at creation
        '''
        collection_name = gen_unique_str("test_collection")
        param = {'collection_name': collection_name,
                 'dimension': dim,
                 'index_file_size': index_file_size,
                 'metric_type': MetricType.SUBSTRUCTURE}
        status = connect.create_collection(param)
        assert status.OK()
        status, res = connect.describe_collection(collection_name)
        assert status.OK()
        assert res.collection_name == collection_name
        assert res.metric_type == MetricType.SUBSTRUCTURE

    def test_collection_describe_collection_name_superstructure(self, connect):
        '''
        target: test describe collection created with correct params
        method: create collection, assert the values returned by describe method
        expected: collection_name and metric_type equal the values used at creation
        '''
        collection_name = gen_unique_str("test_collection")
        param = {'collection_name': collection_name,
                 'dimension': dim,
                 'index_file_size': index_file_size,
                 'metric_type': MetricType.SUPERSTRUCTURE}
        status = connect.create_collection(param)
        assert status.OK()
        status, res = connect.describe_collection(collection_name)
        assert status.OK()
        assert res.collection_name == collection_name
        assert res.metric_type == MetricType.SUPERSTRUCTURE

    def test_show_collections_substructure(self, connect):
        '''
        target: test show collections lists a newly created collection
        method: create collection, then call show_collections
        expected: status ok and collection_name in show collections
        '''
        collection_name = gen_unique_str("test_collection")
        param = {'collection_name': collection_name,
                 'dimension': dim,
                 'index_file_size': index_file_size,
                 'metric_type': MetricType.SUBSTRUCTURE}
        status = connect.create_collection(param)
        assert status.OK()
        status, result = connect.show_collections()
        assert status.OK()
        assert collection_name in result

    def test_show_collections_superstructure(self, connect):
        '''
        target: test show collections lists a newly created collection
        method: create collection, then call show_collections
        expected: status ok and collection_name in show collections
        '''
        collection_name = gen_unique_str("test_collection")
        param = {'collection_name': collection_name,
                 'dimension': dim,
                 'index_file_size': index_file_size,
                 'metric_type': MetricType.SUPERSTRUCTURE}
        status = connect.create_collection(param)
        assert status.OK()
        status, result = connect.show_collections()
        assert status.OK()
        assert collection_name in result


class TestCollectionCountBinary:
    """
    params means different nb, the nb value may trigger merge, or not
    """
    # Substructure / superstructure members only; the class has further
    # members that are not part of this chunk.

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_substructure_index(self, request, connect):
        """Return the parametrized index config, skipping every non-FLAT type."""
        logging.getLogger().info(request.param)
        if request.param["index_type"] != IndexType.FLAT:
            pytest.skip("Skip index Temporary")
        return request.param

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_superstructure_index(self, request, connect):
        """Return the parametrized index config, skipping every non-FLAT type."""
        logging.getLogger().info(request.param)
        if request.param["index_type"] != IndexType.FLAT:
            pytest.skip("Skip index Temporary")
        return request.param

    def test_collection_rows_count_substructure(self, connect, substructure_collection, add_vectors_nb):
        '''
        target: test collection rows_count is correct or not
        method: create collection and add vectors in it,
            assert the value returned by count_collection method is equal to length of vectors
        expected: the count is equal to the length of vectors
        '''
        nb = add_vectors_nb
        _, vectors = gen_binary_vectors(nb, dim)
        status, ids = connect.add_vectors(collection_name=substructure_collection, records=vectors)
        assert status.OK()
        connect.flush([substructure_collection])
        status, res = connect.count_collection(substructure_collection)
        assert status.OK()
        assert res == nb

    def test_collection_rows_count_superstructure(self, connect, superstructure_collection, add_vectors_nb):
        '''
        target: test collection rows_count is correct or not
        method: create collection and add vectors in it,
            assert the value returned by count_collection method is equal to length of vectors
        expected: the count is equal to the length of vectors
        '''
        nb = add_vectors_nb
        _, vectors = gen_binary_vectors(nb, dim)
        status, ids = connect.add_vectors(collection_name=superstructure_collection, records=vectors)
        assert status.OK()
        connect.flush([superstructure_collection])
        status, res = connect.count_collection(superstructure_collection)
        assert status.OK()
        assert res == nb

    def test_collection_rows_count_after_index_created_substructure(self, connect, substructure_collection, get_substructure_index):
        '''
        target: test count_collection, after index have been created
        method: add vectors in db, and create index, then calling count_collection with correct params
        expected: the count is equal to the number of added vectors
        '''
        nb = 100
        index_type = get_substructure_index["index_type"]
        index_param = get_substructure_index["index_param"]
        _, vectors = gen_binary_vectors(nb, dim)
        status, ids = connect.add_vectors(collection_name=substructure_collection, records=vectors)
        assert status.OK()
        connect.flush([substructure_collection])
        connect.create_index(substructure_collection, index_type, index_param)
        status, res = connect.count_collection(substructure_collection)
        assert status.OK()
        assert res == nb

    def test_collection_rows_count_after_index_created_superstructure(self, connect, superstructure_collection, get_superstructure_index):
        '''
        target: test count_collection, after index have been created
        method: add vectors in db, and create index, then calling count_collection with correct params
        expected: the count is equal to the number of added vectors
        '''
        nb = 100
        index_type = get_superstructure_index["index_type"]
        index_param = get_superstructure_index["index_param"]
        _, vectors = gen_binary_vectors(nb, dim)
        status, ids = connect.add_vectors(collection_name=superstructure_collection, records=vectors)
        assert status.OK()
        connect.flush([superstructure_collection])
        connect.create_index(superstructure_collection, index_type, index_param)
        status, res = connect.count_collection(superstructure_collection)
        assert status.OK()
        assert res == nb


class TestIndexBinary:
    # Substructure / superstructure members only; the class has further
    # members that are not part of this chunk.

    # Binary vectors shared by the tests below through `self.vectors`.
    tmp, vectors = gen_binary_vectors(nb, dim)

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_substructure_index(self, request, connect):
        """Return the parametrized index config, skipping every non-FLAT type."""
        logging.getLogger().info(request.param)
        if request.param["index_type"] != IndexType.FLAT:
            pytest.skip("Skip index Temporary")
        return request.param

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_superstructure_index(self, request, connect):
        """Return the parametrized index config, skipping every non-FLAT type."""
        logging.getLogger().info(request.param)
        if request.param["index_type"] != IndexType.FLAT:
            pytest.skip("Skip index Temporary")
        return request.param

    @pytest.mark.timeout(BUILD_TIMEOUT)
    def test_create_index_partition_structure(self, connect, substructure_collection, get_substructure_index):
        '''
        target: test create index interface
        method: create collection, create partition, and add vectors in it, create index
        expected: return code equals to 0, and the row count equals the number of added vectors
        '''
        index_param = get_substructure_index["index_param"]
        index_type = get_substructure_index["index_type"]
        logging.getLogger().info(get_substructure_index)
        status = connect.create_partition(substructure_collection, tag)
        assert status.OK()
        status, ids = connect.add_vectors(substructure_collection, self.vectors, partition_tag=tag)
        assert status.OK()
        status = connect.create_index(substructure_collection, index_type, index_param)
        assert status.OK()
        status, res = connect.count_collection(substructure_collection)
        assert status.OK()
        assert res == len(self.vectors)

    @pytest.mark.timeout(BUILD_TIMEOUT)
    def test_create_index_search_with_query_vectors_superstructure(self, connect, superstructure_collection, get_superstructure_index):
        '''
        target: test create index interface, search with more query vectors
        method: create collection and add vectors in it, create index
        expected: return code equals to 0, and search success
        '''
        index_param = get_superstructure_index["index_param"]
        index_type = get_superstructure_index["index_type"]
        logging.getLogger().info(get_superstructure_index)
        status, ids = connect.add_vectors(superstructure_collection, self.vectors)
        assert status.OK()
        status = connect.create_index(superstructure_collection, index_type, index_param)
        assert status.OK()
        logging.getLogger().info(connect.describe_index(superstructure_collection))
        query_vecs = [self.vectors[0], self.vectors[1], self.vectors[2]]
        top_k = 5
        search_param = get_search_param(index_type)
        status, result = connect.search_vectors(superstructure_collection, top_k, query_vecs, params=search_param)
        logging.getLogger().info(result)
        assert status.OK()
        assert len(result) == len(query_vecs)

    def test_describe_index_partition_superstructrue(self, connect, superstructure_collection, get_superstructure_index):
        '''
        target: test describe index interface
        method: create collection, create partition and add vectors in it, create index, call describe index
        expected: return code 0, and index structure
        '''
        index_param = get_superstructure_index["index_param"]
        index_type = get_superstructure_index["index_type"]
        logging.getLogger().info(get_superstructure_index)
        status = connect.create_partition(superstructure_collection, tag)
        assert status.OK()
        # Use the class-level binary vectors: a bare `vectors` inside a method
        # does not resolve to the class attribute, unlike in the sibling tests.
        status, ids = connect.add_vectors(superstructure_collection, self.vectors, partition_tag=tag)
        assert status.OK()
        status = connect.create_index(superstructure_collection, index_type, index_param)
        assert status.OK()
        status, result = connect.describe_index(superstructure_collection)
        logging.getLogger().info(result)
        assert status.OK()
        assert result._params == index_param
        assert result._collection_name == superstructure_collection
        assert result._index_type == index_type
class TestIndexBinary:
    # Only the drop-index case visible in this chunk; the class has further
    # members that are not part of it.

    def test_drop_index_substructure(self, connect, substructure_collection, get_substructure_index):
        '''
        target: test drop index interface
        method: create collection, create index, call drop index (no vectors are added)
        expected: return code 0, and describe_index reports the FLAT index type afterwards
        '''
        index_param = get_substructure_index["index_param"]
        index_type = get_substructure_index["index_type"]
        # Only the status of the "mode" command is checked; its value is unused.
        status, _ = connect._cmd("mode")
        assert status.OK()
        status = connect.create_index(substructure_collection, index_type, index_param)
        assert status.OK()
        status, result = connect.describe_index(substructure_collection)
        logging.getLogger().info(result)
        status = connect.drop_index(substructure_collection)
        assert status.OK()
        status, result = connect.describe_index(substructure_collection)
        logging.getLogger().info(result)
        assert status.OK()
        assert result._collection_name == substructure_collection
        assert result._index_type == IndexType.FLAT


class TestSearchBase:
    # Only the fixture visible in this chunk; the class has further members
    # that are not part of it.

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_structure_index(self, request, connect):
        """Return the parametrized index config, skipping every non-FLAT type."""
        logging.getLogger().info(request.param)
        if request.param["index_type"] != IndexType.FLAT:
            pytest.skip("Skip index Temporary")
        return request.param
class TestSearchBase:
    # Only the structure-distance cases visible in this chunk; the class has
    # further members (e.g. `init_binary_data`) that are not part of it.

    def _check_structure_flat_distance(self, connect, collection, expected_distance):
        """Search a FLAT-indexed binary collection and compare the top-1 distance.

        Inserts two binary vectors, builds a FLAT index, searches with one
        generated query vector and asserts that the returned distance matches
        the smaller of the two distances computed locally.

        :param connect: connected client (the ``connect`` fixture)
        :param collection: name of the collection to insert into and search
        :param expected_distance: callable ``(query_bits, stored_bits)`` that
            computes the reference distance locally
        '''
        """
        top_k = 1
        int_vectors, vectors, ids = self.init_binary_data(connect, collection, nb=2)
        index_type = IndexType.FLAT
        index_param = {
            "nlist": 16384
        }
        connect.create_index(collection, index_type, index_param)
        logging.getLogger().info(connect.describe_collection(collection))
        logging.getLogger().info(connect.describe_index(collection))
        # insert=False: the query vector is generated but not stored.
        query_int_vectors, query_vecs, tmp_ids = self.init_binary_data(connect, collection, nb=1, insert=False)
        distance_0 = expected_distance(query_int_vectors[0], int_vectors[0])
        distance_1 = expected_distance(query_int_vectors[0], int_vectors[1])
        search_param = get_search_param(index_type)
        status, result = connect.search_vectors(collection, top_k, query_vecs, params=search_param)
        logging.getLogger().info(status)
        logging.getLogger().info(result)
        # Fail on the status first so a failed search is not reported as an
        # indexing error on `result`.
        assert status.OK()
        assert abs(result[0][0].distance - min(distance_0, distance_1).astype(float)) <= epsilon

    def test_search_distance_substructure_flat_index(self, connect, substructure_collection):
        '''
        target: search substructure_collection, and check the result: distance
        method: compare the return distance value with value computed with substructure()
        expected: the return distance equals to the computed value
        '''
        self._check_structure_flat_distance(connect, substructure_collection, substructure)

    def test_search_distance_superstructure_flat_index(self, connect, superstructure_collection):
        '''
        target: search superstructure_collection, and check the result: distance
        method: compare the return distance value with value computed with superstructure()
        expected: the return distance equals to the computed value
        '''
        self._check_structure_flat_distance(connect, superstructure_collection, superstructure)
def substructure(x, y):
    """Return the substructure distance between two bit sequences.

    Computed as ``1 - |x AND y| / |y|``, where ``|v|`` is the number of
    non-zero entries of ``v``.

    :param x: array-like of bits (any values convertible to bool)
    :param y: array-like of bits with the same shape as ``x``
    :return: the distance as a NumPy float; NaN (with a NumPy warning) when
        ``y`` has no non-zero entry, since the ratio is then 0 / 0
    """
    # The `np.bool` alias was removed from NumPy; the builtin `bool` is the
    # equivalent dtype and works on every NumPy version.
    x = np.asarray(x, dtype=bool)
    y = np.asarray(y, dtype=bool)
    shared = np.bitwise_and(x, y).sum()
    return 1 - np.double(shared) / np.count_nonzero(y)


def superstructure(x, y):
    """Return the superstructure distance between two bit sequences.

    Computed as ``1 - |x AND y| / |x|``, where ``|v|`` is the number of
    non-zero entries of ``v``.

    :param x: array-like of bits (any values convertible to bool)
    :param y: array-like of bits with the same shape as ``x``
    :return: the distance as a NumPy float; NaN (with a NumPy warning) when
        ``x`` has no non-zero entry, since the ratio is then 0 / 0
    """
    # The `np.bool` alias was removed from NumPy; the builtin `bool` is the
    # equivalent dtype and works on every NumPy version.
    x = np.asarray(x, dtype=bool)
    y = np.asarray(y, dtype=bool)
    shared = np.bitwise_and(x, y).sum()
    return 1 - np.double(shared) / np.count_nonzero(x)