import time
import random
import pdb
import threading
import logging
from multiprocessing import Pool, Process

import pytest
from milvus import IndexType, MetricType

from utils import *

dim = 128
index_file_size = 10
GET_TIMEOUT = 30
nprobe = 1
top_k = 1
epsilon = 0.001
tag = "1970-01-01"
nb = 6000


def _segment_name_of(connect, collection, partition_index=0, expected_tag=None):
    '''
    Return the name of the first segment of one partition of `collection`.

    Calls `connect.collection_info`, asserts the call succeeded, and reads
    `partitions_stat[partition_index].segments_stat[0].segment_name`.
    When `expected_tag` is given, the partition's tag is asserted to equal it
    before the segment name is read.
    '''
    status, info = connect.collection_info(collection)
    assert status.OK()
    partition = info.partitions_stat[partition_index]
    if expected_tag is not None:
        assert partition.tag == expected_tag
    return partition.segments_stat[0].segment_name


def _assert_segment_ids(connect, collection, segment_name, expected_ids):
    '''
    Assert that `get_vector_ids` succeeds and returns exactly `expected_ids`.

    The status is checked explicitly: every caller documents
    "expected: status ok", so a failed call must fail the test even if the
    returned ids happen to look right.
    '''
    status, vector_ids = connect.get_vector_ids(collection, segment_name)
    assert status.OK()
    # vector_ids should match the expected ids, in the same order
    assert len(vector_ids) == len(expected_ids)
    for position, expected in enumerate(expected_ids):
        assert vector_ids[position] == expected


def _cpu_supported_index(request, connect):
    '''
    Shared body of the `get_simple_index` fixtures.

    Skips the test unless the server reports "CPU" for `_cmd("mode")` and the
    parametrized index type is one of FLAT / IVFLAT / IVF_SQ8; otherwise
    returns the index parameter dict unchanged.
    '''
    if str(connect._cmd("mode")[1]) != "CPU":
        pytest.skip("Only support CPU mode")
    supported = [IndexType.IVF_SQ8, IndexType.IVFLAT, IndexType.FLAT]
    if request.param["index_type"] not in supported:
        pytest.skip("Only support index_type: flat/ivf_flat/ivf_sq8")
    return request.param


class TestGetVectorIdsBase:
    '''
    Cases for `get_vector_ids` that use the `collection` fixture: invalid
    arguments first, then the positive paths with and without an index,
    with and without a partition, and after a delete.
    '''

    def get_valid_segment_name(self, connect, collection):
        '''
        Insert `nb` vectors, flush, and return the name of the first segment
        of the first partition, so that callers have a real segment to query.
        '''
        vectors = gen_vector(nb, dim)
        status, ids = connect.add_vectors(collection, vectors)
        assert status.OK()
        status = connect.flush([collection])
        assert status.OK()
        segment_name = _segment_name_of(connect, collection)
        logging.getLogger().info(segment_name)
        return segment_name

    # ******************************************************************
    #   The following cases are used to test `get_vector_ids` function
    # ******************************************************************

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_collection_name_None(self, connect, collection):
        '''
        target: get vector ids where collection name is None
        method: call get_vector_ids with the collection_name: None
        expected: exception raised
        '''
        collection_name = None
        segment_name = self.get_valid_segment_name(connect, collection)
        with pytest.raises(Exception):
            connect.get_vector_ids(collection_name, segment_name)

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_collection_name_not_existed(self, connect, collection):
        '''
        target: get vector ids where collection name does not exist
        method: call get_vector_ids with a random collection_name, which is not in db
        expected: status not ok
        '''
        collection_name = gen_unique_str("not_existed_collection")
        segment_name = self.get_valid_segment_name(connect, collection)
        status, vector_ids = connect.get_vector_ids(collection_name, segment_name)
        assert not status.OK()

    @pytest.fixture(
        scope="function",
        params=gen_invalid_collection_names()
    )
    def get_collection_name(self, request):
        '''Yield each invalid collection name produced by gen_invalid_collection_names().'''
        yield request.param

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_collection_name_invalid(self, connect, collection, get_collection_name):
        '''
        target: get vector ids where collection name is invalid
        method: call get_vector_ids with invalid collection_name
        expected: status not ok
        '''
        collection_name = get_collection_name
        segment_name = self.get_valid_segment_name(connect, collection)
        status, vector_ids = connect.get_vector_ids(collection_name, segment_name)
        assert not status.OK()

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_segment_name_None(self, connect, collection):
        '''
        target: get vector ids where segment name is None
        method: call get_vector_ids with the segment_name: None
        expected: exception raised
        '''
        # Called only for its side effect: the collection gets a flushed
        # segment before the invalid lookup is attempted.
        self.get_valid_segment_name(connect, collection)
        segment = None
        with pytest.raises(Exception):
            connect.get_vector_ids(collection, segment)

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_segment_name_not_existed(self, connect, collection):
        '''
        target: get vector ids where segment name does not exist
        method: call get_vector_ids with a random segment name
        expected: status not ok
        '''
        # Called only for its side effect: the collection gets a flushed
        # segment before the unknown segment name is looked up.
        self.get_valid_segment_name(connect, collection)
        segment = gen_unique_str("not_existed_segment")
        status, vector_ids = connect.get_vector_ids(collection, segment)
        logging.getLogger().info(vector_ids)
        assert not status.OK()

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_without_index_A(self, connect, collection):
        '''
        target: get vector ids when there is no index
        method: call get_vector_ids and check if the segment contains vectors
        expected: status ok
        '''
        vectors = gen_vector(10, dim)
        status, ids = connect.add_vectors(collection, vectors)
        assert status.OK()
        status = connect.flush([collection])
        assert status.OK()
        segment_name = _segment_name_of(connect, collection)
        _assert_segment_ids(connect, collection, segment_name, ids)

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_without_index_B(self, connect, collection):
        '''
        target: get vector ids when there is no index but with partition
        method: create partition, add vectors to it and call get_vector_ids, check if the segment contains vectors
        expected: status ok
        '''
        status = connect.create_partition(collection, tag)
        assert status.OK()
        vectors = gen_vector(10, dim)
        status, ids = connect.add_vectors(collection, vectors, partition_tag=tag)
        assert status.OK()
        status = connect.flush([collection])
        assert status.OK()
        # The created partition is read from index 1 of partitions_stat.
        segment_name = _segment_name_of(connect, collection, partition_index=1, expected_tag=tag)
        _assert_segment_ids(connect, collection, segment_name, ids)

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_simple_index(self, request, connect):
        '''Index parameters from gen_simple_index(), restricted to CPU mode and flat/ivf_flat/ivf_sq8.'''
        return _cpu_supported_index(request, connect)

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_with_index_A(self, connect, collection, get_simple_index):
        '''
        target: get vector ids when there is index
        method: call get_vector_ids and check if the segment contains vectors
        expected: status ok
        '''
        index_param = get_simple_index["index_param"]
        index_type = get_simple_index["index_type"]
        status = connect.create_index(collection, index_type, index_param)
        assert status.OK()
        vectors = gen_vector(10, dim)
        status, ids = connect.add_vectors(collection, vectors)
        assert status.OK()
        status = connect.flush([collection])
        assert status.OK()
        segment_name = _segment_name_of(connect, collection)
        _assert_segment_ids(connect, collection, segment_name, ids)

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_with_index_B(self, connect, collection, get_simple_index):
        '''
        target: get vector ids when there is index and with partition
        method: create partition, add vectors to it and call get_vector_ids, check if the segment contains vectors
        expected: status ok
        '''
        status = connect.create_partition(collection, tag)
        assert status.OK()
        index_param = get_simple_index["index_param"]
        index_type = get_simple_index["index_type"]
        status = connect.create_index(collection, index_type, index_param)
        assert status.OK()
        vectors = gen_vector(10, dim)
        status, ids = connect.add_vectors(collection, vectors, partition_tag=tag)
        assert status.OK()
        status = connect.flush([collection])
        assert status.OK()
        segment_name = _segment_name_of(connect, collection, partition_index=1, expected_tag=tag)
        _assert_segment_ids(connect, collection, segment_name, ids)

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_after_delete_vectors(self, connect, collection):
        '''
        target: get vector ids after vectors are deleted
        method: add vectors and delete a few, call get_vector_ids
        expected: status ok, vector_ids decreased after vectors deleted
        '''
        vectors = gen_vector(2, dim)
        status, ids = connect.add_vectors(collection, vectors)
        assert status.OK()
        delete_ids = [ids[0]]
        status = connect.delete_by_id(collection, delete_ids)
        # Check the delete itself; previously this status was overwritten
        # by the flush status without ever being inspected.
        assert status.OK()
        status = connect.flush([collection])
        assert status.OK()
        segment_name = _segment_name_of(connect, collection)
        # Only the second inserted vector should remain.
        _assert_segment_ids(connect, collection, segment_name, [ids[1]])


class TestGetVectorIdsIP:
    '''
    The positive `get_vector_ids` cases repeated against the `ip_collection`
    fixture (presumably an inner-product collection; the fixture is defined
    outside this file).
    '''

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_without_index_A(self, connect, ip_collection):
        '''
        target: get vector ids when there is no index
        method: call get_vector_ids and check if the segment contains vectors
        expected: status ok
        '''
        vectors = gen_vector(10, dim)
        status, ids = connect.add_vectors(ip_collection, vectors)
        assert status.OK()
        status = connect.flush([ip_collection])
        assert status.OK()
        segment_name = _segment_name_of(connect, ip_collection)
        _assert_segment_ids(connect, ip_collection, segment_name, ids)

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_without_index_B(self, connect, ip_collection):
        '''
        target: get vector ids when there is no index but with partition
        method: create partition, add vectors to it and call get_vector_ids, check if the segment contains vectors
        expected: status ok
        '''
        status = connect.create_partition(ip_collection, tag)
        assert status.OK()
        vectors = gen_vector(10, dim)
        status, ids = connect.add_vectors(ip_collection, vectors, partition_tag=tag)
        assert status.OK()
        status = connect.flush([ip_collection])
        assert status.OK()
        segment_name = _segment_name_of(connect, ip_collection, partition_index=1, expected_tag=tag)
        _assert_segment_ids(connect, ip_collection, segment_name, ids)

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_simple_index(self, request, connect):
        '''Index parameters from gen_simple_index(), restricted to CPU mode and flat/ivf_flat/ivf_sq8.'''
        return _cpu_supported_index(request, connect)

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_with_index_A(self, connect, ip_collection, get_simple_index):
        '''
        target: get vector ids when there is index
        method: call get_vector_ids and check if the segment contains vectors
        expected: status ok
        '''
        index_param = get_simple_index["index_param"]
        index_type = get_simple_index["index_type"]
        status = connect.create_index(ip_collection, index_type, index_param)
        assert status.OK()
        vectors = gen_vector(10, dim)
        status, ids = connect.add_vectors(ip_collection, vectors)
        assert status.OK()
        status = connect.flush([ip_collection])
        assert status.OK()
        segment_name = _segment_name_of(connect, ip_collection)
        _assert_segment_ids(connect, ip_collection, segment_name, ids)

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_with_index_B(self, connect, ip_collection, get_simple_index):
        '''
        target: get vector ids when there is index and with partition
        method: create partition, add vectors to it and call get_vector_ids, check if the segment contains vectors
        expected: status ok
        '''
        status = connect.create_partition(ip_collection, tag)
        assert status.OK()
        index_param = get_simple_index["index_param"]
        index_type = get_simple_index["index_type"]
        status = connect.create_index(ip_collection, index_type, index_param)
        assert status.OK()
        vectors = gen_vector(10, dim)
        status, ids = connect.add_vectors(ip_collection, vectors, partition_tag=tag)
        assert status.OK()
        status = connect.flush([ip_collection])
        assert status.OK()
        segment_name = _segment_name_of(connect, ip_collection, partition_index=1, expected_tag=tag)
        _assert_segment_ids(connect, ip_collection, segment_name, ids)

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_after_delete_vectors(self, connect, ip_collection):
        '''
        target: get vector ids after vectors are deleted
        method: add vectors and delete a few, call get_vector_ids
        expected: status ok, vector_ids decreased after vectors deleted
        '''
        vectors = gen_vector(2, dim)
        status, ids = connect.add_vectors(ip_collection, vectors)
        assert status.OK()
        delete_ids = [ids[0]]
        status = connect.delete_by_id(ip_collection, delete_ids)
        # Check the delete itself before its status is replaced by flush's.
        assert status.OK()
        status = connect.flush([ip_collection])
        assert status.OK()
        segment_name = _segment_name_of(connect, ip_collection)
        # Only the second inserted vector should remain.
        _assert_segment_ids(connect, ip_collection, segment_name, [ids[1]])


class TestGetVectorIdsJAC:
    '''
    The positive `get_vector_ids` cases repeated against the `jac_collection`
    fixture, using binary vectors from gen_binary_vectors() (presumably a
    Jaccard-metric collection; the fixture is defined outside this file).
    '''

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_without_index_A(self, connect, jac_collection):
        '''
        target: get vector ids when there is no index
        method: call get_vector_ids and check if the segment contains vectors
        expected: status ok
        '''
        # gen_binary_vectors returns a pair; only the second item is inserted.
        _, vectors = gen_binary_vectors(10, dim)
        status, ids = connect.add_vectors(jac_collection, vectors)
        assert status.OK()
        status = connect.flush([jac_collection])
        assert status.OK()
        segment_name = _segment_name_of(connect, jac_collection)
        _assert_segment_ids(connect, jac_collection, segment_name, ids)

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_without_index_B(self, connect, jac_collection):
        '''
        target: get vector ids when there is no index but with partition
        method: create partition, add vectors to it and call get_vector_ids, check if the segment contains vectors
        expected: status ok
        '''
        status = connect.create_partition(jac_collection, tag)
        assert status.OK()
        _, vectors = gen_binary_vectors(10, dim)
        status, ids = connect.add_vectors(jac_collection, vectors, partition_tag=tag)
        assert status.OK()
        status = connect.flush([jac_collection])
        assert status.OK()
        segment_name = _segment_name_of(connect, jac_collection, partition_index=1, expected_tag=tag)
        _assert_segment_ids(connect, jac_collection, segment_name, ids)

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_jaccard_index(self, request, connect):
        '''Index parameters from gen_simple_index(), restricted to IVFLAT and FLAT; others are skipped.'''
        logging.getLogger().info(request.param)
        if request.param["index_type"] in (IndexType.IVFLAT, IndexType.FLAT):
            return request.param
        pytest.skip("Skip index Temporary")

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_with_index_A(self, connect, jac_collection, get_jaccard_index):
        '''
        target: get vector ids when there is index
        method: call get_vector_ids and check if the segment contains vectors
        expected: status ok
        '''
        index_param = get_jaccard_index["index_param"]
        index_type = get_jaccard_index["index_type"]
        status = connect.create_index(jac_collection, index_type, index_param)
        assert status.OK()
        _, vectors = gen_binary_vectors(10, dim)
        status, ids = connect.add_vectors(jac_collection, vectors)
        assert status.OK()
        status = connect.flush([jac_collection])
        assert status.OK()
        segment_name = _segment_name_of(connect, jac_collection)
        _assert_segment_ids(connect, jac_collection, segment_name, ids)

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_with_index_B(self, connect, jac_collection, get_jaccard_index):
        '''
        target: get vector ids when there is index and with partition
        method: create partition, add vectors to it and call get_vector_ids, check if the segment contains vectors
        expected: status ok
        '''
        status = connect.create_partition(jac_collection, tag)
        assert status.OK()
        index_param = get_jaccard_index["index_param"]
        index_type = get_jaccard_index["index_type"]
        status = connect.create_index(jac_collection, index_type, index_param)
        assert status.OK()
        _, vectors = gen_binary_vectors(10, dim)
        status, ids = connect.add_vectors(jac_collection, vectors, partition_tag=tag)
        assert status.OK()
        status = connect.flush([jac_collection])
        assert status.OK()
        segment_name = _segment_name_of(connect, jac_collection, partition_index=1, expected_tag=tag)
        _assert_segment_ids(connect, jac_collection, segment_name, ids)

    @pytest.mark.timeout(GET_TIMEOUT)
    def test_get_vector_ids_after_delete_vectors(self, connect, jac_collection):
        '''
        target: get vector ids after vectors are deleted
        method: add vectors and delete a few, call get_vector_ids
        expected: status ok, vector_ids decreased after vectors deleted
        '''
        _, vectors = gen_binary_vectors(2, dim)
        status, ids = connect.add_vectors(jac_collection, vectors)
        assert status.OK()
        delete_ids = [ids[0]]
        status = connect.delete_by_id(jac_collection, delete_ids)
        # Check the delete itself before its status is replaced by flush's.
        assert status.OK()
        status = connect.flush([jac_collection])
        assert status.OK()
        segment_name = _segment_name_of(connect, jac_collection)
        # Only the second inserted vector should remain.
        _assert_segment_ids(connect, jac_collection, segment_name, [ids[1]])