# Source: milvus/tests/python_test/entity/test_get_entity_by_id.py
# Commit: 84110d2684 (Cai Yudong) - Add tests/benchmark and tests/python_test using new python SDK
# Signed-off-by: Cai Yudong <yudong.cai@zilliz.com>
# Date: 2021-02-25 17:35:36 +08:00
#
# 667 lines
# 27 KiB
# Python

import time
import random
import pdb
import copy
import logging
from multiprocessing import Pool, Process
import concurrent.futures
from threading import current_thread
import pytest
from utils import *
from constants import *
# Query body with a single "must" vector clause on the default float-vector
# field: one generated query vector, topk=10, nprobe=10.
# NOTE(review): not referenced by any test in the visible part of this file.
default_single_query = {
    "bool": {
        "must": [
            {
                "vector": {
                    default_float_vec_field_name: {
                        "topk": 10,
                        "query": gen_vectors(1, default_dim),
                        "params": {"nprobe": 10},
                    }
                }
            }
        ]
    }
}
class TestGetBase:
    """
    ******************************************************************
      The following cases are used to test `get_entity_by_id` function
    ******************************************************************
    """

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_simple_index(self, request, connect):
        '''
        Parametrized fixture: one index definition per entry of gen_simple_index().
        The case is skipped when the server reports "CPU" mode and the index type
        is listed by index_cpu_not_support().
        '''
        if str(connect._cmd("mode")) == "CPU":
            if request.param["index_type"] in index_cpu_not_support():
                pytest.skip("sq8h not support in CPU mode")
        return request.param

    @pytest.fixture(
        scope="function",
        params=[
            1,
            500
        ],
    )
    def get_pos(self, request):
        '''
        Parametrized fixture: a position / count (1 or 500) used to pick ids
        out of the list of inserted ids.
        '''
        yield request.param

    def test_get_entity(self, connect, collection, get_pos):
        '''
        target: test get_entity_by_id, get one
        method: add entities, flush, get the entity stored at position get_pos
        expected: vector returned equals the one inserted at that position
        '''
        ids = connect.bulk_insert(collection, default_entities)
        connect.flush([collection])
        res_count = connect.count_entities(collection)
        assert res_count == default_nb
        get_ids = [ids[get_pos]]
        res = connect.get_entity_by_id(collection, get_ids)
        assert_equal_vector(res[0].get(default_float_vec_field_name), default_entities[-1]["values"][get_pos])

    def test_get_entity_multi_ids(self, connect, collection, get_pos):
        '''
        target: test get_entity_by_id, get several entities in one call
        method: add entities, flush, get the first get_pos ids
        expected: vectors returned equal the inserted ones, in the same order
        '''
        ids = connect.bulk_insert(collection, default_entities)
        connect.flush([collection])
        get_ids = ids[:get_pos]
        res = connect.get_entity_by_id(collection, get_ids)
        for i in range(get_pos):
            assert_equal_vector(res[i].get(default_float_vec_field_name), default_entities[-1]["values"][i])

    def test_get_entity_parts_ids(self, connect, collection):
        '''
        target: test get_entity_by_id, some ids in collection, some ids not
        method: add entities, get [first id, 1, last id]
        expected: first and last entities equal insert, the middle result is None
        '''
        ids = connect.bulk_insert(collection, default_entities)
        connect.flush([collection])
        # presumably 1 is never one of the server-generated ids -- TODO confirm
        get_ids = [ids[0], 1, ids[-1]]
        res = connect.get_entity_by_id(collection, get_ids)
        assert_equal_vector(res[0].get(default_float_vec_field_name), default_entities[-1]["values"][0])
        assert_equal_vector(res[-1].get(default_float_vec_field_name), default_entities[-1]["values"][-1])
        assert res[1] is None

    def test_get_entity_limit(self, connect, collection, args):
        '''
        target: test get_entity_by_id with too many ids in one request
        method: add entities, then get with every inserted id at once
                (original note: limit > 1000); skipped for the HTTP handler
        expected: exception raised
        '''
        if args["handler"] == "HTTP":
            pytest.skip("skip in http mode")
        ids = connect.bulk_insert(collection, default_entities)
        connect.flush([collection])
        with pytest.raises(Exception):
            connect.get_entity_by_id(collection, ids)

    def test_get_entity_same_ids(self, connect, id_collection):
        '''
        target: test get_entity_by_id, with the same ids
        method: add default_nb entities that all use id 1, get id 1 once
        expected: exactly one entity returned, equal to the first one inserted
        '''
        ids = [1] * default_nb
        connect.bulk_insert(id_collection, default_entities, ids)
        connect.flush([id_collection])
        get_ids = [ids[0]]
        res = connect.get_entity_by_id(id_collection, get_ids)
        assert len(res) == 1
        assert_equal_vector(res[0].get(default_float_vec_field_name), default_entities[-1]["values"][0])

    def test_get_entity_params_same_ids(self, connect, id_collection):
        '''
        target: test get_entity_by_id, with the same id repeated in the request
        method: add one entity with id 1, get [1, 1]
        expected: one result per requested id, each equal to the inserted entity
        '''
        ids = [1]
        connect.bulk_insert(id_collection, default_entity, ids)
        connect.flush([id_collection])
        get_ids = [1, 1]
        res = connect.get_entity_by_id(id_collection, get_ids)
        assert len(res) == len(get_ids)
        for i in range(len(get_ids)):
            logging.getLogger().info(i)
            assert_equal_vector(res[i].get(default_float_vec_field_name), default_entity[-1]["values"][0])

    def test_get_entities_params_same_ids(self, connect, collection):
        '''
        target: test get_entity_by_id, with the same id repeated in the request
        method: add entities, get the first returned id twice
        expected: one result per requested id, each equal to the first inserted entity
        '''
        res_ids = connect.bulk_insert(collection, default_entities)
        connect.flush([collection])
        get_ids = [res_ids[0], res_ids[0]]
        res = connect.get_entity_by_id(collection, get_ids)
        assert len(res) == len(get_ids)
        for i in range(len(get_ids)):
            assert_equal_vector(res[i].get(default_float_vec_field_name), default_entities[-1]["values"][0])

    # ******************************************************************
    #   The following cases are used to test `get_entity_by_id` function, with different metric type
    # ******************************************************************

    def test_get_entity_parts_ids_binary(self, connect, binary_collection):
        '''
        target: test get_entity_by_id, some ids in the binary collection, some ids not
        method: add binary entities, get [first id, 1, last id]
        expected: first and last entities equal insert, the middle result is None
        '''
        ids = connect.bulk_insert(binary_collection, default_binary_entities)
        connect.flush([binary_collection])
        get_ids = [ids[0], 1, ids[-1]]
        res = connect.get_entity_by_id(binary_collection, get_ids)
        assert_equal_vector(res[0].get("binary_vector"), default_binary_entities[-1]["values"][0])
        assert_equal_vector(res[-1].get("binary_vector"), default_binary_entities[-1]["values"][-1])
        assert res[1] is None

    # ******************************************************************
    #   The following cases are used to test `get_entity_by_id` function, with tags
    # ******************************************************************

    def test_get_entities_tag(self, connect, collection, get_pos):
        '''
        target: test get_entity_by_id
        method: create a partition, add entities with its tag, get the first get_pos ids
        expected: entities returned equal insert
        '''
        connect.create_partition(collection, default_tag)
        ids = connect.bulk_insert(collection, default_entities, partition_tag=default_tag)
        connect.flush([collection])
        get_ids = ids[:get_pos]
        res = connect.get_entity_by_id(collection, get_ids)
        for i in range(get_pos):
            assert_equal_vector(res[i].get(default_float_vec_field_name), default_entities[-1]["values"][i])

    def test_get_entities_tag_default(self, connect, collection, get_pos):
        '''
        target: test get_entity_by_id
        method: create a partition, add entities without a tag, get the first get_pos ids
        expected: entities returned equal insert
        '''
        connect.create_partition(collection, default_tag)
        ids = connect.bulk_insert(collection, default_entities)
        connect.flush([collection])
        get_ids = ids[:get_pos]
        res = connect.get_entity_by_id(collection, get_ids)
        for i in range(get_pos):
            assert_equal_vector(res[i].get(default_float_vec_field_name), default_entities[-1]["values"][i])

    def test_get_entities_tags_default(self, connect, collection, get_pos):
        '''
        target: test get_entity_by_id
        method: create two partitions, add entities without a tag, get the first get_pos ids
        expected: entities returned equal insert
        '''
        tag_new = "tag_new"
        connect.create_partition(collection, default_tag)
        connect.create_partition(collection, tag_new)
        ids = connect.bulk_insert(collection, default_entities)
        connect.flush([collection])
        get_ids = ids[:get_pos]
        res = connect.get_entity_by_id(collection, get_ids)
        for i in range(get_pos):
            assert_equal_vector(res[i].get(default_float_vec_field_name), default_entities[-1]["values"][i])

    def test_get_entities_tags_A(self, connect, collection, get_pos):
        '''
        target: test get_entity_by_id
        method: create two partitions, add entities into the first one, get the first get_pos ids
        expected: entities returned equal insert
        '''
        tag_new = "tag_new"
        connect.create_partition(collection, default_tag)
        connect.create_partition(collection, tag_new)
        ids = connect.bulk_insert(collection, default_entities, partition_tag=default_tag)
        connect.flush([collection])
        get_ids = ids[:get_pos]
        res = connect.get_entity_by_id(collection, get_ids)
        for i in range(get_pos):
            assert_equal_vector(res[i].get(default_float_vec_field_name), default_entities[-1]["values"][i])

    def test_get_entities_tags_B(self, connect, collection, get_pos):
        '''
        target: test get_entity_by_id
        method: create two partitions, add a different batch of entities into each,
                get the first get_pos ids of both batches in one request
        expected: results come back in request order and equal the inserted entities
        '''
        tag_new = "tag_new"
        connect.create_partition(collection, default_tag)
        connect.create_partition(collection, tag_new)
        new_entities = gen_entities(default_nb + 1)
        ids = connect.bulk_insert(collection, default_entities, partition_tag=default_tag)
        ids_new = connect.bulk_insert(collection, new_entities, partition_tag=tag_new)
        connect.flush([collection])
        get_ids = ids[:get_pos]
        get_ids.extend(ids_new[:get_pos])
        res = connect.get_entity_by_id(collection, get_ids)
        # first half of the results belongs to the first batch ...
        for i in range(get_pos):
            assert_equal_vector(res[i].get(default_float_vec_field_name), default_entities[-1]["values"][i])
        # ... second half to the batch inserted into the new partition
        for i in range(get_pos, get_pos * 2):
            assert_equal_vector(res[i].get(default_float_vec_field_name), new_entities[-1]["values"][i - get_pos])

    @pytest.mark.level(2)
    def test_get_entities_indexed_tag(self, connect, collection, get_simple_index, get_pos):
        '''
        target: test get_entity_by_id
        method: add entities with tag, create index, get the first get_pos ids
        expected: entities returned equal insert
        '''
        connect.create_partition(collection, default_tag)
        ids = connect.bulk_insert(collection, default_entities, partition_tag=default_tag)
        connect.flush([collection])
        connect.create_index(collection, default_float_vec_field_name, get_simple_index)
        get_ids = ids[:get_pos]
        res = connect.get_entity_by_id(collection, get_ids)
        for i in range(get_pos):
            assert_equal_vector(res[i].get(default_float_vec_field_name), default_entities[-1]["values"][i])

    # ******************************************************************
    #   The following cases are used to test `get_entity_by_id` function, with fields params
    # ******************************************************************

    def test_get_entity_field(self, connect, collection, get_pos):
        '''
        target: test get_entity_by_id, get one entity restricted to one field
        method: add entities, get the id at get_pos with fields=["int64"]
        expected: result describes the "int64" field with the inserted value
        '''
        ids = connect.bulk_insert(collection, default_entities)
        connect.flush([collection])
        get_ids = [ids[get_pos]]
        fields = ["int64"]
        res = connect.get_entity_by_id(collection, get_ids, fields=fields)
        # assert fields
        res = res.dict()
        assert res[0]["field"] == fields[0]
        assert res[0]["values"] == [default_entities[0]["values"][get_pos]]
        assert res[0]["type"] == DataType.INT64

    def test_get_entity_fields(self, connect, collection, get_pos):
        '''
        target: test get_entity_by_id, get one entity restricted to several fields
        method: add entities, get the id at get_pos with three field names
        expected: one result entry per field, each holding the inserted value
        '''
        ids = connect.bulk_insert(collection, default_entities)
        connect.flush([collection])
        get_ids = [ids[get_pos]]
        fields = ["int64", "float", default_float_vec_field_name]
        res = connect.get_entity_by_id(collection, get_ids, fields=fields)
        # assert fields
        res = res.dict()
        assert len(res) == len(fields)
        for field in res:
            if field["field"] == fields[0]:
                assert field["values"] == [default_entities[0]["values"][get_pos]]
            elif field["field"] == fields[1]:
                assert field["values"] == [default_entities[1]["values"][get_pos]]
            else:
                assert_equal_vector(field["values"][0], default_entities[-1]["values"][get_pos])

    # TODO: assert exception
    def test_get_entity_field_not_match(self, connect, collection, get_pos):
        '''
        target: test get_entity_by_id with a field name that is not in the collection
        method: add entities, get with fields=["int1288"]
        expected: exception raised
        '''
        ids = connect.bulk_insert(collection, default_entities)
        connect.flush([collection])
        get_ids = [ids[get_pos]]
        fields = ["int1288"]
        with pytest.raises(Exception):
            connect.get_entity_by_id(collection, get_ids, fields=fields)

    # TODO: assert exception
    def test_get_entity_fields_not_match(self, connect, collection, get_pos):
        '''
        target: test get_entity_by_id with several fields, one of them not in the collection
        method: add entities, get with a valid field name plus an unknown one
        expected: exception raised
        '''
        ids = connect.bulk_insert(collection, default_entities)
        connect.flush([collection])
        get_ids = [ids[get_pos]]
        # Previously this list was just ["int1288"], making the case an exact
        # duplicate of test_get_entity_field_not_match. Mixing a known field
        # with an unknown one exercises the multi-field path instead.
        # NOTE(review): assumes the server rejects the whole request when any
        # requested field is unknown -- confirm against the server behaviour.
        fields = ["int64", "int1288"]
        with pytest.raises(Exception):
            connect.get_entity_by_id(collection, get_ids, fields=fields)

    def test_get_entity_id_not_exised(self, connect, collection):
        '''
        target: test get entity, params entity_id not existed
        method: add entity and get id 1
        expected: the single result is None
        '''
        connect.bulk_insert(collection, default_entity)
        connect.flush([collection])
        res = connect.get_entity_by_id(collection, [1])
        assert res[0] is None

    def test_get_entity_collection_not_existed(self, connect, collection):
        '''
        target: test get entity, params collection_name not existed
        method: add entity and get it from a freshly generated collection name
        expected: error raised
        '''
        ids = connect.bulk_insert(collection, default_entity)
        connect.flush([collection])
        collection_new = gen_unique_str()
        with pytest.raises(Exception):
            connect.get_entity_by_id(collection_new, [ids[0]])

    # ******************************************************************
    #   The following cases are used to test `get_entity_by_id` function, after deleted
    # ******************************************************************

    def test_get_entity_after_delete(self, connect, collection, get_pos):
        '''
        target: test get_entity_by_id
        method: add entities, delete the id at get_pos, flush, get that id
        expected: the result is None
        '''
        ids = connect.bulk_insert(collection, default_entities)
        connect.flush([collection])
        delete_ids = [ids[get_pos]]
        connect.delete_entity_by_id(collection, delete_ids)
        connect.flush([collection])
        get_ids = [ids[get_pos]]
        res = connect.get_entity_by_id(collection, get_ids)
        assert res[0] is None

    def test_get_entities_after_delete(self, connect, collection, get_pos):
        '''
        target: test get_entity_by_id
        method: add entities, delete the first get_pos ids, flush, get them
        expected: every result is None
        '''
        ids = connect.bulk_insert(collection, default_entities)
        connect.flush([collection])
        delete_ids = ids[:get_pos]
        connect.delete_entity_by_id(collection, delete_ids)
        connect.flush([collection])
        get_ids = delete_ids
        res = connect.get_entity_by_id(collection, get_ids)
        for i in range(get_pos):
            assert res[i] is None

    def test_get_entities_after_delete_compact(self, connect, collection, get_pos):
        '''
        target: test get_entity_by_id
        method: add entities, delete the first get_pos ids, flush, compact, get them
        expected: every result is None
        '''
        ids = connect.bulk_insert(collection, default_entities)
        connect.flush([collection])
        delete_ids = ids[:get_pos]
        connect.delete_entity_by_id(collection, delete_ids)
        connect.flush([collection])
        connect.compact(collection)
        get_ids = ids[:get_pos]
        res = connect.get_entity_by_id(collection, get_ids)
        for i in range(get_pos):
            assert res[i] is None

    def test_get_entities_indexed_batch(self, connect, collection, get_simple_index, get_pos):
        '''
        target: test get_entity_by_id
        method: add entities batch, create index, get the first get_pos ids
        expected: entities returned equal insert
        '''
        ids = connect.bulk_insert(collection, default_entities)
        connect.flush([collection])
        connect.create_index(collection, default_float_vec_field_name, get_simple_index)
        get_ids = ids[:get_pos]
        res = connect.get_entity_by_id(collection, get_ids)
        for i in range(get_pos):
            assert_equal_vector(res[i].get(default_float_vec_field_name), default_entities[-1]["values"][i])

    @pytest.mark.level(2)
    def test_get_entities_indexed_single(self, connect, collection, get_simple_index, get_pos):
        '''
        target: test get_entity_by_id
        method: add the same single entity default_nb times (one per request),
                create index, get the first get_pos ids
        expected: every entity returned equals the single inserted entity
        '''
        ids = []
        for _ in range(default_nb):
            ids.append(connect.bulk_insert(collection, default_entity)[0])
        connect.flush([collection])
        connect.create_index(collection, default_float_vec_field_name, get_simple_index)
        get_ids = ids[:get_pos]
        res = connect.get_entity_by_id(collection, get_ids)
        for i in range(get_pos):
            assert_equal_vector(res[i].get(default_float_vec_field_name), default_entity[-1]["values"][0])

    def test_get_entities_with_deleted_ids(self, connect, id_collection):
        '''
        target: test get_entity_by_id
        method: add entities with custom ids, delete the second one, get the first two ids
        expected: two results; the first equals insert, the deleted one is None
        '''
        ids = list(range(default_nb))
        res_ids = connect.bulk_insert(id_collection, default_entities, ids)
        connect.flush([id_collection])
        connect.delete_entity_by_id(id_collection, [res_ids[1]])
        connect.flush([id_collection])
        get_ids = res_ids[:2]
        res = connect.get_entity_by_id(id_collection, get_ids)
        assert len(res) == len(get_ids)
        assert_equal_vector(res[0].get(default_float_vec_field_name), default_entities[-1]["values"][0])
        assert res[1] is None

    # TODO: unable to set config
    def _test_get_entities_after_delete_disable_autoflush(self, connect, collection, get_pos):
        '''
        target: test get_entity_by_id
        method: disable autoflush, add entities, delete without flushing,
                get entities by the deleted ids
        expected: entities are still returned and equal insert
                  (presumably because the delete has not been flushed yet)
        NOTE: disabled (leading underscore) until the config can be set.
        '''
        ids = connect.bulk_insert(collection, default_entities)
        connect.flush([collection])
        delete_ids = ids[:get_pos]
        try:
            disable_flush(connect)
            connect.delete_entity_by_id(collection, delete_ids)
            get_ids = ids[:get_pos]
            res = connect.get_entity_by_id(collection, get_ids)
            for i in range(get_pos):
                assert_equal_vector(res[i].get(default_float_vec_field_name), default_entities[-1]["values"][i])
        finally:
            # always restore flushing so later tests are not affected
            enable_flush(connect)

    def test_get_entities_after_delete_same_ids(self, connect, id_collection):
        '''
        target: test get_entity_by_id
        method: add entities where two of them share id 1, delete id 1, flush, get id 1
        expected: the result is None
        '''
        ids = list(range(default_nb))
        ids[0] = 1  # ids[0] and ids[1] are now both 1
        connect.bulk_insert(id_collection, default_entities, ids)
        connect.flush([id_collection])
        connect.delete_entity_by_id(id_collection, [1])
        connect.flush([id_collection])
        get_ids = [1]
        res = connect.get_entity_by_id(id_collection, get_ids)
        assert res[0] is None

    def test_get_entity_after_delete_with_partition(self, connect, collection, get_pos):
        '''
        target: test get_entity_by_id
        method: add entities into partition, delete the id at get_pos, flush, get that id
        expected: the result is None
        '''
        connect.create_partition(collection, default_tag)
        ids = connect.bulk_insert(collection, default_entities, partition_tag=default_tag)
        connect.flush([collection])
        connect.delete_entity_by_id(collection, [ids[get_pos]])
        connect.flush([collection])
        res = connect.get_entity_by_id(collection, [ids[get_pos]])
        assert res[0] is None

    def test_get_entity_by_id_multithreads(self, connect, collection):
        '''
        target: test get_entity_by_id from several threads
        method: add entities, then run the same get (ids 100..199) 10 times on a 5-worker pool
        expected: every call returns the inserted entities
        '''
        ids = connect.bulk_insert(collection, default_entities)
        connect.flush([collection])
        get_id = ids[100:200]

        def get():
            res = connect.get_entity_by_id(collection, get_id)
            assert len(res) == len(get_id)
            for i in range(len(res)):
                assert_equal_vector(res[i].get(default_float_vec_field_name), default_entities[-1]["values"][100 + i])

        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(get) for _ in range(10)]
            for future in concurrent.futures.as_completed(futures):
                # result() re-raises any assertion failure from the worker
                future.result()

    @pytest.mark.level(2)
    def test_get_entity_by_id_insert_multi_threads(self, connect, collection):
        '''
        target: test get_entity_by_id while other threads insert
        method: add entities, then run 20 randomly chosen insert / get tasks on a 3-worker pool
        expected: every task finishes without error; every get returns the
                  entities of the initial insert
        '''
        ids = connect.bulk_insert(collection, default_entities)
        connect.flush([collection])
        get_id = ids[:1000]

        def insert():
            # logging.getLogger().info(current_thread().getName() + " insert")
            step = 1000
            for _ in range(default_nb // step):
                group_entities = gen_entities(step, False)
                connect.bulk_insert(collection, group_entities)
                connect.flush([collection])

        def get():
            # logging.getLogger().info(current_thread().getName() + " get")
            res = connect.get_entity_by_id(collection, get_id)
            assert len(res) == len(get_id)
            for i in range(len(res)):
                assert_equal_vector(res[i].get(default_float_vec_field_name), default_entities[-1]["values"][i])

        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
            # Submit everything first so the tasks really overlap, then check
            # every future: result() re-raises failures from the worker thread.
            futures = [executor.submit(random.choice([get, insert])) for _ in range(20)]
            for future in concurrent.futures.as_completed(futures):
                future.result()

    @pytest.mark.level(2)
    def test_get_entity_by_id_insert_multi_threads_2(self, connect, collection):
        '''
        target: test get_entity_by_id while another thread keeps inserting
        method: one worker inserts groups of `step` entities and, after each
                group, schedules a get for the ids it just inserted
        expected: every get returns exactly the entities of its group
        '''
        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:

            def get(group_ids, group_entities):
                # logging.getLogger().info(current_thread().getName() + " get")
                res = connect.get_entity_by_id(collection, group_ids)
                assert len(res) == len(group_ids)
                for i in range(len(res)):
                    assert_equal_vector(res[i].get(default_float_vec_field_name), group_entities[-1]["values"][i])

            def insert(group_vectors):
                # logging.getLogger().info(current_thread().getName() + " insert")
                # Returns the futures of the scheduled get tasks so the caller
                # can surface their assertion failures.
                get_futures = []
                for group_vector in group_vectors:
                    # size scalar fields from the group itself so a short last
                    # group still yields consistent entities
                    count = len(group_vector)
                    group_entities = [
                        {"name": "int64", "type": DataType.INT64, "values": list(range(count))},
                        {"name": "float", "type": DataType.FLOAT, "values": [float(i) for i in range(count)]},
                        {"name": default_float_vec_field_name, "type": DataType.FLOAT_VECTOR, "values": group_vector}
                    ]
                    group_ids = connect.bulk_insert(collection, group_entities)
                    connect.flush([collection])
                    get_futures.append(executor.submit(get, group_ids, group_entities))
                return get_futures

            step = 100
            vectors = gen_vectors(default_nb, default_dim, False)
            group_vectors = [vectors[i:i + step] for i in range(0, len(vectors), step)]
            task = executor.submit(insert, group_vectors)
            # Previously the get futures were dropped, so a failing assertion
            # inside get() could never fail the test.
            for future in task.result():
                future.result()
class TestGetInvalid(object):
    """
    Test get entities with invalid params
    """

    @pytest.fixture(scope="function", params=gen_invalid_strs())
    def get_collection_name(self, request):
        '''Parametrized fixture: one value of gen_invalid_strs() used as collection name.'''
        yield request.param

    @pytest.fixture(scope="function", params=gen_invalid_strs())
    def get_field_name(self, request):
        '''Parametrized fixture: one value of gen_invalid_strs() used as field name.'''
        yield request.param

    @pytest.fixture(scope="function", params=gen_invalid_ints())
    def get_entity_id(self, request):
        '''Parametrized fixture: one value of gen_invalid_ints() used as entity id.'''
        yield request.param

    @pytest.mark.level(2)
    def test_insert_ids_invalid(self, connect, collection, get_entity_id):
        '''
        target: test get_entity_by_id with ids that are all invalid
        method: build default_nb copies of the invalid id and get them
        expected: raise an exception
        '''
        invalid_ids = [get_entity_id] * default_nb
        with pytest.raises(Exception):
            connect.get_entity_by_id(collection, invalid_ids)

    @pytest.mark.level(2)
    def test_insert_parts_ids_invalid(self, connect, collection, get_entity_id):
        '''
        target: test get_entity_by_id when only one of the ids is invalid
        method: use ids 0..default_nb-1 with the last one replaced by the invalid id
        expected: raise an exception
        '''
        mixed_ids = list(range(default_nb))
        mixed_ids[-1] = get_entity_id
        with pytest.raises(Exception):
            connect.get_entity_by_id(collection, mixed_ids)

    @pytest.mark.level(2)
    def test_get_entities_with_invalid_collection_name(self, connect, get_collection_name):
        '''
        target: test get_entity_by_id with an invalid collection name
        method: get id 1 from the invalid collection name
        expected: raise an exception
        '''
        with pytest.raises(Exception):
            connect.get_entity_by_id(get_collection_name, [1])

    @pytest.mark.level(2)
    def test_get_entities_with_invalid_field_name(self, connect, collection, get_field_name):
        '''
        target: test get_entity_by_id with an invalid field name
        method: get id 1 with fields set to the single invalid name
        expected: raise an exception
        '''
        with pytest.raises(Exception):
            connect.get_entity_by_id(collection, [1], fields=[get_field_name])