mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
[skip ci] update segment_size to segment_row_count (#3068)
* [skip ci] update segment_size to segment_row_count
  Signed-off-by: zw <zw@milvus.io>
* [skip ci] remove metric type in collection params
  Signed-off-by: zw <zw@milvus.io>
Co-authored-by: zw <zw@milvus.io>
Co-authored-by: Wang XiangYu <xy.wang@zilliz.com>
This commit is contained in:
parent
86652416c5
commit
52a59396ce
@ -15,7 +15,7 @@ dim = 128
|
||||
tag = "tag"
|
||||
collection_id = "count_collection"
|
||||
add_interval_time = 3
|
||||
segment_size = 10
|
||||
segment_row_count = 5000
|
||||
default_fields = gen_default_fields()
|
||||
entities = gen_entities(nb)
|
||||
raw_vectors, binary_entities = gen_binary_entities(nb)
|
||||
@ -31,7 +31,7 @@ class TestCollectionCount:
|
||||
scope="function",
|
||||
params=[
|
||||
1,
|
||||
5000,
|
||||
4000,
|
||||
6001
|
||||
],
|
||||
)
|
||||
@ -186,7 +186,7 @@ class TestCollectionCountIP:
|
||||
scope="function",
|
||||
params=[
|
||||
1,
|
||||
5000,
|
||||
4000,
|
||||
6001
|
||||
],
|
||||
)
|
||||
@ -341,7 +341,7 @@ class TestCollectionCountBinary:
|
||||
scope="function",
|
||||
params=[
|
||||
1,
|
||||
5000,
|
||||
4000,
|
||||
6001
|
||||
],
|
||||
)
|
||||
@ -507,7 +507,7 @@ class TestCollectionMultiCollections:
|
||||
scope="function",
|
||||
params=[
|
||||
1,
|
||||
5000,
|
||||
4000,
|
||||
6001
|
||||
],
|
||||
)
|
||||
@ -564,7 +564,8 @@ class TestCollectionMultiCollections:
|
||||
res = connect.count_entities(collection_list[i])
|
||||
assert res == insert_count
|
||||
|
||||
def test_collection_count_multi_collections_binary(self, connect, jac_collection, insert_count):
|
||||
# TODO:
|
||||
def _test_collection_count_multi_collections_binary(self, connect, jac_collection, insert_count):
|
||||
'''
|
||||
target: test collection rows_count is correct or not with multiple collections of JACCARD
|
||||
method: create collection and add entities in it,
|
||||
@ -587,7 +588,8 @@ class TestCollectionMultiCollections:
|
||||
res = connect.count_entities(collection_list[i])
|
||||
assert res == insert_count
|
||||
|
||||
def test_collection_count_multi_collections_mix(self, connect):
|
||||
# TODO:
|
||||
def _test_collection_count_multi_collections_mix(self, connect):
|
||||
'''
|
||||
target: test collection rows_count is correct or not with multiple collections of JACCARD
|
||||
method: create collection and add entities in it,
|
||||
|
||||
@ -7,9 +7,9 @@ from multiprocessing import Process
|
||||
from utils import *
|
||||
|
||||
dim = 128
|
||||
default_segment_size = 1024
|
||||
default_segment_row_count = 100000
|
||||
drop_collection_interval_time = 3
|
||||
segment_size = 10
|
||||
segment_row_count = 5000
|
||||
collection_id = "logic"
|
||||
vectors = gen_vectors(100, dim)
|
||||
default_fields = gen_default_fields()
|
||||
|
||||
@ -7,7 +7,7 @@ import pytest
|
||||
from utils import *
|
||||
|
||||
dim = 128
|
||||
segment_size = 10
|
||||
segment_row_count = 5000
|
||||
nprobe = 1
|
||||
top_k = 1
|
||||
epsilon = 0.0001
|
||||
|
||||
@ -13,9 +13,9 @@ from utils import *
|
||||
nb = 1
|
||||
dim = 128
|
||||
collection_id = "create_collection"
|
||||
default_segment_size = 1024
|
||||
default_segment_row_count = 100000
|
||||
drop_collection_interval_time = 3
|
||||
segment_size = 10
|
||||
segment_row_count = 5000
|
||||
default_fields = gen_default_fields()
|
||||
entities = gen_entities(nb)
|
||||
|
||||
@ -42,9 +42,9 @@ class TestCreateCollection:
|
||||
|
||||
@pytest.fixture(
|
||||
scope="function",
|
||||
params=gen_segment_sizes()
|
||||
params=gen_segment_row_counts()
|
||||
)
|
||||
def get_segment_size(self, request):
|
||||
def get_segment_row_count(self, request):
|
||||
yield request.param
|
||||
|
||||
def test_create_collection_fields(self, connect, get_filter_field, get_vector_field):
|
||||
@ -59,7 +59,7 @@ class TestCreateCollection:
|
||||
collection_name = gen_unique_str(collection_id)
|
||||
fields = {
|
||||
"fields": [filter_field, vector_field],
|
||||
"segment_size": segment_size
|
||||
"segment_row_count": segment_row_count
|
||||
}
|
||||
logging.getLogger().info(fields)
|
||||
connect.create_collection(collection_name, fields)
|
||||
@ -77,20 +77,20 @@ class TestCreateCollection:
|
||||
collection_name = gen_unique_str(collection_id)
|
||||
fields = {
|
||||
"fields": [filter_field, vector_field],
|
||||
"segment_size": segment_size
|
||||
"segment_row_count": segment_row_count
|
||||
}
|
||||
connect.create_collection(collection_name, fields)
|
||||
assert connect.has_collection(collection_name)
|
||||
|
||||
def test_create_collection_segment_size(self, connect, get_segment_size):
|
||||
def test_create_collection_segment_row_count(self, connect, get_segment_row_count):
|
||||
'''
|
||||
target: test create normal collection with different fields
|
||||
method: create collection with diff segment_size
|
||||
method: create collection with diff segment_row_count
|
||||
expected: no exception raised
|
||||
'''
|
||||
collection_name = gen_unique_str(collection_id)
|
||||
fields = copy.deepcopy(default_fields)
|
||||
fields["segment_size"] = get_segment_size
|
||||
fields["segment_row_count"] = get_segment_row_count
|
||||
connect.create_collection(collection_name, fields)
|
||||
assert connect.has_collection(collection_name)
|
||||
|
||||
@ -197,7 +197,7 @@ class TestCreateCollectionInvalid(object):
|
||||
scope="function",
|
||||
params=gen_invalid_ints()
|
||||
)
|
||||
def get_segment_size(self, request):
|
||||
def get_segment_row_count(self, request):
|
||||
yield request.param
|
||||
|
||||
@pytest.fixture(
|
||||
@ -222,20 +222,20 @@ class TestCreateCollectionInvalid(object):
|
||||
yield request.param
|
||||
|
||||
@pytest.mark.level(2)
|
||||
def test_create_collection_with_invalid_segment_size(self, connect, get_segment_size):
|
||||
def test_create_collection_with_invalid_segment_row_count(self, connect, get_segment_row_count):
|
||||
collection_name = gen_unique_str()
|
||||
fields = copy.deepcopy(default_fields)
|
||||
fields["segment_size"] = get_segment_size
|
||||
fields["segment_row_count"] = get_segment_row_count
|
||||
with pytest.raises(Exception) as e:
|
||||
connect.create_collection(collection_name, fields)
|
||||
|
||||
@pytest.mark.level(2)
|
||||
def test_create_collection_with_invalid_metric_type(self, connect, get_metric_type):
|
||||
collection_name = gen_unique_str()
|
||||
fields = copy.deepcopy(default_fields)
|
||||
fields["fields"][-1]["params"]["metric_type"] = get_metric_type
|
||||
with pytest.raises(Exception) as e:
|
||||
connect.create_collection(collection_name, fields)
|
||||
# @pytest.mark.level(2)
|
||||
# def test_create_collection_with_invalid_metric_type(self, connect, get_metric_type):
|
||||
# collection_name = gen_unique_str()
|
||||
# fields = copy.deepcopy(default_fields)
|
||||
# fields["fields"][-1]["params"]["metric_type"] = get_metric_type
|
||||
# with pytest.raises(Exception) as e:
|
||||
# connect.create_collection(collection_name, fields)
|
||||
|
||||
@pytest.mark.level(2)
|
||||
def test_create_collection_with_invalid_dimension(self, connect, get_dim):
|
||||
@ -285,34 +285,33 @@ class TestCreateCollectionInvalid(object):
|
||||
with pytest.raises(Exception) as e:
|
||||
connect.create_collection(collection_name, fields)
|
||||
|
||||
def test_create_collection_no_segment_size(self, connect):
|
||||
def test_create_collection_no_segment_row_count(self, connect):
|
||||
'''
|
||||
target: test create collection with no segment_size params
|
||||
target: test create collection with no segment_row_count params
|
||||
method: create collection with correct params
|
||||
expected: use default default_segment_size
|
||||
expected: use default default_segment_row_count
|
||||
'''
|
||||
collection_name = gen_unique_str(collection_id)
|
||||
fields = copy.deepcopy(default_fields)
|
||||
fields.pop("segment_size")
|
||||
fields.pop("segment_row_count")
|
||||
connect.create_collection(collection_name, fields)
|
||||
res = connect.get_collection_info(collection_name)
|
||||
logging.getLogger().info(res)
|
||||
assert res["segment_size"] == default_segment_size
|
||||
assert res["segment_row_count"] == default_segment_row_count
|
||||
|
||||
# TODO:
|
||||
def _test_create_collection_no_metric_type(self, connect):
|
||||
'''
|
||||
target: test create collection with no metric_type params
|
||||
method: create collection with correct params
|
||||
expected: use default L2
|
||||
'''
|
||||
collection_name = gen_unique_str(collection_id)
|
||||
fields = copy.deepcopy(default_fields)
|
||||
fields["fields"][-1]["params"].pop("metric_type")
|
||||
connect.create_collection(collection_name, fields)
|
||||
res = connect.get_collection_info(collection_name)
|
||||
logging.getLogger().info(res)
|
||||
assert res["metric_type"] == "L2"
|
||||
# def _test_create_collection_no_metric_type(self, connect):
|
||||
# '''
|
||||
# target: test create collection with no metric_type params
|
||||
# method: create collection with correct params
|
||||
# expected: use default L2
|
||||
# '''
|
||||
# collection_name = gen_unique_str(collection_id)
|
||||
# fields = copy.deepcopy(default_fields)
|
||||
# fields["fields"][-1]["params"].pop("metric_type")
|
||||
# connect.create_collection(collection_name, fields)
|
||||
# res = connect.get_collection_info(collection_name)
|
||||
# logging.getLogger().info(res)
|
||||
# assert res["metric_type"] == "L2"
|
||||
|
||||
# TODO: assert exception
|
||||
def test_create_collection_limit_fields(self, connect):
|
||||
|
||||
@ -9,7 +9,7 @@ from utils import *
|
||||
|
||||
collection_id = "info"
|
||||
default_fields = gen_default_fields()
|
||||
segment_size = 10
|
||||
segment_row_count = 5000
|
||||
|
||||
|
||||
class TestInfoBase:
|
||||
@ -30,9 +30,9 @@ class TestInfoBase:
|
||||
|
||||
@pytest.fixture(
|
||||
scope="function",
|
||||
params=gen_segment_sizes()
|
||||
params=gen_segment_row_counts()
|
||||
)
|
||||
def get_segment_size(self, request):
|
||||
def get_segment_row_count(self, request):
|
||||
yield request.param
|
||||
|
||||
"""
|
||||
@ -53,7 +53,7 @@ class TestInfoBase:
|
||||
collection_name = gen_unique_str(collection_id)
|
||||
fields = {
|
||||
"fields": [filter_field, vector_field],
|
||||
"segment_size": segment_size
|
||||
"segment_row_count": segment_row_count
|
||||
}
|
||||
connect.create_collection(collection_name, fields)
|
||||
res = connect.get_collection_info(collection_name)
|
||||
@ -64,15 +64,15 @@ class TestInfoBase:
|
||||
# assert dimension
|
||||
|
||||
# TODO
|
||||
def test_create_collection_segment_size(self, connect, get_segment_size):
|
||||
def test_create_collection_segment_row_count(self, connect, get_segment_row_count):
|
||||
'''
|
||||
target: test create normal collection with different fields
|
||||
method: create collection with diff segment_size
|
||||
method: create collection with diff segment_row_count
|
||||
expected: no exception raised
|
||||
'''
|
||||
collection_name = gen_unique_str(collection_id)
|
||||
fields = copy.deepcopy(default_fields)
|
||||
fields["segment_size"] = get_segment_size
|
||||
fields["segment_row_count"] = get_segment_row_count
|
||||
connect.create_collection(collection_name, fields)
|
||||
# assert segment size
|
||||
|
||||
@ -141,7 +141,7 @@ class TestInfoBase:
|
||||
collection_name = gen_unique_str(collection_id)
|
||||
fields = {
|
||||
"fields": [filter_field, vector_field],
|
||||
"segment_size": segment_size
|
||||
"segment_row_count": segment_row_count
|
||||
}
|
||||
connect.create_collection(collection_name, fields)
|
||||
# insert
|
||||
@ -153,15 +153,15 @@ class TestInfoBase:
|
||||
# assert dimension
|
||||
|
||||
# TODO
|
||||
def test_create_collection_segment_size_after_insert(self, connect, get_segment_size):
|
||||
def test_create_collection_segment_row_count_after_insert(self, connect, get_segment_row_count):
|
||||
'''
|
||||
target: test create normal collection with different fields
|
||||
method: create collection with diff segment_size
|
||||
method: create collection with diff segment_row_count
|
||||
expected: no exception raised
|
||||
'''
|
||||
collection_name = gen_unique_str(collection_id)
|
||||
fields = copy.deepcopy(default_fields)
|
||||
fields["segment_size"] = get_segment_size
|
||||
fields["segment_row_count"] = get_segment_row_count
|
||||
connect.create_collection(collection_name, fields)
|
||||
# insert
|
||||
# assert segment size
|
||||
|
||||
@ -10,7 +10,7 @@ from utils import *
|
||||
|
||||
|
||||
dim = 128
|
||||
segment_size = 10
|
||||
segment_row_count = 5000
|
||||
collection_id = "test_delete"
|
||||
DELETE_TIMEOUT = 60
|
||||
tag = "1970-01-01"
|
||||
|
||||
@ -11,7 +11,7 @@ from utils import *
|
||||
|
||||
|
||||
dim = 128
|
||||
segment_size = 10
|
||||
segment_row_count = 5000
|
||||
collection_id = "test_get"
|
||||
DELETE_TIMEOUT = 60
|
||||
tag = "1970-01-01"
|
||||
|
||||
@ -9,7 +9,7 @@ from milvus import DataType
|
||||
from utils import *
|
||||
|
||||
dim = 128
|
||||
segment_size = 10
|
||||
segment_row_count = 5000
|
||||
collection_id = "test_insert"
|
||||
ADD_TIMEOUT = 60
|
||||
tag = "1970-01-01"
|
||||
@ -209,7 +209,7 @@ class TestInsertBase:
|
||||
collection_name = gen_unique_str("test_collection")
|
||||
fields = {
|
||||
"fields": [filter_field, vector_field],
|
||||
"segment_size": segment_size
|
||||
"segment_row_count": segment_row_count
|
||||
}
|
||||
connect.create_collection(collection_name, fields)
|
||||
ids = [i for i in range(nb)]
|
||||
@ -283,7 +283,7 @@ class TestInsertBase:
|
||||
collection_name = gen_unique_str("test_collection")
|
||||
fields = {
|
||||
"fields": [filter_field, vector_field],
|
||||
"segment_size": segment_size
|
||||
"segment_row_count": segment_row_count
|
||||
}
|
||||
connect.create_collection(collection_name, fields)
|
||||
entities = gen_entities_by_fields(fields["fields"], nb, dim)
|
||||
|
||||
@ -8,7 +8,7 @@ import pytest
|
||||
from utils import *
|
||||
|
||||
dim = 128
|
||||
segment_size = 100
|
||||
segment_row_count = 100000
|
||||
nb = 6000
|
||||
tag = "1970-01-01"
|
||||
field_name = "float_vector"
|
||||
|
||||
@ -11,7 +11,7 @@ from milvus import DataType
|
||||
from utils import *
|
||||
|
||||
dim = 128
|
||||
segment_size = 10
|
||||
segment_row_count = 5000
|
||||
top_k_limit = 2048
|
||||
collection_id = "search"
|
||||
tag = "1970-01-01"
|
||||
|
||||
@ -13,7 +13,7 @@ nprobe = 1
|
||||
top_k = 1
|
||||
tag = "1970-01-01"
|
||||
nb = 6000
|
||||
segment_size = 10
|
||||
segment_row_count = 5000
|
||||
entity = gen_entities(1)
|
||||
entities = gen_entities(nb)
|
||||
raw_vector, binary_entity = gen_binary_entities(1)
|
||||
|
||||
@ -7,7 +7,7 @@ import pytest
|
||||
from utils import *
|
||||
|
||||
dim = 128
|
||||
segment_size = 10
|
||||
segment_row_count = 5000
|
||||
index_file_size = 10
|
||||
collection_id = "test_flush"
|
||||
DELETE_TIMEOUT = 60
|
||||
@ -155,7 +155,7 @@ class TestFlushBase:
|
||||
collection_new = gen_unique_str("test_flush")
|
||||
fields = {
|
||||
"fields": [filter_field, vector_field],
|
||||
"segment_size": segment_size
|
||||
"segment_row_count": segment_row_count
|
||||
}
|
||||
connect.create_collection(collection_new, fields)
|
||||
connect.create_partition(collection, tag)
|
||||
|
||||
@ -9,7 +9,7 @@ from utils import *
|
||||
|
||||
|
||||
dim = 128
|
||||
segment_size = 10
|
||||
segment_row_count = 5000
|
||||
collection_id = "partition"
|
||||
nprobe = 1
|
||||
tag = "1970-01-01"
|
||||
|
||||
@ -8,7 +8,7 @@ from utils import *
|
||||
|
||||
dim = 128
|
||||
collection_id = "test_wal"
|
||||
segment_size = 10
|
||||
segment_row_count = 5000
|
||||
WAL_TIMEOUT = 60
|
||||
tag = "1970-01-01"
|
||||
insert_interval_time = 1.5
|
||||
|
||||
@ -16,7 +16,7 @@ epsilon = 0.000001
|
||||
default_flush_interval = 1
|
||||
big_flush_interval = 1000
|
||||
dimension = 128
|
||||
segment_size = 10
|
||||
segment_row_count = 5000
|
||||
|
||||
# TODO:
|
||||
all_index_types = [
|
||||
@ -199,14 +199,9 @@ def gen_single_filter_fields():
|
||||
|
||||
def gen_single_vector_fields():
|
||||
fields = []
|
||||
for metric_type in ['HAMMING', 'IP', 'JACCARD', 'L2', 'SUBSTRUCTURE', 'SUPERSTRUCTURE', 'TANIMOTO']:
|
||||
for data_type in [DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR]:
|
||||
if metric_type in ["L2", "IP"] and data_type == DataType.BINARY_VECTOR:
|
||||
continue
|
||||
if metric_type not in ["L2", "IP"] and data_type == DataType.FLOAT_VECTOR:
|
||||
continue
|
||||
field = {"field": data_type.name, "type": data_type, "params": {"metric_type": metric_type, "dim": dimension}}
|
||||
fields.append(field)
|
||||
for data_type in [DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR]:
|
||||
field = {"field": data_type.name, "type": data_type, "params": {"dim": dimension}}
|
||||
fields.append(field)
|
||||
return fields
|
||||
|
||||
|
||||
@ -215,9 +210,9 @@ def gen_default_fields():
|
||||
"fields": [
|
||||
{"field": "int64", "type": DataType.INT64},
|
||||
{"field": "float", "type": DataType.FLOAT},
|
||||
{"field": "vector", "type": DataType.FLOAT_VECTOR, "params": {"metric_type": "L2", "dim": dimension}}
|
||||
{"field": "vector", "type": DataType.FLOAT_VECTOR, "params": {"dim": dimension}}
|
||||
],
|
||||
"segment_size": segment_size
|
||||
"segment_row_count": segment_row_count
|
||||
}
|
||||
return default_fields
|
||||
|
||||
@ -311,14 +306,14 @@ def add_vector_field(entities, is_normal=False):
|
||||
return entities
|
||||
|
||||
|
||||
def update_fields_metric_type(fields, metric_type):
|
||||
tmp_fields = copy.deepcopy(fields)
|
||||
if metric_type in ["L2", "IP"]:
|
||||
tmp_fields["fields"][-1]["type"] = DataType.FLOAT_VECTOR
|
||||
else:
|
||||
tmp_fields["fields"][-1]["type"] = DataType.BINARY_VECTOR
|
||||
tmp_fields["fields"][-1]["params"]["metric_type"] = metric_type
|
||||
return tmp_fields
|
||||
# def update_fields_metric_type(fields, metric_type):
|
||||
# tmp_fields = copy.deepcopy(fields)
|
||||
# if metric_type in ["L2", "IP"]:
|
||||
# tmp_fields["fields"][-1]["type"] = DataType.FLOAT_VECTOR
|
||||
# else:
|
||||
# tmp_fields["fields"][-1]["type"] = DataType.BINARY_VECTOR
|
||||
# tmp_fields["fields"][-1]["params"]["metric_type"] = metric_type
|
||||
# return tmp_fields
|
||||
|
||||
|
||||
def remove_field(entities):
|
||||
@ -363,7 +358,7 @@ def add_vector_field(nb, dimension=dimension):
|
||||
return field_name
|
||||
|
||||
|
||||
def gen_segment_sizes():
|
||||
def gen_segment_row_counts():
|
||||
sizes = [
|
||||
1,
|
||||
2,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user