test: Add cases for restful about default and none (#37943)

issue: #36129

Signed-off-by: qixuan <673771573@qq.com>
This commit is contained in:
qixuan 2024-11-25 14:06:33 +08:00 committed by GitHub
parent e76802f910
commit 4cd79323bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 570 additions and 0 deletions

View File

@ -670,6 +670,40 @@ class TestCreateCollection(TestBase):
assert rsp['code'] == 0
assert rsp['data']['collectionName'] == name
def test_create_collections_with_nullable_default(self):
"""
target: test create collection
method: create a collection with default none
expected: create collection success
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}, "defaultValue": 100},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"},
"nullable": True, "defaultValue": "123"},
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
]
}
}
logging.info(f"create collection {name} with payload: {payload}")
rsp = client.collection_create(payload)
assert rsp['code'] == 0
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
# describe collection
rsp = client.collection_describe(name)
assert rsp['code'] == 0
assert rsp['data']['fields'][2]['defaultValue'] == {'Data': {'StringData': '123'}}
assert rsp['data']['fields'][2]['nullable'] is True
@pytest.mark.L1
class TestCreateCollectionNegative(TestBase):
@ -736,6 +770,125 @@ class TestCreateCollectionNegative(TestBase):
assert rsp['code'] == 1100
assert "Invalid collection name" in rsp['message'] or "invalid parameter" in rsp['message']
def test_create_collections_with_partition_key_nullable(self):
"""
partition key field not support nullable
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}, "isPartitionKey": True,
"nullable": True},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
]
}
}
logging.info(f"create collection {name} with payload: {payload}")
rsp = client.collection_create(payload)
assert rsp['code'] == 1100
assert "partition key field not support nullable" in rsp['message']
def test_create_collections_with_vector_nullable(self):
"""
vector field not support nullable
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"},
"nullable": True}
]
}
}
logging.info(f"create collection {name} with payload: {payload}")
rsp = client.collection_create(payload)
assert rsp['code'] == 1100
assert "vector type not support null" in rsp['message']
def test_create_collections_with_primary_default(self):
"""
primary key field not support defaultValue
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {},
"defaultValue": 123},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
]
}
}
logging.info(f"create collection {name} with payload: {payload}")
rsp = client.collection_create(payload)
assert rsp['code'] == 1100
assert "primary field not support default_value" in rsp['message']
def test_create_collections_with_json_field_default(self):
"""
json field not support default value
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
{"fieldName": "json", "dataType": "JSON", "elementTypeParams": {}, "defaultValue": {"key": 1}},
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
]
}
}
logging.info(f"create collection {name} with payload: {payload}")
rsp = client.collection_create(payload)
assert rsp['code'] == 1100
assert "convert defaultValue fail" in rsp['message']
def test_create_collections_with_array_field_default(self):
"""
array field not support default value
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
{"fieldName": "int_array", "dataType": "Array", "elementDataType": "Int64", "defaultValue": [1, 2],
"elementTypeParams": {"max_capacity": "1024"}},
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
]
}
}
logging.info(f"create collection {name} with payload: {payload}")
rsp = client.collection_create(payload)
assert rsp['code'] == 1100
assert "convert defaultValue fail" in rsp['message']
@pytest.mark.L0
class TestHasCollections(TestBase):

View File

@ -687,6 +687,86 @@ class TestInsertVector(TestBase):
assert rsp['code'] == 0
assert len(rsp['data']) == 50
@pytest.mark.parametrize("insert_round", [1])
@pytest.mark.parametrize("auto_id", [True, False])
@pytest.mark.parametrize("is_partition_key", [True, False])
@pytest.mark.parametrize("enable_dynamic_schema", [True, False])
@pytest.mark.parametrize("nb", [3000])
@pytest.mark.parametrize("dim", [128])
def test_insert_entities_with_default_none(self, nb, dim, insert_round, auto_id, is_partition_key,
enable_dynamic_schema):
"""
Insert a vector with defaultValue and none
"""
# create a collection
name = gen_collection_name()
payload = {
"collectionName": name,
"schema": {
"autoId": auto_id,
"enableDynamicField": enable_dynamic_schema,
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "user_id", "dataType": "Int64", "isPartitionKey": is_partition_key,
"elementTypeParams": {}, "defaultValue": 10},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}, "nullable": True},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"},
"defaultValue": "default", "nullable": True},
{"fieldName": "json", "dataType": "JSON", "elementTypeParams": {}, "nullable": True},
{"fieldName": "varchar_array", "dataType": "Array", "elementDataType": "VarChar",
"elementTypeParams": {"max_capacity": "1024", "max_length": "256"}, "nullable": True},
{"fieldName": "text_emb", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
]
},
"indexParams": [
{"fieldName": "text_emb", "indexName": "text_emb", "metricType": "L2"},
]
}
rsp = self.collection_client.collection_create(payload)
assert rsp['code'] == 0
rsp = self.collection_client.collection_describe(name)
logger.info(f"rsp: {rsp}")
assert rsp['code'] == 0
# insert data
for k in range(insert_round):
data = []
for i in range(nb):
if auto_id:
tmp = {
"user_id": None,
"word_count": None,
"book_describe": None,
"json": None,
"varchar_array": None,
"text_emb": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[0].tolist(),
}
else:
tmp = {
"book_id": i,
"user_id": None,
"word_count": None,
"book_describe": None,
"json": None,
"varchar_array": None,
"text_emb": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[0].tolist(),
}
if enable_dynamic_schema:
tmp.update({f"dynamic_field_{i}": i})
data.append(tmp)
payload = {
"collectionName": name,
"data": data,
}
rsp = self.vector_client.vector_insert(payload)
assert rsp['code'] == 0
assert rsp['data']['insertCount'] == nb
# query data to make sure the data is inserted
rsp = self.vector_client.vector_query({"collectionName": name, "filter": "user_id > 0", "limit": 5})
assert rsp['code'] == 0
assert len(rsp['data']) == 5
assert rsp['data'][0]['book_describe'] == 'default'
assert rsp['data'][0]['word_count'] is None
assert rsp['data'][0]['json'] is None
@pytest.mark.L0
class TestInsertVectorNegative(TestBase):
@ -780,6 +860,44 @@ class TestInsertVectorNegative(TestBase):
assert rsp['code'] == 1804
assert "fail to deal the insert data" in rsp['message']
def test_insert_entities_with_none_no_nullable_field(self):
"""
Insert a vector with none no nullable field
"""
# create a collection
name = gen_collection_name()
payload = {
"collectionName": name,
"schema": {
"autoId": True,
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
{"fieldName": "text_emb", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{128}"}},
]
}
}
rsp = self.collection_client.collection_create(payload)
assert rsp['code'] == 0
rsp = self.collection_client.collection_describe(name)
logger.info(f"rsp: {rsp}")
assert rsp['code'] == 0
# insert data
data = []
for i in range(10):
tmp = {
"word_count": i if i % 2 else None,
"text_emb": preprocessing.normalize([np.array([random.random() for _ in range(128)])])[0].tolist(),
}
data.append(tmp)
payload = {
"collectionName": name,
"data": data,
}
rsp = self.vector_client.vector_insert(payload)
assert rsp['code'] == 1804
assert "fail to deal the insert data" in rsp['message']
@pytest.mark.L0
class TestUpsertVector(TestBase):
@ -948,6 +1066,85 @@ class TestUpsertVector(TestBase):
res = utility.get_query_segment_info(name)
logger.info(f"res: {res}")
@pytest.mark.parametrize("insert_round", [2])
@pytest.mark.parametrize("nb", [3000])
@pytest.mark.parametrize("dim", [128])
@pytest.mark.parametrize("id_type", ["Int64", "VarChar"])
def test_upsert_vector_with_default_none(self, nb, dim, insert_round, id_type):
# create a collection
name = gen_collection_name()
payload = {
"collectionName": name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": f"{id_type}", "isPrimary": True, "elementTypeParams": {"max_length": "256"}},
{"fieldName": "user_id", "dataType": "Int64", "isPartitionKey": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}, "defaultValue": 123},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"},
"nullable": True},
{"fieldName": "text_emb", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
]
},
"indexParams": [{"fieldName": "text_emb", "indexName": "text_emb_index", "metricType": "L2"}]
}
rsp = self.collection_client.collection_create(payload)
assert rsp['code'] == 0
rsp = self.collection_client.collection_describe(name)
logger.info(f"rsp: {rsp}")
assert rsp['code'] == 0
# insert data
for i in range(insert_round):
data = []
for j in range(nb):
tmp = {
"book_id": i * nb + j if id_type == "Int64" else f"{i * nb + j}",
"user_id": i * nb + j,
"word_count": i * nb + j,
"book_describe": f"book_{i * nb + j}",
"text_emb": preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist()
}
data.append(tmp)
payload = {
"collectionName": name,
"data": data,
}
body_size = sys.getsizeof(json.dumps(payload))
logger.info(f"body size: {body_size / 1024 / 1024} MB")
rsp = self.vector_client.vector_insert(payload)
assert rsp['code'] == 0
assert rsp['data']['insertCount'] == nb
c = Collection(name)
c.flush()
# upsert data
for i in range(insert_round):
data = []
for j in range(nb):
tmp = {
"book_id": i * nb + j if id_type == "Int64" else f"{i * nb + j}",
"user_id": i * nb + j + 1,
"word_count": None,
"book_describe": None,
"text_emb": preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist()
}
data.append(tmp)
payload = {
"collectionName": name,
"data": data,
}
body_size = sys.getsizeof(json.dumps(payload))
logger.info(f"body size: {body_size / 1024 / 1024} MB")
rsp = self.vector_client.vector_upsert(payload)
# query data to make sure the data is updated
if id_type == "Int64":
rsp = self.vector_client.vector_query({"collectionName": name, "filter": "book_id > 0"})
if id_type == "VarChar":
rsp = self.vector_client.vector_query({"collectionName": name, "filter": "book_id > '0'"})
for data in rsp['data']:
assert data['user_id'] == int(data['book_id']) + 1
assert data['word_count'] == 123
assert data['book_describe'] is None
@pytest.mark.L0
class TestUpsertVectorNegative(TestBase):
@ -981,6 +1178,45 @@ class TestUpsertVectorNegative(TestBase):
assert rsp['code'] == 100
assert "can't find collection" in rsp['message']
def test_upsert_entities_with_none_no_nullable_field(self):
"""
Insert a vector with none no nullable field
"""
# create a collection
name = gen_collection_name()
payload = {
"collectionName": name,
"schema": {
"autoId": True,
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
{"fieldName": "text_emb", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{128}"}},
]
}
}
rsp = self.collection_client.collection_create(payload)
assert rsp['code'] == 0
rsp = self.collection_client.collection_describe(name)
logger.info(f"rsp: {rsp}")
assert rsp['code'] == 0
# insert data
data = []
for i in range(10):
tmp = {
"word_count": i if i % 2 else None,
"text_emb": preprocessing.normalize([np.array([random.random() for _ in range(128)])])[0].tolist(),
}
data.append(tmp)
payload = {
"collectionName": name,
"data": data,
}
rsp = self.vector_client.vector_upsert(payload)
assert rsp['code'] == 1804
assert "fail to deal the insert data" in rsp['message']
@pytest.mark.L0
class TestSearchVector(TestBase):
@ -2055,6 +2291,88 @@ class TestSearchVector(TestBase):
for d in rsp['data']:
assert token in d[field]
@pytest.mark.parametrize("insert_round", [1])
@pytest.mark.parametrize("auto_id", [True])
@pytest.mark.parametrize("is_partition_key", [True])
@pytest.mark.parametrize("enable_dynamic_schema", [True])
@pytest.mark.parametrize("nb", [3000])
@pytest.mark.parametrize("dim", [128])
@pytest.mark.parametrize("nq", [1, 2])
@pytest.mark.parametrize("metric_type", ['COSINE', "L2", "IP"])
def test_search_vector_with_default_none(self, nb, dim, insert_round, auto_id, is_partition_key,
enable_dynamic_schema, nq, metric_type):
"""
Insert a vector with default and none
"""
# create a collection
name = gen_collection_name()
payload = {
"collectionName": name,
"schema": {
"autoId": auto_id,
"enableDynamicField": enable_dynamic_schema,
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "user_id", "dataType": "Int64", "isPartitionKey": is_partition_key,
"elementTypeParams": {}, "defaultValue": 8888},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}, "nullable": True},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"},
"nullable": True, "defaultValue": "8888"},
{"fieldName": "float_vector", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
]
},
"indexParams": [
{"fieldName": "float_vector", "indexName": "float_vector", "metricType": metric_type},
]
}
rsp = self.collection_client.collection_create(payload)
assert rsp['code'] == 0
rsp = self.collection_client.collection_describe(name)
logger.info(f"rsp: {rsp}")
assert rsp['code'] == 0
# insert data
for i in range(insert_round):
data = []
for i in range(nb):
if auto_id:
tmp = {
"user_id": i % 100 if i % 2 else None,
"word_count": None,
"book_describe": None,
"float_vector": gen_vector(datatype="FloatVector", dim=dim),
}
else:
tmp = {
"book_id": i,
"user_id": i % 100 if i % 2 else None,
"word_count": None,
"book_describe": None,
"float_vector": gen_vector(datatype="FloatVector", dim=dim),
}
if enable_dynamic_schema:
tmp.update({f"dynamic_field_{i}": i})
data.append(tmp)
payload = {
"collectionName": name,
"data": data,
}
rsp = self.vector_client.vector_insert(payload)
assert rsp['code'] == 0
assert rsp['data']['insertCount'] == nb
# search data
payload = {
"collectionName": name,
"data": [gen_vector(datatype="FloatVector", dim=dim) for _ in range(nq)],
"filter": "book_id >= 0",
# "groupingField": "user_id",
"outputFields": ["*"],
"limit": 100,
}
rsp = self.vector_client.vector_search(payload)
assert rsp['code'] == 0
assert rsp['data'][0]['book_describe'] == "8888"
assert rsp['data'][0]['word_count'] is None
assert len(rsp['data']) == 100 * nq
@pytest.mark.L0
class TestSearchVectorNegative(TestBase):
@ -2905,6 +3223,105 @@ class TestQueryVector(TestBase):
for d in rsp['data']:
assert token in d[field]
@pytest.mark.parametrize("insert_round", [1])
@pytest.mark.parametrize("auto_id", [True])
@pytest.mark.parametrize("is_partition_key", [True])
@pytest.mark.parametrize("enable_dynamic_schema", [True])
@pytest.mark.parametrize("nb", [3000])
@pytest.mark.parametrize("dim", [128])
def test_query_entities_with_default_none(self, nb, dim, insert_round, auto_id, is_partition_key,
enable_dynamic_schema):
"""
Insert a vector with default and none
"""
# create a collection
name = gen_collection_name()
payload = {
"collectionName": name,
"schema": {
"autoId": auto_id,
"enableDynamicField": enable_dynamic_schema,
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "user_id", "dataType": "Int64", "isPartitionKey": is_partition_key,
"elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}, "defaultValue": 8888},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "25536"},
"nullable": True},
{"fieldName": "bool", "dataType": "Bool", "elementTypeParams": {}, "nullable": True},
{"fieldName": "json", "dataType": "JSON", "elementTypeParams": {}, "nullable": True},
{"fieldName": "int_array", "dataType": "Array", "elementDataType": "Int64",
"elementTypeParams": {"max_capacity": "1024"}, "nullable": True},
{"fieldName": "varchar_array", "dataType": "Array", "elementDataType": "VarChar",
"elementTypeParams": {"max_capacity": "1024", "max_length": "256"}, "nullable": True},
{"fieldName": "bool_array", "dataType": "Array", "elementDataType": "Bool",
"elementTypeParams": {"max_capacity": "1024"}, "nullable": True},
{"fieldName": "text_emb", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
]
},
"indexParams": [
{"fieldName": "text_emb", "indexName": "text_emb", "metricType": "L2"},
]
}
rsp = self.collection_client.collection_create(payload)
assert rsp['code'] == 0
rsp = self.collection_client.collection_describe(name)
logger.info(f"rsp: {rsp}")
assert rsp['code'] == 0
# insert data
for i in range(insert_round):
data = []
for i in range(nb):
if auto_id:
tmp = {
"user_id": i,
"word_count": None,
"book_describe": None,
"bool": random.choice([True, False]),
"json": None,
"int_array": None,
"varchar_array": None,
"bool_array": None,
"text_emb": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[0].tolist(),
}
else:
tmp = {
"book_id": i,
"user_id": i,
"word_count": None,
"book_describe": None,
"bool": random.choice([True, False]),
"json": None,
"int_array": None,
"varchar_array": None,
"bool_array": None,
"text_emb": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[0].tolist(),
}
if enable_dynamic_schema:
tmp.update({f"dynamic_field_{i}": i})
data.append(tmp)
payload = {
"collectionName": name,
"data": data,
}
rsp = self.vector_client.vector_insert(payload)
assert rsp['code'] == 0
assert rsp['data']['insertCount'] == nb
# query data to make sure the data is inserted
payload = {
"collectionName": name,
"filter": "user_id > 0",
"limit": 50,
"outputFields": ["*"]
}
rsp = self.vector_client.vector_query(payload)
assert rsp['code'] == 0
assert rsp['data'][0]['book_describe'] is None
assert rsp['data'][0]['word_count'] == 8888
assert rsp['data'][0]['json'] is None
assert rsp['data'][0]['varchar_array'] is None
assert len(rsp['data']) == 50
@pytest.mark.L0
class TestQueryVectorNegative(TestBase):