mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-28 22:45:26 +08:00
related: #45993

This commit extends nullable vector support to the proxy layer, querynode, and adds comprehensive validation, search reduce, and field data handling for nullable vectors with sparse storage.

Proxy layer changes:
- Update validate_util.go checkAligned() with getExpectedVectorRows() helper to validate nullable vector field alignment using valid data count
- Update checkFloatVectorFieldData/checkSparseFloatVectorFieldData for nullable vector validation with proper row count expectations
- Add FieldDataIdxComputer in typeutil/schema.go for logical-to-physical index translation during search reduce operations
- Update search_reduce_util.go reduceSearchResultData to use idxComputers for correct field data indexing with nullable vectors
- Update task.go, task_query.go, task_upsert.go for nullable vector handling
- Update msg_pack.go with nullable vector field data processing

QueryNode layer changes:
- Update segments/result.go for nullable vector result handling
- Update segments/search_reduce.go with nullable vector offset translation

Storage and index changes:
- Update data_codec.go and utils.go for nullable vector serialization
- Update indexcgowrapper/dataset.go and index.go for nullable vector indexing

Utility changes:
- Add FieldDataIdxComputer struct with Compute() method for efficient logical-to-physical index mapping across multiple field data
- Update EstimateEntitySize() and AppendFieldData() with fieldIdxs parameter
- Update funcutil.go with nullable vector support functions

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **New Features**
  * Full support for nullable vector fields (float, binary, float16, bfloat16, int8, sparse) across ingest, storage, indexing, search and retrieval; logical↔physical offset mapping preserves row semantics.
  * Client: compaction control and compaction-state APIs.
* **Bug Fixes**
  * Improved validation for adding vector fields (nullable + dimension checks) and corrected search/query behavior for nullable vectors.
* **Chores**
  * Persisted validity maps with indexes and on-disk formats.
* **Tests**
  * Extensive new and updated end-to-end nullable-vector tests.

<sub>✏️ Tip: You can customize this high-level summary in your review settings.</sub>

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: marcelo-cjl <marcelo.chen@zilliz.com>
2072 lines
80 KiB
Python
2072 lines
80 KiB
Python
import datetime
|
|
import logging
|
|
import time
|
|
from utils.util_log import test_log as logger
|
|
from utils.utils import gen_collection_name, gen_vector
|
|
import pytest
|
|
from api.milvus import CollectionClient
|
|
from base.testbase import TestBase
|
|
import threading
|
|
from utils.utils import get_data_by_payload
|
|
from pymilvus import (
|
|
FieldSchema, CollectionSchema, DataType,
|
|
Collection
|
|
)
|
|
|
|
|
|
@pytest.mark.L0
|
|
class TestCreateCollection(TestBase):
|
|
|
|
@pytest.mark.parametrize("dim", [128])
def test_create_collections_quick_setup(self, dim):
    """
    target: test quick-setup collection creation
    method: create a collection from only a name and a vector dimension,
            then list and describe it
    expected: creation succeeds, the collection is listed, and describe
              reports autoId False, dynamic field enabled and a COSINE index
    """
    name = gen_collection_name()
    client = self.collection_client
    # Use the parametrized `dim` directly. It used to be overwritten with a
    # hard-coded 128, which silently disabled the parametrization.
    payload = {
        "collectionName": name,
        "dimension": dim,
    }
    logging.info(f"create collection {name} with payload: {payload}")
    rsp = client.collection_create(payload)
    assert rsp['code'] == 0
    rsp = client.collection_list()

    all_collections = rsp['data']
    assert name in all_collections
    # describe collection
    rsp = client.collection_describe(name)
    assert rsp['code'] == 0
    assert rsp['data']['collectionName'] == name
    assert rsp['data']['autoId'] is False
    assert rsp['data']['enableDynamicField'] is True
    assert "COSINE" in str(rsp['data']["indexes"])
|
|
|
|
@pytest.mark.parametrize("dim", [128])
@pytest.mark.parametrize("metric_type", ["L2", "COSINE", "IP"])
@pytest.mark.parametrize("id_type", ["Int64", "VarChar"])
@pytest.mark.parametrize("primary_field", ["id", "url"])
@pytest.mark.parametrize("vector_field", ["vector", "embedding"])
def test_create_collection_quick_setup_with_custom(self, vector_field, primary_field, dim, id_type, metric_type):
    """
    target: test quick-setup creation with custom field names, id type and metric
    method: create a collection with primaryFieldName, vectorFieldName, idType
            and metricType set, then describe it
    expected: both custom fields exist, the primary field has the requested
              type and is the primary key, and every index uses the metric
    """
    client = self.collection_client
    name = gen_collection_name()
    collection_payload = {
        "collectionName": name,
        "dimension": dim,
        "metricType": metric_type,
        "primaryFieldName": primary_field,
        "vectorFieldName": vector_field,
        "idType": id_type,
    }
    # VarChar primary keys additionally carry a max_length parameter.
    if id_type == "VarChar":
        collection_payload["params"] = {"max_length": "256"}

    create_rsp = client.collection_create(collection_payload)
    assert create_rsp['code'] == 0

    describe_rsp = client.collection_describe(name)
    logger.info(f"rsp: {describe_rsp}")
    assert describe_rsp['code'] == 0
    described = describe_rsp['data']
    assert described['collectionName'] == name

    field_names = [entry["name"] for entry in described['fields']]
    assert primary_field in field_names
    assert vector_field in field_names

    # Only the primary field's type and key flag are verified.
    for entry in described['fields']:
        if entry['name'] != primary_field:
            continue
        assert entry['type'] == id_type
        assert entry['primaryKey'] is True

    for index_entry in described['indexes']:
        assert index_entry['metricType'] == metric_type
|
|
|
|
@pytest.mark.parametrize("enable_dynamic_field", [False, "False", "0"])
@pytest.mark.parametrize("request_shards_num", [2, "2"])
@pytest.mark.parametrize("request_ttl_seconds", [360, "360"])
def test_create_collections_without_params(self, enable_dynamic_field, request_shards_num, request_ttl_seconds):
    """
    target: test create collection with settings supplied under "params"
    method: create a quick-setup collection whose params carry
            enableDynamicField, shardsNum and ttlSeconds in native or string
            form, then describe it
    expected: creation succeeds and describe reports dynamic field disabled,
              2 shards, Strong consistency and a TTL of 360 seconds
    """
    name = gen_collection_name()
    dim = 128
    metric_type = "COSINE"
    client = self.collection_client
    num_shards = 2
    consistency_level = "Strong"
    ttl_seconds = 360
    payload = {
        "collectionName": name,
        "dimension": dim,
        "metricType": metric_type,
        "params": {
            "enableDynamicField": enable_dynamic_field,
            "shardsNum": request_shards_num,
            "consistencyLevel": consistency_level,
            "ttlSeconds": request_ttl_seconds,
        },
    }

    logging.info(f"create collection {name} with payload: {payload}")
    rsp = client.collection_create(payload)
    assert rsp['code'] == 0
    rsp = client.collection_list()

    all_collections = rsp['data']
    assert name in all_collections
    # describe collection by pymilvus
    c = Collection(name)
    res = c.describe()
    logger.info(f"describe collection: {res}")
    # describe collection
    # NOTE(review): fixed 10 s pause before the RESTful describe; presumably
    # lets the server settle - confirm whether it is still required.
    time.sleep(10)
    rsp = client.collection_describe(name)
    logger.info(f"describe collection: {rsp}")

    # Check the status code before touching rsp["data"], so a failed describe
    # is reported as an assertion failure rather than a KeyError/TypeError.
    assert rsp['code'] == 0

    ttl_seconds_actual = None
    for d in rsp["data"]["properties"]:
        if d["key"] == "collection.ttl.seconds":
            ttl_seconds_actual = int(d["value"])
    assert rsp['data']['enableDynamicField'] is False
    assert rsp['data']['collectionName'] == name
    assert rsp['data']['shardsNum'] == num_shards
    assert rsp['data']['consistencyLevel'] == consistency_level
    assert ttl_seconds_actual == ttl_seconds
|
|
|
|
@pytest.mark.parametrize("primary_key_field", ["book_id"])
@pytest.mark.parametrize("partition_key_field", ["word_count"])
@pytest.mark.parametrize("clustering_key_field", ["book_category"])
@pytest.mark.parametrize("shardsNum", [4])
@pytest.mark.parametrize("partitionsNum", [16])
@pytest.mark.parametrize("ttl_seconds", [60])
@pytest.mark.parametrize("metric_type", ["L2", "COSINE", "IP"])
@pytest.mark.parametrize("consistency_level", ["Strong", "Bounded"])
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
@pytest.mark.parametrize("index_type", ["AUTOINDEX", "IVF_SQ8", "HNSW"])
@pytest.mark.parametrize("dim", [128])
def test_create_collections_with_all_params(self,
                                            dim,
                                            index_type,
                                            enable_dynamic_field,
                                            consistency_level,
                                            metric_type,
                                            ttl_seconds,
                                            partitionsNum,
                                            shardsNum,
                                            clustering_key_field,
                                            partition_key_field,
                                            primary_key_field,
                                            ):
    """
    target: test create collection with params, schema, functions and indexes
    method: create a collection with an explicit schema (primary, partition
            and clustering keys, a BM25 function, dense and sparse vectors),
            collection params and two index definitions, then describe it
            through both pymilvus and the RESTful client
    expected: creation succeeds and the described properties, field flags
              and index parameters match the request
    """
    name = gen_collection_name()
    client = self.collection_client
    num_shards = shardsNum
    num_partitions = partitionsNum
    # Build parameters per index type; the BM25 entry is used for the sparse
    # vector index regardless of the parametrized dense index type.
    index_param_map = {
        "FLAT": {},
        "IVF_SQ8": {"nlist": 16384},
        "HNSW": {"M": 16, "efConstruction": 500},
        "BM25_SPARSE_INVERTED_INDEX": {"bm25_k1": 0.5, "bm25_b": 0.5},
        "AUTOINDEX": {}
    }

    payload = {
        "collectionName": name,
        "params": {
            "shardsNum": f"{num_shards}",
            "partitionsNum": f"{num_partitions}",
            "consistencyLevel": f"{consistency_level}",
            "ttlSeconds": f"{ttl_seconds}",
        },
        "schema": {
            "enableDynamicField": enable_dynamic_field,
            "fields": [
                {"fieldName": "user_id", "dataType": "Int64", "elementTypeParams": {}},
                {"fieldName": "book_id", "dataType": "Int64",
                 "isPrimary": primary_key_field == "book_id", "elementTypeParams": {}},
                {"fieldName": "word_count", "dataType": "Int64",
                 "isPartitionKey": partition_key_field == "word_count",
                 "isClusteringKey": clustering_key_field == "word_count", "elementTypeParams": {}},
                {"fieldName": "book_category", "dataType": "Int64",
                 "isPartitionKey": partition_key_field == "book_category",
                 "isClusteringKey": clustering_key_field == "book_category", "elementTypeParams": {}},
                {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
                {"fieldName": "document_content", "dataType": "VarChar",
                 "elementTypeParams": {"max_length": "1000",
                                       "enable_analyzer": True,
                                       "analyzer_params": {
                                           "tokenizer": "standard"
                                       },
                                       "enable_match": True}},
                {"fieldName": "json", "dataType": "JSON", "elementTypeParams": {}},
                {"fieldName": "int_array", "dataType": "Array", "elementDataType": "Int64",
                 "elementTypeParams": {"max_capacity": "1024"}},
                {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
                {"fieldName": "sparse_vector", "dataType": "SparseFloatVector"}
            ],
            "functions": [
                {
                    "name": "bm25_fn",
                    "type": "BM25",
                    "inputFieldNames": ["document_content"],
                    "outputFieldNames": ["sparse_vector"],
                    "params": {}
                }
            ]
        },
        "indexParams": [
            {"fieldName": "book_intro",
             "indexName": "book_intro_vector",
             "metricType": f"{metric_type}",
             "indexType": index_type,
             "params": index_param_map[index_type]
             },
            {"fieldName": "sparse_vector",
             "indexName": "sparse_vector_index",
             "metricType": "BM25",
             "indexType": "SPARSE_INVERTED_INDEX",
             "params": index_param_map["BM25_SPARSE_INVERTED_INDEX"]
             }
        ]
    }

    logging.info(f"create collection {name} with payload: {payload}")
    rsp = client.collection_create(payload)
    assert rsp['code'] == 0
    rsp = client.collection_list()

    all_collections = rsp['data']
    assert name in all_collections
    # describe collection by pymilvus
    c = Collection(name)
    res = c.describe()
    logger.info(f"pymilvus describe collection: {res}")
    # describe collection
    # NOTE(review): fixed 10 s pause before the RESTful describe; presumably
    # lets the server settle - confirm whether it is still required.
    time.sleep(10)
    rsp = client.collection_describe(name)
    logger.info(f"restful describe collection: {rsp}")

    # Check the status code before touching rsp["data"], so a failed describe
    # is reported as an assertion failure rather than a KeyError/TypeError.
    assert rsp['code'] == 0

    ttl_seconds_actual = None
    for d in rsp["data"]["properties"]:
        if d["key"] == "collection.ttl.seconds":
            ttl_seconds_actual = int(d["value"])
    assert rsp['data']['collectionName'] == name
    assert rsp['data']['enableDynamicField'] == enable_dynamic_field
    assert rsp['data']['shardsNum'] == num_shards
    assert rsp['data']['partitionsNum'] == num_partitions
    assert rsp['data']['consistencyLevel'] == consistency_level
    assert ttl_seconds_actual == ttl_seconds
    assert len(rsp['data']["functions"]) == len(payload["schema"]["functions"])

    # check fields properties
    fields = rsp['data']['fields']
    assert len(fields) == len(payload['schema']['fields'])
    for field in fields:
        if field['name'] == primary_key_field:
            assert field['primaryKey'] is True
        if field['name'] == partition_key_field:
            assert field['partitionKey'] is True
        if field['name'] == clustering_key_field:
            assert field['clusteringKey'] is True

    # check index (read back through pymilvus)
    index_info = [index.to_dict() for index in c.indexes]
    logger.info(f"index_info: {index_info}")
    assert len(index_info) == 2
    for index in index_info:
        index_param = index["index_param"]
        if index_param["index_type"] == "SPARSE_INVERTED_INDEX":
            assert index_param["metric_type"] == "BM25"
            assert index_param.get("params", {}) == index_param_map["BM25_SPARSE_INVERTED_INDEX"]
        else:
            assert index_param["metric_type"] == metric_type
            assert index_param["index_type"] == index_type
            assert index_param.get("params", {}) == index_param_map[index_type]
|
|
|
|
@pytest.mark.parametrize("auto_id", [True, False])
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
@pytest.mark.parametrize("enable_partition_key", [True, False])
@pytest.mark.parametrize("dim", [128])
def test_create_collections_custom_without_index(self, dim, auto_id, enable_dynamic_field, enable_partition_key):
    """
    target: test create collection with a custom schema and no index params
    method: create a collection with an explicit schema (two float vector
            fields) and no "indexParams", then describe it through the
            RESTful client and pymilvus
    expected: creation succeeds, autoId / dynamic field / partition key flags
              match the request, no index exists and the collection is not
              loaded
    """
    name = gen_collection_name()
    client = self.collection_client
    # `dim` comes from parametrize; it used to be overwritten with 128 here.
    payload = {
        "collectionName": name,
        "schema": {
            "autoId": auto_id,
            "enableDynamicField": enable_dynamic_field,
            "fields": [
                {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
                {"fieldName": "user_id", "dataType": "Int64", "isPartitionKey": enable_partition_key,
                 "elementTypeParams": {}},
                {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
                {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
                {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
                {"fieldName": "image_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
            ]
        }
    }
    logging.info(f"create collection {name} with payload: {payload}")
    rsp = client.collection_create(payload)
    assert rsp['code'] == 0
    rsp = client.collection_list()

    all_collections = rsp['data']
    assert name in all_collections
    c = Collection(name)
    logger.info(f"schema: {c.schema}")
    # describe collection
    rsp = client.collection_describe(name)
    assert rsp['code'] == 0
    assert rsp['data']['collectionName'] == name
    assert rsp['data']['autoId'] == auto_id
    assert c.schema.auto_id == auto_id
    assert rsp['data']['enableDynamicField'] == enable_dynamic_field
    assert c.schema.enable_dynamic_field == enable_dynamic_field
    # assert no index created
    indexes = rsp['data']['indexes']
    assert len(indexes) == 0
    # assert not loaded
    assert rsp['data']['load'] == "LoadStateNotLoad"
    # the partition key flag must agree in both the RESTful and pymilvus views
    for field in rsp['data']['fields']:
        if field['name'] == "user_id":
            assert field['partitionKey'] == enable_partition_key
    for field in c.schema.fields:
        if field.name == "user_id":
            assert field.is_partition_key == enable_partition_key
|
|
|
|
@pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE"])
@pytest.mark.parametrize("dim", [128])
def test_create_collections_one_float_vector_with_index(self, dim, metric_type):
    """
    target: test create collection with one float vector field and its index
    method: create a collection whose only vector field has an entry in
            "indexParams", then describe it
    expected: creation succeeds, the number of indexes equals the number of
              requested index params and the load state is LoadStateLoaded
    """
    name = gen_collection_name()
    client = self.collection_client
    # `dim` comes from parametrize; it used to be overwritten with 128 here.
    payload = {
        "collectionName": name,
        "schema": {
            "fields": [
                {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
                {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
                {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
                {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
            ]
        },
        "indexParams": [
            {"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": f"{metric_type}"}]
    }
    logging.info(f"create collection {name} with payload: {payload}")
    rsp = client.collection_create(payload)
    assert rsp['code'] == 0
    rsp = client.collection_list()

    all_collections = rsp['data']
    assert name in all_collections
    # describe collection
    # NOTE(review): fixed 10 s pause; presumably waits for index build and
    # load to finish - confirm whether it is still required.
    time.sleep(10)
    rsp = client.collection_describe(name)
    assert rsp['code'] == 0
    assert rsp['data']['collectionName'] == name
    # assert index created
    indexes = rsp['data']['indexes']
    assert len(indexes) == len(payload['indexParams'])
    # assert load success
    assert rsp['data']['load'] == "LoadStateLoaded"
|
|
|
|
@pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE"])
@pytest.mark.parametrize("dim", [128])
def test_create_collections_multi_float_vector_with_one_index(self, dim, metric_type):
    """
    target: test create collection with two float vector fields but one index
    method: create a collection with two float vector fields where only
            book_intro has an entry in "indexParams", then describe it
    expected: the create request returns code 65535, yet the collection is
              listed, has exactly the one requested index and is reported as
              LoadStateNotLoad
    """
    name = gen_collection_name()
    client = self.collection_client
    # `dim` comes from parametrize; it used to be overwritten with 128 here.
    payload = {
        "collectionName": name,
        "schema": {
            "fields": [
                {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
                {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
                {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
                {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
                {"fieldName": "image_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
            ]
        },
        "indexParams": [
            {"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": f"{metric_type}"}]
    }
    logging.info(f"create collection {name} with payload: {payload}")
    rsp = client.collection_create(payload)
    # NOTE(review): the request is expected to report an error although the
    # collection itself gets created; presumably the automatic load fails
    # because image_intro has no index - confirm against the server behavior.
    assert rsp['code'] == 65535
    rsp = client.collection_list()

    all_collections = rsp['data']
    assert name in all_collections
    # describe collection
    time.sleep(10)
    rsp = client.collection_describe(name)
    assert rsp['code'] == 0
    assert rsp['data']['collectionName'] == name
    # assert only the requested index was created
    indexes = rsp['data']['indexes']
    assert len(indexes) == len(payload['indexParams'])
    # assert the collection was NOT loaded
    assert rsp['data']['load'] == "LoadStateNotLoad"
|
|
|
|
@pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE"])
@pytest.mark.parametrize("dim", [128])
def test_create_collections_multi_float_vector_with_all_index(self, dim, metric_type):
    """
    target: test create collection with two float vector fields, both indexed
    method: create a collection with two float vector fields and one
            "indexParams" entry per vector field, then describe it
    expected: creation succeeds, the number of indexes equals the number of
              requested index params and the load state is loaded or loading
    """
    name = gen_collection_name()
    client = self.collection_client
    # `dim` comes from parametrize; it used to be overwritten with 128 here.
    payload = {
        "collectionName": name,
        "schema": {
            "fields": [
                {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
                {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
                {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
                {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
                {"fieldName": "image_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
            ]
        },
        "indexParams": [
            {"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": f"{metric_type}"},
            {"fieldName": "image_intro", "indexName": "image_intro_vector", "metricType": f"{metric_type}"}]
    }
    logging.info(f"create collection {name} with payload: {payload}")
    rsp = client.collection_create(payload)
    assert rsp['code'] == 0
    rsp = client.collection_list()

    all_collections = rsp['data']
    assert name in all_collections
    # describe collection
    # NOTE(review): fixed 10 s pause; presumably waits for index build and
    # load to progress - confirm whether it is still required.
    time.sleep(10)
    rsp = client.collection_describe(name)
    assert rsp['code'] == 0
    assert rsp['data']['collectionName'] == name
    # assert index created
    indexes = rsp['data']['indexes']
    assert len(indexes) == len(payload['indexParams'])
    # assert load finished or is still in progress
    assert rsp['data']['load'] in ["LoadStateLoaded", "LoadStateLoading"]
|
|
|
|
@pytest.mark.parametrize("auto_id", [True])
@pytest.mark.parametrize("enable_dynamic_field", [True])
@pytest.mark.parametrize("enable_partition_key", [True])
@pytest.mark.parametrize("dim", [128])
@pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE"])
def test_create_collections_float16_vector_datatype(self, dim, auto_id, enable_dynamic_field, enable_partition_key,
                                                    metric_type):
    """
    target: test create collection with Float16Vector and BFloat16Vector fields
    method: create a collection with one field of each half-precision vector
            type, each with an index, then describe it
    expected: creation succeeds and the RESTful describe returns as many
              fields as the pymilvus schema
    """
    name = gen_collection_name()
    client = self.collection_client
    # `dim` comes from parametrize; it used to be overwritten with 128 here.
    # `enable_partition_key` is parametrized but not used by this payload.
    payload = {
        "collectionName": name,
        "schema": {
            "autoId": auto_id,
            "enableDynamicField": enable_dynamic_field,
            "fields": [
                {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
                {"fieldName": "float16_vector", "dataType": "Float16Vector",
                 "elementTypeParams": {"dim": f"{dim}"}},
                {"fieldName": "bfloat16_vector", "dataType": "BFloat16Vector",
                 "elementTypeParams": {"dim": f"{dim}"}},
            ]
        },
        "indexParams": [
            {"fieldName": "float16_vector", "indexName": "float16_vector_index", "metricType": f"{metric_type}"},
            {"fieldName": "bfloat16_vector", "indexName": "bfloat16_vector_index", "metricType": f"{metric_type}"}]
    }
    logging.info(f"create collection {name} with payload: {payload}")
    rsp = client.collection_create(payload)
    assert rsp['code'] == 0
    rsp = client.collection_list()

    all_collections = rsp['data']
    assert name in all_collections
    c = Collection(name)
    logger.info(f"schema: {c.schema}")
    # describe collection
    rsp = client.collection_describe(name)
    assert rsp['code'] == 0
    assert rsp['data']['collectionName'] == name
    assert len(rsp['data']['fields']) == len(c.schema.fields)
|
|
|
|
@pytest.mark.parametrize("auto_id", [True])
@pytest.mark.parametrize("enable_dynamic_field", [True])
@pytest.mark.parametrize("enable_partition_key", [True])
@pytest.mark.parametrize("dim", [128])
@pytest.mark.parametrize("metric_type", ["JACCARD", "HAMMING"])
def test_create_collections_binary_vector_datatype(self, dim, auto_id, enable_dynamic_field, enable_partition_key,
                                                   metric_type):
    """
    target: test create collection with a BinaryVector field
    method: create a collection with one binary vector field indexed with a
            binary metric, then describe it
    expected: creation succeeds and the RESTful describe returns as many
              fields as the pymilvus schema
    """
    name = gen_collection_name()
    client = self.collection_client
    # `dim` comes from parametrize; it used to be overwritten with 128 here.
    # `enable_partition_key` is parametrized but not used by this payload.
    payload = {
        "collectionName": name,
        "schema": {
            "autoId": auto_id,
            "enableDynamicField": enable_dynamic_field,
            "fields": [
                {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
                {"fieldName": "binary_vector", "dataType": "BinaryVector", "elementTypeParams": {"dim": f"{dim}"}},
            ]
        },
        "indexParams": [
            {"fieldName": "binary_vector", "indexName": "binary_vector_index", "metricType": f"{metric_type}"}
        ]
    }
    logging.info(f"create collection {name} with payload: {payload}")
    rsp = client.collection_create(payload)
    assert rsp['code'] == 0
    rsp = client.collection_list()

    all_collections = rsp['data']
    assert name in all_collections
    c = Collection(name)
    logger.info(f"schema: {c.schema}")
    # describe collection
    rsp = client.collection_describe(name)
    assert rsp['code'] == 0
    assert rsp['data']['collectionName'] == name
    assert len(rsp['data']['fields']) == len(c.schema.fields)
|
|
|
|
def test_create_collections_concurrent_with_same_param(self):
    """
    target: test create collection with same param
    method: issue ten identical create requests for one collection name
            from ten threads
    expected: every request reports success and the collection exists
    """
    client = self.collection_client
    name = gen_collection_name()
    responses = []

    def _create(c_name, vector_dim, c_metric_type):
        # Each worker records its own response for the checks below.
        result = client.collection_create({
            "collectionName": c_name,
            "dimension": vector_dim,
            "metricType": c_metric_type,
        })
        responses.append(result)
        logger.info(result)

    workers = [
        threading.Thread(target=_create, args=(name, 128, "L2"))
        for _ in range(10)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    time.sleep(10)

    succeeded = [result for result in responses if result['code'] == 0]
    logger.info(responses)
    assert len(succeeded) == 10

    listed = client.collection_list()
    assert name in listed['data']
    # describe collection
    described = client.collection_describe(name)
    assert described['code'] == 0
    assert described['data']['collectionName'] == name
|
|
|
|
def test_create_collections_concurrent_with_different_param(self):
    """
    target: test create collection with different param
    method: issue ten create requests for one collection name from ten
            threads, each with a different dimension (five L2, five IP)
    expected: exactly one request reports success and the collection exists
    """
    client = self.collection_client
    name = gen_collection_name()
    base_dim = 128
    responses = []

    def _create(c_name, vector_dim, c_metric_type):
        # Each worker records its own response for the checks below.
        result = client.collection_create({
            "collectionName": c_name,
            "dimension": vector_dim,
            "metricType": c_metric_type,
        })
        responses.append(result)
        logger.info(result)

    # Offsets 0-4 use L2 and offsets 5-9 use IP; every worker gets its own dimension.
    workers = [
        threading.Thread(target=_create,
                         args=(name, base_dim + offset, "L2" if offset < 5 else "IP"))
        for offset in range(10)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    time.sleep(10)

    succeeded = [result for result in responses if result['code'] == 0]
    logger.info(responses)
    assert len(succeeded) == 1

    listed = client.collection_list()
    assert name in listed['data']
    # describe collection
    described = client.collection_describe(name)
    assert described['code'] == 0
    assert described['data']['collectionName'] == name
|
|
|
|
def test_create_collections_with_nullable_default(self):
    """
    target: test create collection with nullable and default-valued fields
    method: create a collection where word_count has a default value and
            book_describe is nullable with a default value, then describe it
    expected: creation succeeds and the third described field reports the
              string default '123' and nullable True
    """
    client = self.collection_client
    name = gen_collection_name()
    dim = 128
    field_specs = [
        {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
        {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}, "defaultValue": 100},
        {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"},
         "nullable": True, "defaultValue": "123"},
        {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
    ]
    payload = {"collectionName": name, "schema": {"fields": field_specs}}
    logging.info(f"create collection {name} with payload: {payload}")

    create_rsp = client.collection_create(payload)
    assert create_rsp['code'] == 0

    list_rsp = client.collection_list()
    assert name in list_rsp['data']

    # describe collection; index 2 is book_describe in the request order
    describe_rsp = client.collection_describe(name)
    assert describe_rsp['code'] == 0
    described_field = describe_rsp['data']['fields'][2]
    assert described_field['defaultValue'] == {'Data': {'StringData': '123'}}
    assert described_field['nullable'] is True
|
|
|
|
|
|
@pytest.mark.L1
|
|
class TestCreateCollectionNegative(TestBase):
|
|
|
|
def test_create_collections_custom_with_invalid_datatype(self):
    """
    target: test create collection with a misspelled data type
    method: declare a field with dataType "VARCHAR" (the valid spelling is
            "VarChar")
    expected: the create request is rejected with code 1100
    """
    client = self.collection_client
    name = gen_collection_name()
    dim = 128
    field_specs = [
        {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
        {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
        {"fieldName": "book_describe", "dataType": "VARCHAR", "elementTypeParams": {"max_length": "256"}},
        {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
    ]
    payload = {"collectionName": name, "schema": {"fields": field_specs}}
    logging.info(f"create collection {name} with payload: {payload}")
    create_rsp = client.collection_create(payload)
    assert create_rsp['code'] == 1100
|
|
|
|
def test_create_collections_custom_with_invalid_params(self):
    """
    target: test create collection with an invalid schema parameter
    method: send "enableDynamicField" as the integer 1 in the schema
    expected: the create request is rejected with code 1801
    """
    client = self.collection_client
    name = gen_collection_name()
    dim = 128
    field_specs = [
        {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
        {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
        {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
        {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
    ]
    payload = {
        "collectionName": name,
        "schema": {
            "enableDynamicField": 1,
            "fields": field_specs,
        },
    }
    logging.info(f"create collection {name} with payload: {payload}")
    create_rsp = client.collection_create(payload)
    assert create_rsp['code'] == 1801
|
|
|
|
# "test\\collection" is the same runtime value as the former "test\collection",
# spelled with an escaped backslash because "\c" is an invalid escape sequence.
@pytest.mark.parametrize("name",
                         [" ", "test_collection_" * 100, "test collection", "test/collection", "test\\collection"])
def test_create_collections_with_invalid_collection_name(self, name):
    """
    target: test create collection with invalid collection name
    method: create collections with invalid collection name (blank, overly
            long, or containing a space, slash or backslash)
    expected: create collection failed with code 1100 and an invalid-name or
              invalid-parameter message
    """
    dim = 128
    client = self.collection_client
    payload = {
        "collectionName": name,
        "dimension": dim,
    }
    rsp = client.collection_create(payload)
    assert rsp['code'] == 1100
    assert "Invalid collection name" in rsp['message'] or "invalid parameter" in rsp['message']
|
|
|
|
def test_create_collections_with_partition_key_nullable(self):
    """
    target: test create collection with a nullable partition key
    method: mark word_count as both partition key and nullable
    expected: the create request is rejected with code 1100 and a message
              stating that a partition key field does not support nullable
    """
    client = self.collection_client
    name = gen_collection_name()
    dim = 128
    field_specs = [
        {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
        {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}, "isPartitionKey": True,
         "nullable": True},
        {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
        {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
    ]
    payload = {"collectionName": name, "schema": {"fields": field_specs}}
    logging.info(f"create collection {name} with payload: {payload}")
    create_rsp = client.collection_create(payload)
    assert create_rsp['code'] == 1100
    assert "partition key field not support nullable" in create_rsp['message']
|
|
|
|
def test_create_collections_with_primary_default(self):
    """
    target: test create collection whose primary key field has a defaultValue
    method: create a collection with "defaultValue" set on the primary field
    expected: create fails with code 1100 and the "not support default_value" message
    """
    name = gen_collection_name()
    dim = 128
    fields = [
        {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {},
         "defaultValue": 123},
        {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
        {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
    ]
    payload = {"collectionName": name, "schema": {"fields": fields}}
    logging.info(f"create collection {name} with payload: {payload}")
    response = self.collection_client.collection_create(payload)
    assert response['code'] == 1100
    assert "primary field not support default_value" in response['message']
|
|
|
|
def test_create_collections_with_json_field_default(self):
    """
    target: test create collection whose JSON field has a defaultValue
    method: create a collection with "defaultValue" set on a JSON field
    expected: create fails with code 1100 and the "convert defaultValue fail" message
    """
    name = gen_collection_name()
    dim = 128
    fields = [
        {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
        {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
        {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
        {"fieldName": "json", "dataType": "JSON", "elementTypeParams": {}, "defaultValue": {"key": 1}},
        {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
    ]
    payload = {"collectionName": name, "schema": {"fields": fields}}
    logging.info(f"create collection {name} with payload: {payload}")
    response = self.collection_client.collection_create(payload)
    assert response['code'] == 1100
    assert "convert defaultValue fail" in response['message']
|
|
|
|
def test_create_collections_with_array_field_default(self):
    """
    target: test create collection whose Array field has a defaultValue
    method: create a collection with "defaultValue" set on an Int64 Array field
    expected: create fails with code 1100 and the "convert defaultValue fail" message
    """
    name = gen_collection_name()
    dim = 128
    fields = [
        {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
        {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
        {"fieldName": "int_array", "dataType": "Array", "elementDataType": "Int64", "defaultValue": [1, 2],
         "elementTypeParams": {"max_capacity": "1024"}},
        {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
    ]
    payload = {"collectionName": name, "schema": {"fields": fields}}
    logging.info(f"create collection {name} with payload: {payload}")
    response = self.collection_client.collection_create(payload)
    assert response['code'] == 1100
    assert "convert defaultValue fail" in response['message']
|
|
|
|
|
|
@pytest.mark.L0
class TestHasCollections(TestBase):

    def test_has_collections_default(self):
        """
        target: test has collection for collections that were created
        method: create two collections, list collections, call has-collection for each
        expected: each created name is in the list and reported with has == True
        """
        client = self.collection_client
        created_names = []
        for _ in range(2):
            collection_name = gen_collection_name()
            create_payload = {
                "collectionName": collection_name,
                "metricType": "L2",
                "dimension": 128,
            }
            time.sleep(1)
            create_rsp = client.collection_create(create_payload)
            assert create_rsp['code'] == 0
            created_names.append(collection_name)

        listed = client.collection_list()['data']
        for collection_name in created_names:
            assert collection_name in listed
            has_rsp = client.collection_has(collection_name=collection_name)
            assert has_rsp['data']['has'] is True

    def test_has_collections_with_not_exist_name(self):
        """
        target: test has collection for names that were never created
        method: generate two collection names without creating them, list collections,
                call has-collection for each name
        expected: no generated name is in the list and each is reported with has == False
        """
        client = self.collection_client
        # Names are only generated here; no collection is created for them.
        missing_names = [gen_collection_name() for _ in range(2)]

        listed = client.collection_list()['data']
        for collection_name in missing_names:
            assert collection_name not in listed
            has_rsp = client.collection_has(collection_name=collection_name)
            assert has_rsp['data']['has'] is False
|
|
|
|
|
|
@pytest.mark.L0
class TestGetCollectionStats(TestBase):

    def test_get_collections_stats(self):
        """
        target: test get collection stats
        method: create a collection, read its stats, insert rows, call flush, read stats again
        expected: rowCount is 0 right after creation and equals the inserted row count afterwards
        """
        client = self.collection_client
        name = gen_collection_name()
        create_payload = {
            "collectionName": name,
            "metricType": "L2",
            "dimension": 128,
        }
        create_rsp = client.collection_create(create_payload)
        assert create_rsp['code'] == 0

        # describe collection
        client.collection_describe(collection_name=name)
        stats_rsp = client.collection_stats(collection_name=name)
        assert stats_rsp['code'] == 0
        assert stats_rsp['data']['rowCount'] == 0

        # insert data
        nb = 3000
        rows = get_data_by_payload(create_payload, nb)
        self.vector_client.vector_insert(payload={"collectionName": name, "data": rows})

        collection = Collection(name)
        count = collection.query(expr="", output_fields=["count(*)"])
        logger.info(f"count: {count}")
        collection.flush()

        stats_rsp = client.collection_stats(collection_name=name)
        assert stats_rsp['data']['rowCount'] == nb
|
|
|
|
|
|
@pytest.mark.L0
class TestLoadReleaseCollection(TestBase):

    def test_load_and_release_collection(self):
        """
        target: test load and release of a collection
        method: create a collection from an explicit schema, create a vector index,
                load the collection, then release it, checking the load state at each step
        expected: load state is LoadStateNotLoad before load, LoadStateLoaded after load,
                  and LoadStateNotLoad again after release
        """
        name = gen_collection_name()
        dim = 128
        client = self.collection_client
        payload = {
            "collectionName": name,
            "schema": {
                "fields": [
                    {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
                    {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
                    {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
                    {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
                ]
            }
        }
        rsp = client.collection_create(payload)
        assert rsp['code'] == 0

        # create index before load
        index_params = [{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": "L2"}]
        payload = {
            "collectionName": name,
            "indexParams": index_params
        }
        rsp = self.index_client.index_create(payload)
        # Fail here rather than later with a misleading load-state mismatch.
        assert rsp['code'] == 0

        # get load state before load
        rsp = client.collection_load_state(collection_name=name)
        assert rsp['data']['loadState'] == "LoadStateNotLoad"

        # describe collection
        client.collection_describe(collection_name=name)
        rsp = client.collection_load(collection_name=name)
        assert rsp['code'] == 0
        rsp = client.collection_load_state(collection_name=name)
        assert rsp['data']['loadState'] in ["LoadStateLoaded", "LoadStateLoading"]
        time.sleep(5)
        rsp = client.collection_load_state(collection_name=name)
        assert rsp['data']['loadState'] == "LoadStateLoaded"

        # release collection
        rsp = client.collection_release(collection_name=name)
        # The release response was previously ignored; check it explicitly.
        assert rsp['code'] == 0
        time.sleep(5)
        rsp = client.collection_load_state(collection_name=name)
        assert rsp['data']['loadState'] == "LoadStateNotLoad"
|
|
|
|
|
|
@pytest.mark.L0
class TestGetCollectionLoadState(TestBase):

    def test_get_collection_load_state(self):
        """
        target: test get collection load state
        method: create a collection, poll its load state for up to 10 seconds,
                insert rows, then read the load state again
        expected: load state is LoadStateLoading or LoadStateLoaded after creation and
                  after insert, and LoadStateLoaded after a further 10 second wait
        """
        client = self.collection_client
        name = gen_collection_name()
        create_payload = {
            "collectionName": name,
            "metricType": "L2",
            "dimension": 128,
        }
        state_rsp = client.collection_create(create_payload)
        assert state_rsp['code'] == 0

        # describe collection
        client.collection_describe(collection_name=name)
        state_rsp = client.collection_load_state(collection_name=name)
        assert state_rsp['code'] == 0

        # Poll until the state leaves "LoadStateNotLoad" or 10 seconds elapse.
        started = time.time()
        while time.time() - started < 10:
            state_rsp = client.collection_load_state(collection_name=name)
            if state_rsp['data']['loadState'] != "LoadStateNotLoad":
                break
            time.sleep(1)
        loading_or_loaded = ["LoadStateLoading", "LoadStateLoaded"]
        assert state_rsp['data']['loadState'] in loading_or_loaded

        # insert data
        nb = 3000
        rows = get_data_by_payload(create_payload, nb)
        self.vector_client.vector_insert(payload={"collectionName": name, "data": rows})
        state_rsp = client.collection_load_state(collection_name=name)
        assert state_rsp['data']['loadState'] in loading_or_loaded

        time.sleep(10)
        state_rsp = client.collection_load_state(collection_name=name)
        assert state_rsp['data']['loadState'] == "LoadStateLoaded"
|
|
|
|
|
|
@pytest.mark.L0
class TestListCollections(TestBase):

    def test_list_collections_default(self):
        """
        target: test list collection with a simple schema
        method: create two collections and list all collections
        expected: both created collections are in the list
        """
        client = self.collection_client
        created_names = []
        for _ in range(2):
            collection_name = gen_collection_name()
            create_payload = {
                "collectionName": collection_name,
                "metricType": "L2",
                "dimension": 128,
            }
            time.sleep(1)
            create_rsp = client.collection_create(create_payload)
            assert create_rsp['code'] == 0
            created_names.append(collection_name)

        listed = client.collection_list()['data']
        for collection_name in created_names:
            assert collection_name in listed
|
|
|
|
|
|
@pytest.mark.L0
class TestDescribeCollection(TestBase):

    def test_describe_collections_default(self):
        """
        target: test describe collection created from name, dimension and metric type only
        method: create the collection, confirm it is listed, then describe it
        expected: describe returns the collection name, autoId False,
                  enableDynamicField True and exactly one index
        """
        client = self.collection_client
        name = gen_collection_name()
        create_rsp = client.collection_create({
            "collectionName": name,
            "dimension": 128,
            "metricType": "L2"
        })
        assert create_rsp['code'] == 0
        assert name in client.collection_list()['data']

        # describe collection
        described = client.collection_describe(name)
        assert described['code'] == 0
        info = described['data']
        assert info['collectionName'] == name
        assert info['autoId'] is False
        assert info['enableDynamicField'] is True
        assert len(info['indexes']) == 1

    def test_describe_collections_custom(self):
        """
        target: test describe collection created through the SDK with a custom schema
        method: build a schema with a primary key and a partition key field, create the
                collection with ``Collection``, then describe it through the REST client
        expected: describe reports the partition key and primary key flags on the
                  matching fields, autoId False and enableDynamicField True
        """
        client = self.collection_client
        name = gen_collection_name()
        schema = CollectionSchema(
            fields=[
                FieldSchema(name='reviewer_id', dtype=DataType.INT64, description="", is_primary=True),
                FieldSchema(name='store_address', dtype=DataType.VARCHAR, description="", max_length=512,
                            is_partition_key=True),
                FieldSchema(name='review', dtype=DataType.VARCHAR, description="", max_length=16384),
                FieldSchema(name='vector', dtype=DataType.FLOAT_VECTOR, description="", dim=384, is_index=True),
            ],
            description="",
            enable_dynamic_field=True,
            # The following is an alternative to setting `is_partition_key` in a field schema.
            partition_key_field="store_address"
        )
        # Constructed for its effect only; the returned object is not used.
        Collection(
            name=name,
            schema=schema,
        )
        logger.info(f"schema: {schema}")
        assert name in client.collection_list()['data']

        # describe collection
        described = client.collection_describe(name)
        assert described['code'] == 0
        info = described['data']
        assert info['collectionName'] == name

        for field in info['fields']:
            field_name = field['name']
            if field_name == "store_address":
                assert field['partitionKey'] is True
            if field_name == "reviewer_id":
                assert field['primaryKey'] is True
        assert info['autoId'] is False
        assert info['enableDynamicField'] is True
|
|
|
|
|
|
@pytest.mark.L0
class TestDescribeCollectionNegative(TestBase):

    def test_describe_collections_with_invalid_collection_name(self):
        """
        target: test describe collection with a name that was not created
        method: create a collection, then describe the fixed name "invalid_name"
        expected: describe returns code 100 with a "can't find collection" message
        """
        client = self.collection_client
        name = gen_collection_name()
        create_rsp = client.collection_create({
            "collectionName": name,
            "dimension": 128,
        })
        assert create_rsp['code'] == 0
        assert name in client.collection_list()['data']

        # describe collection
        invalid_name = "invalid_name"
        described = client.collection_describe(invalid_name)
        assert described['code'] == 100
        assert "can't find collection" in described['message']
|
|
|
|
|
|
@pytest.mark.L0
class TestDropCollection(TestBase):
    def test_drop_collections_default(self):
        """
        Drop a collection with a simple schema
        target: test drop collection with a simple schema
        method: create five collections, confirm they are listed, drop each one
        expected: dropped collections are no longer in the collection list
        """
        collections_client = self.collection_client
        created_names = []
        for _ in range(5):
            time.sleep(1)
            collection_name = 'test_collection_' + datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f_%f")
            create_rsp = collections_client.collection_create({
                "collectionName": collection_name,
                "dimension": 128,
                "metricType": "L2"
            })
            assert create_rsp['code'] == 0
            created_names.append(collection_name)

        listed = collections_client.collection_list()['data']
        for collection_name in created_names:
            assert collection_name in listed

        for collection_name in created_names:
            time.sleep(0.2)
            drop_rsp = collections_client.collection_drop({
                "collectionName": collection_name,
            })
            assert drop_rsp['code'] == 0

        listed = collections_client.collection_list()['data']
        for collection_name in created_names:
            assert collection_name not in listed
|
|
|
|
|
|
@pytest.mark.L0
class TestDropCollectionNegative(TestBase):

    def test_drop_collections_with_invalid_collection_name(self):
        """
        target: test drop collection with a name that was not created
        method: create a collection, then drop the fixed name "invalid_name"
        expected: the drop request returns code 0
        """
        client = self.collection_client
        name = gen_collection_name()
        create_rsp = client.collection_create({
            "collectionName": name,
            "dimension": 128,
        })
        assert create_rsp['code'] == 0
        assert name in client.collection_list()['data']

        # drop collection
        invalid_name = "invalid_name"
        drop_rsp = client.collection_drop({
            "collectionName": invalid_name,
        })
        assert drop_rsp['code'] == 0
|
|
|
|
|
|
@pytest.mark.L0
class TestRenameCollection(TestBase):

    def test_rename_collection(self):
        """
        target: test rename collection
        method: create a collection, rename it to a newly generated name, list collections
        expected: the new name is in the collection list and the old name is not
        """
        client = self.collection_client
        old_name = gen_collection_name()
        create_rsp = client.collection_create({
            "collectionName": old_name,
            "metricType": "L2",
            "dimension": 128,
        })
        assert create_rsp['code'] == 0
        assert old_name in client.collection_list()['data']

        new_name = gen_collection_name()
        rename_rsp = client.collection_rename({
            "collectionName": old_name,
            "newCollectionName": new_name,
        })
        assert rename_rsp['code'] == 0

        listed = client.collection_list()['data']
        assert new_name in listed
        assert old_name not in listed
|
|
|
|
|
|
@pytest.mark.L1
class TestCollectionWithAuth(TestBase):
    """Collection operations issued with an invalid API key.

    Every test sends the unauthorized request through a separate
    ``CollectionClient`` so that the ``api_key`` of ``self.collection_client``
    is never overwritten.
    """

    def test_drop_collections_with_invalid_api_key(self):
        """
        target: test drop collection with invalid api key
        method: drop collection with invalid api key
        expected: raise error with right error code and message; collection still in collection list
        """
        name = gen_collection_name()
        dim = 128
        client = self.collection_client
        payload = {
            "collectionName": name,
            "dimension": dim,
        }
        rsp = client.collection_create(payload)
        assert rsp['code'] == 0
        rsp = client.collection_list()
        all_collections = rsp['data']
        assert name in all_collections
        # drop collection
        payload = {
            "collectionName": name,
        }
        illegal_client = CollectionClient(self.endpoint, "invalid_api_key")
        rsp = illegal_client.collection_drop(payload)
        assert rsp['code'] == 1800
        rsp = client.collection_list()
        all_collections = rsp['data']
        assert name in all_collections

    def test_describe_collections_with_invalid_api_key(self):
        """
        target: test describe collection with invalid api key
        method: describe collection with invalid api key
        expected: raise error with right error code and message
        """
        name = gen_collection_name()
        dim = 128
        client = self.collection_client
        payload = {
            "collectionName": name,
            "dimension": dim,
        }
        rsp = client.collection_create(payload)
        assert rsp['code'] == 0
        rsp = client.collection_list()
        all_collections = rsp['data']
        assert name in all_collections
        # describe collection
        illegal_client = CollectionClient(self.endpoint, "illegal_api_key")
        rsp = illegal_client.collection_describe(name)
        assert rsp['code'] == 1800

    def test_list_collections_with_invalid_api_key(self):
        """
        target: test list collection with an invalid api key
        method: list collection with invalid api key
        expected: raise error with right error code and message
        """
        client = self.collection_client
        for _ in range(2):
            name = gen_collection_name()
            dim = 128
            payload = {
                "collectionName": name,
                "metricType": "L2",
                "dimension": dim,
            }
            time.sleep(1)
            rsp = client.collection_create(payload)
            assert rsp['code'] == 0
        # Use a dedicated client instead of overwriting the shared client's key,
        # matching the drop/describe tests in this class.
        illegal_client = CollectionClient(self.endpoint, "illegal_api_key")
        rsp = illegal_client.collection_list()
        assert rsp['code'] == 1800

    def test_create_collections_with_invalid_api_key(self):
        """
        target: test create collection with invalid api key(wrong username and password)
        method: create collections with invalid api key
        expected: create collection failed
        """
        name = gen_collection_name()
        dim = 128
        # Dedicated client: the shared client's api_key is left untouched.
        illegal_client = CollectionClient(self.endpoint, "illegal_api_key")
        payload = {
            "collectionName": name,
            "dimension": dim,
        }
        rsp = illegal_client.collection_create(payload)
        assert rsp['code'] == 1800
|
|
|
|
|
|
@pytest.mark.L0
class TestCollectionProperties(TestBase):
    """Test collection property operations"""

    def test_refresh_load_collection(self):
        """
        target: test refresh load collection
        method: create collection, release it, load it, wait for load, then refresh load
        expected: refresh load returns code 0
        """
        client = self.collection_client
        name = gen_collection_name()
        create_rsp = client.collection_create({
            "collectionName": name,
            "dimension": 128,
        })
        assert create_rsp['code'] == 0

        # release collection
        client.collection_release(collection_name=name)
        # load collection
        client.collection_load(collection_name=name)
        client.wait_load_completed(collection_name=name)
        # refresh load
        refresh_rsp = client.refresh_load(collection_name=name)

        assert refresh_rsp['code'] == 0

    def test_alter_collection_properties(self):
        """
        target: test alter collection properties
        method: create collection, release it, set "mmap.enabled" to "true", describe
        expected: alter returns code 0 and describe lists "mmap.enabled" with value "true"
        """
        client = self.collection_client
        name = gen_collection_name()
        create_rsp = client.collection_create({
            "collectionName": name,
            "dimension": 128,
        })
        assert create_rsp['code'] == 0
        client.collection_release(collection_name=name)

        # alter properties
        alter_rsp = client.alter_collection_properties(name, {"mmap.enabled": "true"})
        assert alter_rsp['code'] == 0

        described = client.collection_describe(name)
        mmap_values = [p['value'] for p in described['data']['properties'] if p['key'] == "mmap.enabled"]
        # Every matching entry must carry "true", and at least one must exist.
        for value in mmap_values:
            assert value == "true"
        assert mmap_values

    def test_drop_collection_properties(self):
        """
        target: test drop collection properties
        method: create collection, release it, set "mmap.enabled", then drop that key
        expected: the key is listed with value "true" after alter and absent after drop
        """
        client = self.collection_client
        name = gen_collection_name()
        create_rsp = client.collection_create({
            "collectionName": name,
            "dimension": 128,
        })
        assert create_rsp['code'] == 0
        client.collection_release(collection_name=name)

        # alter properties
        alter_rsp = client.alter_collection_properties(name, {"mmap.enabled": "true"})
        assert alter_rsp['code'] == 0
        described = client.collection_describe(name)
        mmap_values = [p['value'] for p in described['data']['properties'] if p['key'] == "mmap.enabled"]
        for value in mmap_values:
            assert value == "true"
        assert mmap_values

        # drop properties
        drop_rsp = client.drop_collection_properties(name, ["mmap.enabled"])
        assert drop_rsp['code'] == 0
        described = client.collection_describe(name)
        leftover = [p for p in described['data']['properties'] if p['key'] == "mmap.enabled"]
        assert not leftover

    def test_alter_field_properties(self):
        """
        target: test alter field properties
        method: create collection with a VarChar field, release it, change the field's
                max_length from "256" to "100", describing before and after
        expected: alter returns code 0 and any reported max_length param matches
                  "256" before and "100" after the change
        """
        client = self.collection_client
        name = gen_collection_name()
        dim = 128
        schema_fields = [
            {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
            {"fieldName": "user_id", "dataType": "Int64", "isPartitionKey": True,
             "elementTypeParams": {}},
            {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
            {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
            {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
            {"fieldName": "image_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
        ]
        create_rsp = client.collection_create({
            "collectionName": name,
            "schema": {
                "autoId": True,
                "enableDynamicField": True,
                "fields": schema_fields,
            }
        })
        assert create_rsp['code'] == 0
        # release collection
        client.collection_release(collection_name=name)

        # describe collection
        described = client.collection_describe(name)
        for field in described['data']['fields']:
            if field['name'] != "book_describe":
                continue
            for param in field['params']:
                if param['key'] == "max_length":
                    assert param['value'] == "256"

        # alter field properties
        alter_rsp = client.alter_field_properties(name, "book_describe", {"max_length": "100"})
        assert alter_rsp['code'] == 0

        # describe collection
        described = client.collection_describe(name)
        for field in described['data']['fields']:
            if field['name'] != "book_describe":
                continue
            for param in field['params']:
                if param['key'] == "max_length":
                    assert param['value'] == "100"
|
|
|
|
|
|
class TestCollectionAddField(TestBase):
    """Test collection add field operations"""

    @pytest.mark.parametrize("field_params,test_data_generator,expected_validations", [
        # Test case 1: Int64 nullable field
        (
            {
                "fieldName": "new_int_field",
                "dataType": "Int64",
                "nullable": True,
                "elementTypeParams": {}
            },
            lambda i: i * 10,  # Generate int values
            {
                "field_type": "Int64",
                "nullable": True,
                "has_default": False,
                "data_validator": lambda item, i: item["new_int_field"] == i * 10
            }
        ),
        # Test case 2: VarChar field with max_length
        (
            {
                "fieldName": "new_varchar_field",
                "dataType": "VarChar",
                "nullable": True,
                "elementTypeParams": {"max_length": "256"}
            },
            lambda i: f"description_{i}",  # Generate string values
            {
                "field_type": "VarChar",
                "nullable": True,
                "has_default": False,
                "max_length": "256",
                "data_validator": lambda item, i: item["new_varchar_field"] == f"description_{i}"
            }
        ),
        # Test case 3: Int64 field with default value
        (
            {
                "fieldName": "new_field_with_default",
                "dataType": "Int64",
                "nullable": True,
                "defaultValue": 42,
                "elementTypeParams": {}
            },
            lambda i: i * 100,  # Generate int values when explicitly provided
            {
                "field_type": "Int64",
                "nullable": True,
                "has_default": True,
                "default_value": 42,
                "data_validator": lambda item, i: item["new_field_with_default"] == i * 100
            }
        ),
        # Test case 4: Array field
        (
            {
                "fieldName": "new_array_field",
                "dataType": "Array",
                "elementDataType": "Int64",
                "nullable": True,
                "elementTypeParams": {"max_capacity": "1024"}
            },
            lambda i: [i * 10, i * 20, i * 30],  # Generate array values
            {
                "field_type": "Array",
                "nullable": True,
                "has_default": False,
                "element_type": "Int64",
                "data_validator": lambda item, i: item["new_array_field"] == [i * 10, i * 20, i * 30]
            }
        )
    ])
    def test_add_field_parametrized(self, field_params, test_data_generator, expected_validations):
        """
        target: test add various types of fields
        method: create collection, insert data, add field, insert and query again
        expected: add field success and data operations work before and after

        field_params: request body passed to ``add_field``
        test_data_generator: callable mapping a book_id to the value inserted for the new field
        expected_validations: expected field properties plus a ``data_validator``
            callable ``(item, book_id) -> bool`` applied to queried rows
        """
        name = gen_collection_name()
        dim = 128
        nb = 3000  # Number of records to insert in each batch
        client = self.collection_client
        vector_client = self.vector_client
        field_name = field_params["fieldName"]
        has_default = bool(expected_validations.get("has_default"))
        # For default-value cases, book_ids below this cutoff are inserted
        # without the new field and are skipped by the explicit-value check.
        default_cutoff = nb + nb // 2

        # Create collection first
        payload = {
            "collectionName": name,
            "schema": {
                "fields": [
                    {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
                    {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
                ]
            },
            "indexParams": [
                {"fieldName": "book_intro", "indexName": "book_intro_index", "metricType": "L2"}
            ]
        }
        rsp = client.collection_create(payload)
        assert rsp['code'] == 0

        # Wait for collection to be loaded
        client.wait_load_completed(collection_name=name)

        # Insert data before adding field
        insert_data_before = [
            {"book_id": i, "book_intro": gen_vector(dim=dim)}
            for i in range(nb)
        ]
        insert_payload = {
            "collectionName": name,
            "data": insert_data_before
        }
        rsp = vector_client.vector_insert(insert_payload)
        assert rsp['code'] == 0

        # Query data before adding field
        query_payload = {
            "collectionName": name,
            "expr": "book_id >= 0",
            "outputFields": ["book_id"],
            "limit": nb
        }
        rsp = vector_client.vector_query(query_payload)
        assert rsp['code'] == 0
        assert len(rsp['data']) == nb

        # Search data before adding field
        search_payload = {
            "collectionName": name,
            "data": [gen_vector(dim=dim)],
            "annsField": "book_intro",
            "limit": 100
        }
        rsp = vector_client.vector_search(search_payload)
        assert rsp['code'] == 0
        assert len(rsp['data']) > 0

        # Add field
        rsp = client.add_field(name, field_params)
        logger.info(f"add field response: {rsp}")
        assert rsp['code'] == 0

        # Verify field was added
        rsp = client.collection_describe(name)
        assert rsp['code'] == 0
        field_names = [field["name"] for field in rsp['data']['fields']]
        assert field_name in field_names

        # Check the field properties
        for field in rsp['data']['fields']:
            if field['name'] != field_name:
                continue
            assert field['type'] == expected_validations["field_type"]
            assert field['nullable'] == expected_validations["nullable"]

            # Check specific field type properties
            if expected_validations.get("max_length"):
                for param in field.get('params', []):
                    if param['key'] == 'max_length':
                        assert param['value'] == expected_validations["max_length"]

            if expected_validations.get("element_type"):
                assert field.get('elementType') == expected_validations["element_type"]

            if has_default:
                assert field.get('defaultValue') is not None

        # Insert data after adding field
        insert_data_after = []
        for i in range(nb, nb * 2):
            data_item = {
                "book_id": i,
                "book_intro": gen_vector(dim=dim)
            }
            # For default value test, omit the field for the first half of the
            # records so the default is exercised; otherwise set it explicitly.
            if not (has_default and i < default_cutoff):
                data_item[field_name] = test_data_generator(i)
            insert_data_after.append(data_item)

        insert_payload = {
            "collectionName": name,
            "data": insert_data_after
        }
        rsp = vector_client.vector_insert(insert_payload)
        assert rsp['code'] == 0

        # Query data after adding field
        query_payload = {
            "collectionName": name,
            "expr": f"book_id >= {nb}",
            "outputFields": ["book_id", field_name],
            "limit": nb
        }
        rsp = vector_client.vector_query(query_payload)
        assert rsp['code'] == 0
        assert len(rsp['data']) == nb

        # Validate field data for records that have explicit values
        validator = expected_validations.get("data_validator")
        for item in rsp['data']:
            assert field_name in item
            book_id = item["book_id"]
            # Only validate explicit values (not default values)
            if has_default and book_id < default_cutoff:
                continue
            if validator:
                # The validator returns a bool; it must be asserted, otherwise
                # a mismatch is silently ignored.
                assert validator(item, book_id), \
                    f"unexpected value for {field_name} at book_id {book_id}: {item[field_name]}"

        # Search data after adding field
        search_payload = {
            "collectionName": name,
            "data": [gen_vector(dim=dim)],
            "annsField": "book_intro",
            "limit": 100,
            "outputFields": ["book_id", field_name]
        }
        rsp = vector_client.vector_search(search_payload)
        assert rsp['code'] == 0
        assert len(rsp['data']) > 0
|
|
|
|
|
|
@pytest.mark.L1
|
|
class TestCollectionAddFieldNegative(TestBase):
|
|
"""Test collection add field negative cases"""
|
|
|
|
def test_add_field_missing_data_type(self):
    """
    target: test add field with missing dataType
    method: create collection, add field without dataType parameter
    expected: add field failed with proper error message
    """
    name = gen_collection_name()
    dim = 128
    client = self.collection_client

    # Create collection first
    payload = {
        "collectionName": name,
        "schema": {
            "fields": [
                {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
                {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
            ]
        }
    }
    rsp = client.collection_create(payload)
    assert rsp['code'] == 0

    # Try to add field without dataType
    field_params = {
        "fieldName": "new_field",
        "nullable": True,
        "elementTypeParams": {}
    }
    rsp = client.add_field(name, field_params)
    logger.info(f"add field response: {rsp}")
    assert rsp['code'] != 0
    # The message is lower-cased, so the needle must be lower-case too;
    # the mixed-case "dataType" could never match.
    message = rsp.get('message', '').lower()
    assert "datatype" in message or "required" in message
|
|
|
|
def test_add_field_invalid_default_value_type(self):
|
|
"""
|
|
target: test add field with invalid defaultValue type
|
|
method: create collection, add Int64 field with string defaultValue
|
|
expected: add field failed with proper error message
|
|
"""
|
|
name = gen_collection_name()
|
|
dim = 128
|
|
client = self.collection_client
|
|
|
|
# Create collection first
|
|
payload = {
|
|
"collectionName": name,
|
|
"schema": {
|
|
"fields": [
|
|
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
|
|
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
|
|
]
|
|
}
|
|
}
|
|
rsp = client.collection_create(payload)
|
|
assert rsp['code'] == 0
|
|
|
|
# Try to add Int64 field with string defaultValue
|
|
field_params = {
|
|
"fieldName": "new_field",
|
|
"dataType": "Int64",
|
|
"nullable": True,
|
|
"defaultValue": "aaa", # Invalid type for Int64
|
|
"elementTypeParams": {}
|
|
}
|
|
rsp = client.add_field(name, field_params)
|
|
logger.info(f"add field response: {rsp}")
|
|
assert rsp['code'] != 0
|
|
assert "defaultValue" in rsp.get('message', '') or "invalid" in rsp.get('message', '').lower()
|
|
|
|
def test_add_field_invalid_data_type(self):
|
|
"""
|
|
target: test add field with invalid dataType
|
|
method: create collection, add field with invalid dataType
|
|
expected: add field failed with proper error message
|
|
"""
|
|
name = gen_collection_name()
|
|
dim = 128
|
|
client = self.collection_client
|
|
|
|
# Create collection first
|
|
payload = {
|
|
"collectionName": name,
|
|
"schema": {
|
|
"fields": [
|
|
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
|
|
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
|
|
]
|
|
}
|
|
}
|
|
rsp = client.collection_create(payload)
|
|
assert rsp['code'] == 0
|
|
|
|
# Try to add field with invalid dataType
|
|
field_params = {
|
|
"fieldName": "new_field",
|
|
"dataType": "LONGLONGLONGLONGTEXT", # Invalid dataType
|
|
"nullable": True,
|
|
"elementTypeParams": {}
|
|
}
|
|
rsp = client.add_field(name, field_params)
|
|
logger.info(f"add field response: {rsp}")
|
|
assert rsp['code'] != 0
|
|
assert "invalid" in rsp.get('message', '').lower() or "data type" in rsp.get('message', '').lower()
|
|
|
|
def test_add_field_array_missing_element_data_type(self):
|
|
"""
|
|
target: test add Array field without elementDataType
|
|
method: create collection, add Array field without elementDataType
|
|
expected: add field failed with proper error message
|
|
"""
|
|
name = gen_collection_name()
|
|
dim = 128
|
|
client = self.collection_client
|
|
|
|
# Create collection first
|
|
payload = {
|
|
"collectionName": name,
|
|
"schema": {
|
|
"fields": [
|
|
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
|
|
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
|
|
]
|
|
}
|
|
}
|
|
rsp = client.collection_create(payload)
|
|
assert rsp['code'] == 0
|
|
|
|
# Try to add Array field without elementDataType
|
|
field_params = {
|
|
"fieldName": "new_array_field",
|
|
"dataType": "Array",
|
|
"nullable": True,
|
|
"elementTypeParams": {}
|
|
}
|
|
rsp = client.add_field(name, field_params)
|
|
logger.info(f"add field response: {rsp}")
|
|
assert rsp['code'] != 0
|
|
assert "element" in rsp.get('message', '').lower() or "invalid" in rsp.get('message', '').lower()
|
|
|
|
def test_add_field_array_invalid_element_data_type(self):
|
|
"""
|
|
target: test add Array field with invalid elementDataType
|
|
method: create collection, add Array field with invalid elementDataType
|
|
expected: add field failed with proper error message
|
|
"""
|
|
name = gen_collection_name()
|
|
dim = 128
|
|
client = self.collection_client
|
|
|
|
# Create collection first
|
|
payload = {
|
|
"collectionName": name,
|
|
"schema": {
|
|
"fields": [
|
|
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
|
|
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
|
|
]
|
|
}
|
|
}
|
|
rsp = client.collection_create(payload)
|
|
assert rsp['code'] == 0
|
|
|
|
# Try to add Array field with invalid elementDataType
|
|
field_params = {
|
|
"fieldName": "new_array_field",
|
|
"dataType": "Array",
|
|
"elementDataType": "MYBLOB", # Invalid elementDataType
|
|
"nullable": True,
|
|
"elementTypeParams": {}
|
|
}
|
|
rsp = client.add_field(name, field_params)
|
|
logger.info(f"add field response: {rsp}")
|
|
assert rsp['code'] != 0
|
|
assert "element" in rsp.get('message', '').lower() or "invalid" in rsp.get('message', '').lower()
|
|
|
|
def test_add_field_to_nonexistent_collection(self):
|
|
"""
|
|
target: test add field to non-existent collection
|
|
method: add field to a collection that doesn't exist
|
|
expected: add field failed with proper error message
|
|
"""
|
|
name = "nonexistent_collection"
|
|
client = self.collection_client
|
|
|
|
# Try to add field to non-existent collection
|
|
field_params = {
|
|
"fieldName": "new_field",
|
|
"dataType": "Int64",
|
|
"nullable": True,
|
|
"elementTypeParams": {}
|
|
}
|
|
rsp = client.add_field(name, field_params)
|
|
logger.info(f"add field response: {rsp}")
|
|
assert rsp['code'] != 0
|
|
assert "collection" in rsp.get('message', '').lower() or "not found" in rsp.get('message', '').lower()
|
|
|
|
|
|
@pytest.mark.L0
class TestCollectionMaintenance(TestBase):
    """Test collection maintenance operations (flush and compact)."""

    def _create_vector_collection(self, dim=128):
        """Create a collection with an Int64 primary key and a FloatVector field.

        Args:
            dim: dimension of the ``my_vector`` field.

        Returns:
            The generated collection name.
        """
        name = gen_collection_name()
        payload = {
            "collectionName": name,
            "schema": {
                "fields": [
                    {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
                    {"fieldName": "my_vector", "dataType": "FloatVector", "elementTypeParams": {"dim": dim}}
                ]
            }
        }
        response = self.collection_client.collection_create(payload)
        # Fail here rather than on a later insert if the collection is missing.
        assert response["code"] == 0
        return name

    def _insert_batch(self, name, batch_index, batch_size=10, dim=128):
        """Insert one batch of ``batch_size`` rows with unique primary keys.

        Primary keys are ``batch_index * batch_size + offset``, so batches
        with different ``batch_index`` never share a ``book_id``. The batch
        index and the in-batch offset use distinct names on purpose: reusing
        one name for both inside a comprehension shadows the batch index and
        makes every batch produce the same keys.

        Args:
            name: target collection name.
            batch_index: zero-based index of the batch.
            batch_size: number of rows in the batch.
            dim: dimension of the generated vectors.
        """
        rows = [
            {
                "book_id": batch_index * batch_size + offset,
                "my_vector": gen_vector(dim=dim)
            }
            for offset in range(batch_size)
        ]
        insert_data = {
            "collectionName": name,
            "data": rows
        }
        response = self.vector_client.vector_insert(insert_data)
        assert response["code"] == 0

    @pytest.mark.xfail(reason="issue: https://github.com/milvus-io/milvus/issues/39546")
    def test_collection_flush(self):
        """
        target: test collection flush
        method: create collection, insert data multiple times and flush
        expected: flush successfully
        """
        # Create collection
        name = self._create_vector_collection()

        # Insert small batches of data multiple times
        for batch_index in range(3):
            self._insert_batch(name, batch_index)

        c = Collection(name)
        num_entities_before_flush = c.num_entities

        # Flush collection
        response = self.collection_client.flush(name)
        assert response["code"] == 0

        # check segments
        num_entities_after_flush = c.num_entities
        logger.info(f"num_entities_before_flush: {num_entities_before_flush}, num_entities_after_flush: {num_entities_after_flush}")
        assert num_entities_after_flush > num_entities_before_flush

    def test_collection_compact(self):
        """
        target: test collection compact
        method: create collection, insert data, flush multiple times, then compact
        expected: compact successfully
        """
        # Create collection
        name = self._create_vector_collection()

        # The handle does not change between iterations, so build it once.
        c = Collection(name)

        # Insert and flush multiple times
        for batch_index in range(3):
            # Insert data
            self._insert_batch(name, batch_index)

            # Flush after each insert
            c.flush()

        # Compact collection
        response = self.collection_client.compact(name)
        assert response["code"] == 0

        # Get compaction state
        response = self.collection_client.get_compaction_state(name)
        assert response["code"] == 0
        assert "state" in response["data"]
        assert "compactionID" in response["data"]
        # TODO need verification by pymilvus
|
|
|