mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Pull Request Summary: Test Case Updates for API Behavior Changes
**Core Invariant**: These test case updates reflect backend API
improvements to error messaging and schema information returned by
collection operations. The changes maintain backward compatibility—no
public signatures change, and all modifications are test expectation
updates.
**Updated Error Messages for Better Diagnostics**:
- `test_add_field_feature.py`: Updated expected error when adding a
vector field without dimension specification from a generic "not support
to add vector field" to the more descriptive "vector field must have
dimension specified, field name = {field_name}: invalid parameter". This
change is non-breaking for clients that only check error codes; it
improves developer experience with clearer error context.
**Schema Information Extension**:
- `test_milvus_client_collection.py`: Added `enable_namespace: False` to
the expected `describe_collection()` output. This is a new boolean field
in the collection metadata that defaults to False, representing an
opt-in feature. Existing code querying describe_collection continues to
work; the new field is simply an additional property in the response
dictionary.
**Dynamic Error Message Construction**:
- `test_milvus_client_search_invalid.py`: Replaced hardcoded error
message with conditional logic that generates the appropriate error
based on input state (None vectors vs invalid vector data). This
prevents brittle test failures when multiple error conditions exist, and
correctly validates that the API handles both "missing data" and
"malformed data" cases distinctly.
**No Regression Risk**: All changes update test expectations to match
improved backend behavior. The changes are additive (new field in
schema) or clarifying (better error messages), with no modifications to
existing response structures or behavior for valid inputs.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
Signed-off-by: nico <cheng.yuan@zilliz.com>
715 lines
40 KiB
Python
715 lines
40 KiB
Python
import random
|
|
import time
|
|
|
|
import pytest
|
|
from base.client_v2_base import TestMilvusClientV2Base
|
|
from common import common_func as cf
|
|
from common import common_type as ct
|
|
from common.common_type import CaseLabel, CheckTasks
|
|
from utils.util_pymilvus import DataType
|
|
import numpy as np
|
|
|
|
# Shared constants for all "add field" test cases in this module.
prefix = "add_field"
# Canonical field names used when building schemas in the tests below.
default_vector_field_name = "vector"
default_primary_key_field_name = "id"
default_string_field_name = "varchar"
default_float_field_name = "float"
# Name used for fields added via add_collection_field after creation.
default_new_field_name = "field_new"
# NOTE: intentionally identical to default_new_field_name — the dynamic-field
# tests use this collision to verify behavior when an added field shares a
# name with an existing dynamic ($meta) key.
default_dynamic_field_name = "field_new"
# Dictionary key expected by CheckTasks.check_query_results for the expected result set.
exp_res = "exp_res"
# Default number of entities inserted per batch.
default_nb = 2000
# Default vector dimension.
default_dim = 128
# Default search topk / result limit.
default_limit = 10
|
|
|
|
|
class TestMilvusClientAddFieldFeature(TestMilvusClientV2Base):
    """Positive test cases for the add-field feature (CaseLabel L0/L1).

    Each test creates its own collection, exercises add_collection_field
    before/after inserts, and validates search/query behavior against the
    framework's CheckTasks validators.
    """

    @pytest.mark.tags(CaseLabel.L0)
    def test_milvus_client_collection_add_field(self):
        """
        target: test self create collection normal case about add field
        method: create collection with added field
        expected: create collection with default schema, index, and load successfully
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        dim = 128
        # 1. create collection
        schema = self.create_schema(client, enable_dynamic_field=False)[0]
        schema.add_field("id_string", DataType.VARCHAR, max_length=64, is_primary=True, auto_id=False)
        schema.add_field("embeddings", DataType.FLOAT_VECTOR, dim=dim)
        schema.add_field("title", DataType.VARCHAR, max_length=64, is_partition_key=True)
        schema.add_field("nullable_field", DataType.INT64, nullable=True, default_value=10)
        schema.add_field("array_field", DataType.ARRAY, element_type=DataType.INT64, max_capacity=12,
                         max_length=64, nullable=True)
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index("embeddings", metric_type="COSINE")

        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
        collections = self.list_collections(client)[0]
        assert collection_name in collections
        check_items = {"collection_name": collection_name,
                       "dim": dim,
                       "consistency_level": 0,
                       "enable_dynamic_field": False,
                       "num_partitions": 16,
                       "id_name": "id_string",
                       "vector_name": "embeddings"}
        # FIX: was `is_cluster_key=True` — every other call in this module uses the
        # pymilvus keyword `is_clustering_key`; the misspelled kwarg was silently ignored.
        self.add_collection_field(client, collection_name, field_name="field_new_int64", data_type=DataType.INT64,
                                  nullable=True, is_clustering_key=True, mmap_enabled=True)
        # FIX: was misspelled `default_vaule=...`, so the intended default value
        # was never actually applied to the added field.
        self.add_collection_field(client, collection_name, field_name="field_new_var", data_type=DataType.VARCHAR,
                                  nullable=True, default_value="field_new_var", max_length=64, mmap_enabled=True)
        check_items["add_fields"] = ["field_new_int64", "field_new_var"]
        self.describe_collection(client, collection_name,
                                 check_task=CheckTasks.check_describe_collection_property,
                                 check_items=check_items)
        index = self.list_indexes(client, collection_name)[0]
        assert index == ['embeddings']
        if self.has_collection(client, collection_name)[0]:
            self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L0)
    def test_milvus_client_compact_with_added_field(self):
        """
        target: test clustering compaction with added field as cluster key
        method: create connection, collection, insert, add field, insert and compact
        expected: successfully
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        dim, default_value = 128, 1
        # 1. create collection
        schema = self.create_schema(client, enable_dynamic_field=False)[0]
        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(default_vector_field_name, metric_type="COSINE")
        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
        # 2. insert
        rng = np.random.default_rng(seed=19530)
        rows = [
            {default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
             default_string_field_name: str(i)} for i in range(10 * default_nb)]
        self.insert(client, collection_name, rows)
        # 3. add collection field as the clustering key
        self.add_collection_field(client, collection_name, field_name=default_new_field_name, data_type=DataType.INT64,
                                  nullable=True, is_clustering_key=True, default_value=default_value)
        cost = 180  # max seconds to wait for compaction to complete
        vectors = cf.gen_vectors(default_nb, dim, vector_data_type=DataType.FLOAT_VECTOR)
        vectors_to_search = [vectors[0]]
        # 4. insert new field after add field
        rows_new = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
                     default_string_field_name: str(i), default_new_field_name: random.randint(1, 1000)}
                    for i in range(10 * default_nb, 11 * default_nb)]
        self.insert(client, collection_name, rows_new)
        # 5. compact and poll until completed or timed out
        compact_id = self.compact(client, collection_name)[0]
        start = time.time()
        while True:
            time.sleep(1)
            res = self.get_compaction_state(client, compact_id)[0]
            if res == "Completed":
                break
            if time.time() - start > cost:
                raise Exception(1, f"Compact after index cost more than {cost}s")
        self.wait_for_index_ready(client, collection_name, default_vector_field_name)
        self.release_collection(client, collection_name)
        time.sleep(10)
        self.load_collection(client, collection_name)
        insert_ids = [i for i in range(10 * default_nb)]
        # 6. search with default value (rows inserted before the field existed)
        self.search(client, collection_name, vectors_to_search, filter=f'{default_new_field_name} == {default_value}',
                    output_fields=[default_new_field_name],
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": len(vectors_to_search),
                                 "ids": insert_ids,
                                 "pk_name": default_primary_key_field_name,
                                 "limit": default_limit})
        insert_ids = [i for i in range(10 * default_nb, 11 * default_nb)]
        # 7. search with new data(no default value)
        # NOTE(review): rows_new uses random.randint(1, 1000), which can collide with
        # default_value == 1; the "!=" expectation assumes the checker accepts subsets.
        self.search(client, collection_name, vectors_to_search,
                    filter=f'{default_new_field_name} != {default_value}',
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": len(vectors_to_search),
                                 "ids": insert_ids,
                                 "pk_name": default_primary_key_field_name,
                                 "limit": default_limit})
        self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L1)
    def test_milvus_client_insert_with_old_and_added_field(self):
        """
        target: test search (high level api) normal case
        method: create connection, collection, insert, add field, insert old/new field and search
        expected: search/query successfully
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        dim = 8
        # 1. create collection
        schema = self.create_schema(client, enable_dynamic_field=False)[0]
        schema.add_field(default_primary_key_field_name, DataType.INT64, max_length=64, is_primary=True, auto_id=False)
        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64, is_partition_key=True)
        schema.add_field(default_float_field_name, DataType.FLOAT, nullable=True)
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(default_vector_field_name, metric_type="COSINE")
        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
        # 2. insert before add field
        vectors = cf.gen_vectors(default_nb * 3, dim, vector_data_type=DataType.FLOAT_VECTOR)
        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
        results = self.insert(client, collection_name, rows)[0]
        assert results['insert_count'] == default_nb
        # 3. add new field
        self.add_collection_field(client, collection_name, field_name=default_new_field_name, data_type=DataType.VARCHAR,
                                  nullable=True, max_length=64)
        vectors_to_search = [vectors[0]]
        insert_ids = [i for i in range(default_nb)]
        # 4. check old data search is not impacted after add new field
        self.search(client, collection_name, vectors_to_search,
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": len(vectors_to_search),
                                 "ids": insert_ids,
                                 "pk_name": default_primary_key_field_name,
                                 "limit": default_limit})
        # 5. insert data(old field)
        rows_old = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
                     default_float_field_name: i * 1.0,
                     default_string_field_name: str(i)} for i in range(default_nb, default_nb * 2)]
        results = self.insert(client, collection_name, rows_old)[0]
        assert results['insert_count'] == default_nb
        insert_ids_with_old_field = [i for i in range(default_nb, default_nb * 2)]
        # 6. insert data(new field)
        rows_new = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
                     default_float_field_name: i * 1.0, default_string_field_name: str(i),
                     default_new_field_name: default_new_field_name} for i in range(default_nb * 2, default_nb * 3)]
        results = self.insert(client, collection_name, rows_new)[0]
        assert results['insert_count'] == default_nb
        insert_ids_with_new_field = [i for i in range(default_nb * 2, default_nb * 3)]
        # 7. search filtered with the new field: rows inserted without it are null
        self.search(client, collection_name, vectors_to_search,
                    filter='field_new is null',
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": len(vectors_to_search),
                                 "ids": insert_ids + insert_ids_with_old_field,
                                 "pk_name": default_primary_key_field_name,
                                 "limit": default_limit})
        self.search(client, collection_name, vectors_to_search,
                    filter="field_new=='field_new'",
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": len(vectors_to_search),
                                 "ids": insert_ids_with_new_field,
                                 "pk_name": default_primary_key_field_name,
                                 "limit": default_limit})
        self.release_collection(client, collection_name)
        self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L1)
    def test_milvus_client_upsert_with_added_field(self):
        """
        target: test upsert (high level api) normal case
        method: create connection, collection, insert, add field, upsert and search
        expected: upsert/search successfully
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        # 1. create collection
        self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
        collections = self.list_collections(client)[0]
        assert collection_name in collections
        self.describe_collection(client, collection_name,
                                 check_task=CheckTasks.check_describe_collection_property,
                                 check_items={"collection_name": collection_name,
                                              "dim": default_dim,
                                              "consistency_level": 0})
        # 2. insert before add field
        vectors = cf.gen_vectors(default_nb * 3, default_dim, vector_data_type=DataType.FLOAT_VECTOR)
        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
        results = self.insert(client, collection_name, rows)[0]
        assert results['insert_count'] == default_nb
        # 3. add new field
        self.add_collection_field(client, collection_name, field_name=default_new_field_name, data_type=DataType.VARCHAR,
                                  nullable=True, max_length=64)
        half_default_nb = default_nb // 2  # was: int (default_nb/2)
        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
                 default_float_field_name: i * 1.0, default_string_field_name: str(i),
                 default_new_field_name: "default"} for i in range(half_default_nb)]
        results = self.upsert(client, collection_name, rows)[0]
        assert results['upsert_count'] == half_default_nb
        vectors_to_search = [vectors[0]]
        insert_ids = [i for i in range(half_default_nb)]
        insert_ids_with_new_field = [i for i in range(half_default_nb, default_nb)]
        # 4. search filtered with the new field: only the non-upserted half is null
        self.search(client, collection_name, vectors_to_search,
                    filter='field_new is null',
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": len(vectors_to_search),
                                 "ids": insert_ids_with_new_field,
                                 "pk_name": default_primary_key_field_name,
                                 "limit": default_limit})
        self.search(client, collection_name, vectors_to_search,
                    filter="field_new=='default'",
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": len(vectors_to_search),
                                 "ids": insert_ids,
                                 "pk_name": default_primary_key_field_name,
                                 "limit": default_limit})
        self.release_collection(client, collection_name)
        self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("new_field_name", [default_dynamic_field_name, "new_field"])
    def test_milvus_client_search_query_enable_dynamic_and_add_field(self, new_field_name):
        """
        target: test search (high level api) normal case
        method: create connection, collection, insert, add field(same as dynamic and different as dynamic) and search
        expected: search/query successfully
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        dim = 8
        # 1. create collection
        schema = self.create_schema(client, enable_dynamic_field=True)[0]
        schema.add_field(default_primary_key_field_name, DataType.INT64, max_length=64, is_primary=True, auto_id=False)
        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64, is_partition_key=True)
        schema.add_field(default_float_field_name, DataType.FLOAT, nullable=True)
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(default_vector_field_name, metric_type="COSINE")
        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
        # 2. insert (includes a dynamic key colliding with the field added later)
        vectors = cf.gen_vectors(default_nb, dim, vector_data_type=DataType.FLOAT_VECTOR)
        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
                 default_float_field_name: i * 1.0, default_string_field_name: str(i),
                 default_dynamic_field_name: 1} for i in range(default_nb)]
        results = self.insert(client, collection_name, rows)[0]
        assert results['insert_count'] == default_nb
        # 3. add new field same as dynamic field name
        default_value = 1
        self.add_collection_field(client, collection_name, field_name=new_field_name, data_type=DataType.INT64,
                                  nullable=True, default_value=default_value)
        vectors_to_search = [vectors[0]]
        insert_ids = [i for i in range(default_nb)]
        # 4. check old dynamic data search is not impacted after add new field
        self.search(client, collection_name, vectors_to_search, limit=default_limit,
                    filter=f'$meta["{default_dynamic_field_name}"] == 1',
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": len(vectors_to_search),
                                 "ids": insert_ids,
                                 "limit": default_limit,
                                 "pk_name": default_primary_key_field_name})
        # 5. check old dynamic data query is not impacted after add new field
        # (expected rows gain the added field's default value)
        for row in rows:
            row[new_field_name] = default_value
        self.query(client, collection_name, filter=f'$meta["{default_dynamic_field_name}"] == 1',
                   check_task=CheckTasks.check_query_results,
                   check_items={exp_res: rows,
                                "with_vec": True,
                                "pk_name": default_primary_key_field_name,
                                "vector_type": DataType.FLOAT_VECTOR})
        # 6. search filtered with the new field
        self.search(client, collection_name, vectors_to_search,
                    filter=f"{new_field_name} == 1",
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": len(vectors_to_search),
                                 "ids": insert_ids,
                                 "pk_name": default_primary_key_field_name,
                                 "limit": default_limit})
        self.search(client, collection_name, vectors_to_search,
                    filter=f"{new_field_name} is null",
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": len(vectors_to_search),
                                 "pk_name": default_primary_key_field_name,
                                 "limit": 0})
        # 7. query filtered with the new field
        self.query(client, collection_name, filter=f"{new_field_name} == 1",
                   check_task=CheckTasks.check_query_results,
                   check_items={exp_res: rows,
                                "with_vec": True,
                                "pk_name": default_primary_key_field_name})
        self.query(client, collection_name, filter=f"{new_field_name} is null",
                   check_task=CheckTasks.check_query_results,
                   check_items={exp_res: [],
                                "pk_name": default_primary_key_field_name})
        self.release_collection(client, collection_name)
        self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L1)
    def test_milvus_client_add_field_with_analyzer(self):
        """
        target: test add field with analyzer configuration
        method: create collection, add field with standard analyzer,
                insert data and verify
        expected: successfully add field with analyzer and perform text search
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        dim = 8

        # 1. create collection with basic schema
        schema = self.create_schema(client, enable_dynamic_field=False)[0]
        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64, is_partition_key=True)
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(default_vector_field_name, metric_type="COSINE")
        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)

        # 2. insert initial data before adding analyzer field
        schema_info = self.describe_collection(client, collection_name)[0]
        rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema_info)
        results = self.insert(client, collection_name, rows)[0]
        assert results['insert_count'] == default_nb

        # 3. add new field with standard analyzer
        analyzer_params = {
            "type": "standard",
            "stop_words": ["for", "the", "is", "a"]
        }
        self.add_collection_field(client, collection_name, field_name="text_content", data_type=DataType.VARCHAR,
                                  nullable=True, max_length=1000, enable_analyzer=True, analyzer_params=analyzer_params,
                                  enable_match=True)

        # 4. insert data with the new analyzer field
        text_data = [
            "The Milvus vector database is built for scale",
            "This is a test document for analyzer",
            "Vector search with text analysis capabilities",
            "Database performance and scalability features"
        ]
        rows_with_analyzer = []
        vectors = cf.gen_vectors(default_nb, dim, vector_data_type=DataType.FLOAT_VECTOR)
        for i in range(default_nb, default_nb + len(text_data)):
            rows_with_analyzer.append({
                default_primary_key_field_name: i,
                default_vector_field_name: vectors[i - default_nb],
                default_string_field_name: str(i),
                "text_content": text_data[i - default_nb]
            })
        results = self.insert(client, collection_name, rows_with_analyzer)[0]
        assert results['insert_count'] == len(text_data)

        # 5. verify the analyzer field was added correctly
        collection_info = self.describe_collection(client, collection_name)[0]
        field_names = [field["name"] for field in collection_info["fields"]]
        assert "text_content" in field_names

        # 6. test text search using the analyzer field
        vectors_to_search = [vectors[0]]
        # Simple search without filter to verify basic functionality
        search_results = self.search(
            client, collection_name, vectors_to_search,
            check_task=CheckTasks.check_search_results,
            check_items={
                "enable_milvus_client_api": True,
                "nq": len(vectors_to_search),
                "limit": 10,  # Adjust limit to match actual results
                "pk_name": default_primary_key_field_name
            }
        )
        # Verify search returned some results
        assert len(search_results[0]) > 0

        # 7. test query with analyzer field - use simpler condition
        query_results = self.query(
            client, collection_name,
            filter="text_content is not null",
            check_task=CheckTasks.check_query_results,
            check_items={
                "pk_name": default_primary_key_field_name,
                "exp_limit": 4  # We expect 4 documents with text_content
            }
        )
        # Verify that we get results for documents with text_content
        assert len(query_results[0]) > 0

        # 8. test run_analyzer to verify the analyzer configuration
        sample_text = "The Milvus vector database is built for scale"
        analyzer_result = client.run_analyzer(sample_text, analyzer_params)
        # Verify analyzer produces tokens
        # (should remove stop words like "the", "is", "a")
        tokens = analyzer_result.tokens
        assert len(tokens) > 0
        # Handle different token formats - tokens might be strings or dictionaries
        if isinstance(tokens[0], str):
            token_texts = tokens
        else:
            token_texts = [token["token"] for token in tokens]
        # Check that stop words are filtered out
        assert "the" not in token_texts
        assert "is" not in token_texts
        assert "a" not in token_texts

        # 9. cleanup
        self.release_collection(client, collection_name)
        self.drop_collection(client, collection_name)
|
|
|
|
|
|
class TestMilvusClientAddFieldFeatureInvalid(TestMilvusClientV2Base):
|
|
"""Test invalid cases for add field feature"""
|
|
@pytest.mark.tags(CaseLabel.L2)
|
|
def test_milvus_client_collection_add_vector_field(self):
|
|
"""
|
|
target: test fast create collection with add vector field
|
|
method: create collection name with add vector field
|
|
expected: raise exception
|
|
"""
|
|
client = self._client()
|
|
collection_name = cf.gen_collection_name_by_testcase_name()
|
|
# 1. create collection
|
|
dim, field_name = 8, default_new_field_name
|
|
error = {ct.err_code: 1100, ct.err_msg: f"vector field must have dimension specified, "
|
|
f"field name = {field_name}: invalid parameter"}
|
|
self.create_collection(client, collection_name, dim)
|
|
collections = self.list_collections(client)[0]
|
|
assert collection_name in collections
|
|
self.add_collection_field(client, collection_name, field_name=field_name, data_type=DataType.FLOAT_VECTOR,
|
|
nullable=True, check_task=CheckTasks.err_res, check_items=error)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
|
|
def test_milvus_client_collection_add_varchar_field_without_max_length(self):
|
|
"""
|
|
target: test fast create collection with add varchar field without maxlength
|
|
method: create collection name with add varchar field without maxlength
|
|
expected: raise exception
|
|
"""
|
|
client = self._client()
|
|
collection_name = cf.gen_collection_name_by_testcase_name()
|
|
# 1. create collection
|
|
dim, field_name = 8, default_new_field_name
|
|
error = {ct.err_code: 1100, ct.err_msg: f"type param(max_length) should be specified for "
|
|
f"the field({field_name}) of collection {collection_name}"}
|
|
self.create_collection(client, collection_name, dim)
|
|
collections = self.list_collections(client)[0]
|
|
assert collection_name in collections
|
|
self.add_collection_field(client, collection_name, field_name=field_name, data_type=DataType.VARCHAR,
|
|
nullable=True, check_task=CheckTasks.err_res, check_items=error)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
|
|
def test_milvus_client_collection_add_field_as_auto_id(self):
|
|
"""
|
|
target: test fast create collection with add new field as auto id
|
|
method: create collection name with add new field as auto id
|
|
expected: raise exception
|
|
"""
|
|
client = self._client()
|
|
collection_name = cf.gen_collection_name_by_testcase_name()
|
|
# 1. create collection
|
|
dim, field_name = 8, default_new_field_name
|
|
error = {ct.err_code: 1, ct.err_msg: f"The auto_id can only be specified on the primary key field"}
|
|
self.create_collection(client, collection_name, dim)
|
|
collections = self.list_collections(client)[0]
|
|
assert collection_name in collections
|
|
self.add_collection_field(client, collection_name, field_name=field_name, data_type=DataType.INT64,
|
|
nullable=True, auto_id=True, check_task=CheckTasks.err_res,
|
|
check_items=error)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
|
|
def test_milvus_client_collection_add_field_with_disable_nullable(self):
|
|
"""
|
|
target: test fast create collection with add new field as nullable false
|
|
method: create collection name with add new field as nullable false
|
|
expected: raise exception
|
|
"""
|
|
client = self._client()
|
|
collection_name = cf.gen_collection_name_by_testcase_name()
|
|
# 1. create collection
|
|
dim, field_name = 8, default_new_field_name
|
|
error = {ct.err_code: 1100, ct.err_msg: f"added field must be nullable, please check it, "
|
|
f"field name = {field_name}: invalid parameter"}
|
|
self.create_collection(client, collection_name, dim)
|
|
collections = self.list_collections(client)[0]
|
|
assert collection_name in collections
|
|
self.add_collection_field(client, collection_name, field_name=field_name, data_type=DataType.INT64,
|
|
nullable=False, check_task=CheckTasks.err_res, check_items=error)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
|
|
def test_milvus_client_collection_add_field_as_partition_ley(self):
|
|
"""
|
|
target: test fast create collection with add new field as partition key
|
|
method: create collection name with add new field as partition key
|
|
expected: raise exception
|
|
"""
|
|
client = self._client()
|
|
collection_name = cf.gen_collection_name_by_testcase_name()
|
|
# 1. create collection
|
|
dim, field_name = 8, default_new_field_name
|
|
error = {ct.err_code: 1100, ct.err_msg: f"not support to add partition key field, "
|
|
f"field name = {field_name}: invalid parameter"}
|
|
self.create_collection(client, collection_name, dim)
|
|
collections = self.list_collections(client)[0]
|
|
assert collection_name in collections
|
|
self.add_collection_field(client, collection_name, field_name=field_name, data_type=DataType.INT64,
|
|
nullable=True, is_partition_key=True,
|
|
check_task=CheckTasks.err_res, check_items=error)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
|
|
def test_milvus_client_collection_add_field_exceed_max_length(self):
|
|
"""
|
|
target: test fast create collection with add new field with exceed max length
|
|
method: create collection name with add new field with exceed max length
|
|
expected: raise exception
|
|
"""
|
|
client = self._client()
|
|
collection_name = cf.gen_collection_name_by_testcase_name()
|
|
# 1. create collection
|
|
dim, field_name = 8, default_new_field_name
|
|
error = {ct.err_code: 1100, ct.err_msg: f"the maximum length specified for the field({field_name}) "
|
|
f"should be in (0, 65535], but got 65536 instead: invalid parameter"}
|
|
self.create_collection(client, collection_name, dim)
|
|
collections = self.list_collections(client)[0]
|
|
assert collection_name in collections
|
|
self.add_collection_field(client, collection_name, field_name=field_name, data_type=DataType.VARCHAR,
|
|
nullable=True, max_length=65536, check_task=CheckTasks.err_res, check_items=error)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
|
|
def test_milvus_client_collection_add_field_as_cluster_key(self):
|
|
"""
|
|
target: test fast create collection with add new field as cluster key
|
|
method: create collection with add new field as cluster key(already has cluster key)
|
|
expected: raise exception
|
|
"""
|
|
client = self._client()
|
|
collection_name = cf.gen_collection_name_by_testcase_name()
|
|
# 1. create collection
|
|
field_name = default_new_field_name
|
|
error = {ct.err_code: 1100, ct.err_msg: f"already has another clutering key field, "
|
|
f"field name: {field_name}: invalid parameter"}
|
|
schema = self.create_schema(client)[0]
|
|
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
|
|
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
|
|
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64, is_clustering_key=True)
|
|
|
|
self.create_collection(client, collection_name, schema=schema)
|
|
collections = self.list_collections(client)[0]
|
|
assert collection_name in collections
|
|
self.add_collection_field(client, collection_name, field_name=field_name, data_type=DataType.INT64,
|
|
nullable=True, is_clustering_key=True,
|
|
check_task=CheckTasks.err_res, check_items=error)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_collection_add_field_same_other_name(self):
    """
    target: test fast create collection with add new field as other same name
    method: create collection with add new field as other same name
    expected: raise exception
    """
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    expected_error = {ct.err_code: 1100,
                      ct.err_msg: f"duplicate field name: {default_string_field_name}: invalid parameter"}

    # 1. create a collection that already contains the varchar field
    schema = self.create_schema(client)[0]
    schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
    schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
    schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64, is_clustering_key=True)
    self.create_collection(client, collection_name, schema=schema)
    assert collection_name in self.list_collections(client)[0]

    # 2. re-adding a field under an existing name must fail with a duplicate-name error
    self.add_collection_field(client, collection_name, field_name=default_string_field_name,
                              data_type=DataType.VARCHAR, nullable=True, max_length=64,
                              check_task=CheckTasks.err_res, check_items=expected_error)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_collection_add_field_exceed_max_field_number(self):
    """
    target: test fast create collection with add new field with exceed max field number
    method: create collection name with add new field with exceed max field number
    expected: raise exception
    """
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    dim, base_field = 8, default_new_field_name
    expected_error = {ct.err_code: 1100,
                      ct.err_msg: f"The number of fields has reached the maximum value 64: "
                                  f"invalid parameter"}

    # 1. fast-create starts with 2 fields (pk + vector); fill up to the 64-field cap
    self.create_collection(client, collection_name, dim)
    assert collection_name in self.list_collections(client)[0]
    for idx in range(62):
        self.add_collection_field(client, collection_name, field_name=f"{base_field}_{idx}",
                                  data_type=DataType.VARCHAR, nullable=True, max_length=64)

    # 2. the 65th field must be rejected
    self.add_collection_field(client, collection_name, field_name=base_field, data_type=DataType.VARCHAR,
                              nullable=True, max_length=64,
                              check_task=CheckTasks.err_res, check_items=expected_error)
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_add_field_with_reranker_unsupported(self):
    """
    target: test that add_collection_field and decay ranker combination is not supported
    method: create collection without reranker field, add nullable reranker field via add_collection_field,
            then try to use it with decay ranker
    expected: raise exception because decay ranker requires non-nullable fields but add_collection_field
              only supports nullable fields, creating a technical limitation
    """
    client = self._client()
    collection_name = cf.gen_collection_name_by_testcase_name()
    dim = 8

    # 1. build the collection WITHOUT the reranker field — it will be appended later
    schema = self.create_schema(client, enable_dynamic_field=False)[0]
    schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
    schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
    schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64, is_partition_key=True)
    index_params = self.prepare_index_params(client)[0]
    index_params.add_index(default_vector_field_name, metric_type="COSINE")
    self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)

    # 2. first batch of rows: no reranker field present yet
    vectors = cf.gen_vectors(default_nb, dim, vector_data_type=DataType.FLOAT_VECTOR)
    first_batch = [{default_primary_key_field_name: pk,
                    default_vector_field_name: vectors[pk],
                    default_string_field_name: str(pk)}
                   for pk in range(default_nb)]
    insert_res = self.insert(client, collection_name, first_batch)[0]
    assert insert_res['insert_count'] == default_nb

    # 3. append the reranker field after creation; add_collection_field only accepts
    #    nullable fields, which is exactly what conflicts with the decay ranker later on
    self.add_collection_field(client, collection_name, field_name=ct.default_reranker_field_name,
                              data_type=DataType.INT64, nullable=True, default_value=0)

    # 4. second batch of rows: these DO populate the newly added reranker field
    vectors_batch2 = cf.gen_vectors(default_nb, dim, vector_data_type=DataType.FLOAT_VECTOR)
    second_batch = [{default_primary_key_field_name: pk,
                     default_vector_field_name: vectors_batch2[pk - default_nb],
                     default_string_field_name: str(pk),
                     ct.default_reranker_field_name: pk}
                    for pk in range(default_nb, default_nb * 2)]
    insert_res = self.insert(client, collection_name, second_batch)[0]
    assert insert_res['insert_count'] == default_nb

    # 5. a decay ranker over the nullable field must be rejected at search time,
    #    because decay rankers require non-nullable input fields
    from pymilvus import Function, FunctionType
    decay_ranker = Function(
        name="my_reranker",
        input_field_names=[ct.default_reranker_field_name],
        function_type=FunctionType.RERANK,
        params={
            "reranker": "decay",
            "function": "gauss",
            "origin": 0,
            "offset": 0,
            "decay": 0.5,
            "scale": 100
        }
    )
    expected_error = {ct.err_code: 65535,
                      ct.err_msg: "Function input field cannot be nullable: field reranker_field"}
    self.search(client, collection_name, [vectors[0]], ranker=decay_ranker,
                check_task=CheckTasks.err_res, check_items=expected_error)
|