test: add different language tests and modify some cases (#36465)
fix: #36396

Signed-off-by: nico <cheng.yuan@zilliz.com>

parent 447e326629 · commit cfd636ed5b
@@ -242,7 +242,7 @@ class TestcaseBase(Base):
                                 primary_field=ct.default_int64_field_name, is_flush=True, name=None,
                                 enable_dynamic_field=False, with_json=True, random_primary_key=False,
                                 multiple_dim_array=[], is_partition_key=None, vector_data_type="FLOAT_VECTOR",
-                                nullable_fields={}, default_value_fields={}, **kwargs):
+                                nullable_fields={}, default_value_fields={}, language=None, **kwargs):
         """
         target: create specified collections
         method: 1. create collections (binary/non-binary, default/all data type, auto_id or not)
@@ -311,7 +311,7 @@ class TestcaseBase(Base):
                                            dim=dim, enable_dynamic_field=enable_dynamic_field, with_json=with_json,
                                            random_primary_key=random_primary_key, multiple_dim_array=multiple_dim_array,
                                            primary_field=primary_field, vector_data_type=vector_data_type,
-                                           nullable_fields=nullable_fields)
+                                           nullable_fields=nullable_fields, language=language)
         if is_flush:
             assert collection_w.is_empty is False
             assert collection_w.num_entities == nb
@@ -324,7 +324,7 @@ class TestcaseBase(Base):
                 for vector_name in vector_name_list:
                     collection_w.create_index(vector_name, ct.default_sparse_inverted_index)
             else:
-                if len(multiple_dim_array) == 0 or is_all_data_type == False:
+                if len(multiple_dim_array) == 0 or is_all_data_type is False:
                     vector_name_list.append(ct.default_float_vec_field_name)
                 for vector_name in vector_name_list:
                     # Unlike dense vectors, sparse vectors cannot create flat index.
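The swap from `== False` to `is False` follows PEP 8 (E712): identity against the `False` singleton is strict, while equality also accepts values that merely compare equal to `False`, such as `0`. A standalone illustration (not repo code):

    is_all_data_type = 0               # falsy, but not the bool False
    print(is_all_data_type == False)   # True  -- 0 compares equal to False
    print(is_all_data_type is False)   # False -- 0 is not the False singleton

Here the argument is expected to be a real bool, so behavior is unchanged; the rewrite only tightens style.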
@@ -322,6 +322,7 @@ def generate_array_dataset(size, array_length, hit_probabilities, target_values)
     return dataset


+
 def prepare_array_test_data(data_size, hit_rate=0.005, dim=128):
     size = data_size  # Number of arrays in the dataset
     array_length = 10  # Length of each array
@@ -421,7 +422,6 @@ def prepare_array_test_data(data_size, hit_rate=0.005, dim=128):
     return train_df, query_expr


-
 def gen_unique_str(str_value=None):
     prefix = "".join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
     return "test_" + prefix if str_value is None else str_value + "_" + prefix
@@ -433,6 +433,26 @@ def gen_str_by_length(length=8, letters_only=False):
     return "".join(random.choice(string.ascii_letters + string.digits) for _ in range(length))


+def generate_random_sentence(language):
+    language_map = {
+        "English": "en_US",
+        "French": "fr_FR",
+        "Spanish": "es_ES",
+        "German": "de_DE",
+        "Italian": "it_IT",
+        "Portuguese": "pt_PT",
+        "Russian": "ru_RU",
+        "Chinese": "zh_CN",
+        "Japanese": "ja_JP",
+        "Korean": "ko_KR",
+        "Arabic": "ar_SA",
+        "Hindi": "hi_IN"
+    }
+    lang_code = language_map.get(language, "en_US")
+    faker = Faker(lang_code)
+    return faker.sentence()
+
+
 def gen_digits_by_length(length=8):
     return "".join(random.choice(string.digits) for _ in range(length))

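The new helper maps a human-readable language name to a Faker locale and returns one random sentence, falling back to en_US for unknown names via `language_map.get(language, "en_US")`, so a typo silently yields English data. A quick standalone check of the underlying Faker behavior (requires the faker package; output is random):

    from faker import Faker

    # Same mechanic as generate_random_sentence: pick a locale, emit a sentence.
    for locale in ("en_US", "zh_CN", "ru_RU"):
        print(locale, "->", Faker(locale).sentence())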
@@ -957,7 +977,7 @@ def gen_binary_vectors(num, dim):
 def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True,
                                random_primary_key=False, multiple_dim_array=[], multiple_vector_field_name=[],
                                vector_data_type="FLOAT_VECTOR", auto_id=False,
-                               primary_field = ct.default_int64_field_name, nullable_fields={}):
+                               primary_field=ct.default_int64_field_name, nullable_fields={}, language=None):
     if not random_primary_key:
         int_values = pd.Series(data=[i for i in range(start, start + nb)])
     else:
@@ -973,6 +993,8 @@ def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, wi
         float_values = pd.Series(data=float_data, dtype=object)

     string_data = [str(i) for i in range(start, start + nb)]
+    if language:
+        string_data = [generate_random_sentence(language) for _ in range(nb)]
     string_values = pd.Series(data=string_data, dtype="string")
     if ct.default_string_field_name in nullable_fields:
         null_number = int(nb*nullable_fields[ct.default_string_field_name])
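Each column-style generator gets the same two-line switch: the string column keeps its default numeric values ("0", "1", …) unless a language is given, in which case the whole column is rebuilt from generated sentences. A standalone sketch of the pattern (`gen_string_column` is illustrative, not a repo helper; requires pandas and faker):

    import pandas as pd
    from faker import Faker

    def gen_string_column(nb, language=None):
        # Default: numeric strings, as the helpers produce without a language.
        string_data = [str(i) for i in range(nb)]
        if language:
            # Locale lookup shortened here; the repo uses generate_random_sentence.
            faker = Faker({"Chinese": "zh_CN"}.get(language, "en_US"))
            string_data = [faker.sentence() for _ in range(nb)]
        return pd.Series(data=string_data, dtype="string")

    print(gen_string_column(3))
    print(gen_string_column(3, language="Chinese"))

Note the override runs before the nullable_fields handling, so a localized column can still be partially nulled afterwards.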
@@ -1017,7 +1039,7 @@ def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, wi
 def gen_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True,
                           random_primary_key=False, multiple_dim_array=[], multiple_vector_field_name=[],
                           vector_data_type="FLOAT_VECTOR", auto_id=False,
-                          primary_field=ct.default_int64_field_name, nullable_fields={}):
+                          primary_field=ct.default_int64_field_name, nullable_fields={}, language=None):
     insert_list = []
     if not random_primary_key:
         int_values = pd.Series(data=[i for i in range(start, start + nb)])
@@ -1031,6 +1053,8 @@ def gen_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_js
         float_data = float_data[:nb - null_number] + null_data
         float_values = pd.Series(data=float_data, dtype=object)
     string_data = [str(i) for i in range(start, start + nb)]
+    if language:
+        string_data = [generate_random_sentence(language) for _ in range(nb)]
     string_values = pd.Series(data=string_data, dtype="string")
     if ct.default_string_field_name in nullable_fields:
         null_number = int(nb * nullable_fields[ct.default_string_field_name])
@@ -1069,7 +1093,7 @@ def gen_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_js

 def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True, multiple_dim_array=[],
                           multiple_vector_field_name=[], vector_data_type="FLOAT_VECTOR", auto_id=False,
-                          primary_field = ct.default_int64_field_name, nullable_fields={}):
+                          primary_field = ct.default_int64_field_name, nullable_fields={}, language=None):
     array = []
     for i in range(start, start + nb):
         dict = {ct.default_int64_field_name: i,
@@ -1080,6 +1104,8 @@ def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_js
                 }
         if with_json is False:
             dict.pop(ct.default_json_field_name, None)
+        if language:
+            dict[ct.default_string_field_name] = generate_random_sentence(language)
         if auto_id is True:
             if primary_field == ct.default_int64_field_name:
                 dict.pop(ct.default_int64_field_name)
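The row-based generator applies the switch once per row rather than once per column, so each row dict gets its own sentence. A minimal standalone contrast of the two shapes (field names illustrative; requires faker):

    from faker import Faker

    faker = Faker("de_DE")
    nb = 3
    # Column-wise, as in gen_default_dataframe_data: build the whole column at once.
    column = [faker.sentence() for _ in range(nb)]
    # Row-wise, as in gen_default_rows_data: set the string field inside each row.
    rows = [{"int64": i, "varchar": faker.sentence()} for i in range(nb)]
    print(column)
    print(rows)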
@@ -1281,7 +1307,7 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, w
 def gen_general_list_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True,
                                    auto_id=False, random_primary_key=False, multiple_dim_array=[],
                                    multiple_vector_field_name=[], primary_field=ct.default_int64_field_name,
-                                   nullable_fields={}):
+                                   nullable_fields={}, language=None):
     if not random_primary_key:
         int64_values = pd.Series(data=[i for i in range(start, start + nb)])
     else:
@@ -1335,6 +1361,8 @@ def gen_general_list_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0
         double_values = pd.Series(data=double_data, dtype=object)

     string_data = [str(i) for i in range(start, start + nb)]
+    if language:
+        string_data = [generate_random_sentence(language) for _ in range(nb)]
     string_values = pd.Series(data=string_data, dtype="string")
     if ct.default_string_field_name in nullable_fields:
         null_number = int(nb * nullable_fields[ct.default_string_field_name])
@@ -1375,7 +1403,7 @@ def gen_general_list_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0

 def gen_default_rows_data_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True,
                                         multiple_dim_array=[], multiple_vector_field_name=[], partition_id=0,
-                                        auto_id=False, primary_field=ct.default_int64_field_name):
+                                        auto_id=False, primary_field=ct.default_int64_field_name, language=None):
     array = []
     for i in range(start, start + nb):
         dict = {ct.default_int64_field_name: i,
@@ -1391,6 +1419,8 @@ def gen_default_rows_data_all_data_type(nb=ct.default_nb, dim=ct.default_dim, st
                 }
         if with_json is False:
             dict.pop(ct.default_json_field_name, None)
+        if language:
+            dict[ct.default_string_field_name] = generate_random_sentence(language)
         if auto_id is True:
             if primary_field == ct.default_int64_field_name:
                 dict.pop(ct.default_int64_field_name, None)
@@ -1412,7 +1442,7 @@ def gen_default_rows_data_all_data_type(nb=ct.default_nb, dim=ct.default_dim, st


 def gen_default_binary_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, auto_id=False,
-                                      primary_field=ct.default_int64_field_name, nullable_fields={}):
+                                      primary_field=ct.default_int64_field_name, nullable_fields={}, language=None):
     int_data = [i for i in range(start, start + nb)]
     int_values = pd.Series(data=int_data)
     if ct.default_int64_field_name in nullable_fields:
@@ -1430,6 +1460,8 @@ def gen_default_binary_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, star
         float_values = pd.Series(data=float_data, dtype=object)

     string_data = [str(i) for i in range(start, start + nb)]
+    if language:
+        string_data = [generate_random_sentence(language) for _ in range(nb)]
     string_values = pd.Series(data=string_data, dtype="string")
     if ct.default_string_field_name in nullable_fields:
         null_number = int(nb * nullable_fields[ct.default_string_field_name])
@@ -2525,7 +2557,7 @@ def gen_partitions(collection_w, partition_num=1):
 def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_type=False,
                 auto_id=False, dim=ct.default_dim, insert_offset=0, enable_dynamic_field=False, with_json=True,
                 random_primary_key=False, multiple_dim_array=[], primary_field=ct.default_int64_field_name,
-                vector_data_type="FLOAT_VECTOR", nullable_fields={}):
+                vector_data_type="FLOAT_VECTOR", nullable_fields={}, language=None):
     """
     target: insert non-binary/binary data
     method: insert non-binary/binary data into partitions if any
@@ -2553,7 +2585,7 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
                                                      multiple_vector_field_name=vector_name_list,
                                                      vector_data_type=vector_data_type,
                                                      auto_id=auto_id, primary_field=primary_field,
-                                                     nullable_fields=nullable_fields)
+                                                     nullable_fields=nullable_fields, language=language)
         elif vector_data_type in ct.append_vector_type:
             default_data = gen_default_list_data(nb // num, dim=dim, start=start, with_json=with_json,
                                                  random_primary_key=random_primary_key,
@@ -2561,7 +2593,7 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
                                                  multiple_vector_field_name=vector_name_list,
                                                  vector_data_type=vector_data_type,
                                                  auto_id=auto_id, primary_field=primary_field,
-                                                 nullable_fields=nullable_fields)
+                                                 nullable_fields=nullable_fields, language=language)

         else:
             default_data = gen_default_rows_data(nb // num, dim=dim, start=start, with_json=with_json,
@@ -2569,7 +2601,7 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
                                                  multiple_vector_field_name=vector_name_list,
                                                  vector_data_type=vector_data_type,
                                                  auto_id=auto_id, primary_field=primary_field,
-                                                 nullable_fields=nullable_fields)
+                                                 nullable_fields=nullable_fields, language=language)

     else:
         if not enable_dynamic_field:
@@ -2579,14 +2611,14 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
                                                          multiple_dim_array=multiple_dim_array,
                                                          multiple_vector_field_name=vector_name_list,
                                                          auto_id=auto_id, primary_field=primary_field,
-                                                         nullable_fields=nullable_fields)
+                                                         nullable_fields=nullable_fields, language=language)
             elif vector_data_type == "FLOAT16_VECTOR" or "BFLOAT16_VECTOR":
                 default_data = gen_general_list_all_data_type(nb // num, dim=dim, start=start, with_json=with_json,
                                                               random_primary_key=random_primary_key,
                                                               multiple_dim_array=multiple_dim_array,
                                                               multiple_vector_field_name=vector_name_list,
                                                               auto_id=auto_id, primary_field=primary_field,
-                                                              nullable_fields=nullable_fields)
+                                                              nullable_fields=nullable_fields, language=language)
             else:
                 if os.path.exists(ct.rows_all_data_type_file_path + f'_{i}' + f'_dim{dim}.txt'):
                     with open(ct.rows_all_data_type_file_path + f'_{i}' + f'_dim{dim}.txt', 'rb') as f:
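One context line above is worth flagging for a follow-up, though this commit leaves it untouched: `elif vector_data_type == "FLOAT16_VECTOR" or "BFLOAT16_VECTOR":` always evaluates to true, because it parses as `(vector_data_type == "FLOAT16_VECTOR") or "BFLOAT16_VECTOR"` and a non-empty string is truthy, so the `else` branch below it is unreachable. A standalone demonstration with the usual membership-test fix (illustrative, not part of this commit):

    vector_data_type = "BINARY_VECTOR"

    # The pattern from the context line: the right operand is an always-truthy string.
    if vector_data_type == "FLOAT16_VECTOR" or "BFLOAT16_VECTOR":
        print("taken for any value")           # always runs

    # The usual fix: test membership instead.
    if vector_data_type in ("FLOAT16_VECTOR", "BFLOAT16_VECTOR"):
        print("taken only for the two types")  # does not run here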
@@ -2597,12 +2629,14 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
                                                                   multiple_dim_array=multiple_dim_array,
                                                                   multiple_vector_field_name=vector_name_list,
                                                                   partition_id=i, auto_id=auto_id,
-                                                                  primary_field=primary_field)
+                                                                  primary_field=primary_field,
+                                                                  language=language)
     else:
         default_data, binary_raw_data = gen_default_binary_dataframe_data(nb // num, dim=dim, start=start,
                                                                           auto_id=auto_id,
                                                                           primary_field=primary_field,
-                                                                          nullable_fields=nullable_fields)
+                                                                          nullable_fields=nullable_fields,
+                                                                          language=language)
         binary_raw_vectors.extend(binary_raw_data)
     insert_res = collection_w.insert(default_data, par[i].name)[0]
     log.info(f"inserted {nb // num} data into collection {collection_w.name}")
@@ -387,11 +387,11 @@ class TestInsertParams(TestcaseBase):
         """
         c_name = cf.gen_unique_str(prefix)
         collection_w = self.init_collection_wrap(name=c_name)
-        data = cf.gen_default_list_data(nb=100)
-        data[0][1] = 1.0
+        data = cf.gen_default_rows_data(nb=100)
+        data[0][ct.default_int64_field_name] = 1.0
         error = {ct.err_code: 999,
                  ct.err_msg: "The Input data type is inconsistent with defined schema, {%s} field should be a int64, "
-                             "but got a {<class 'int'>} instead." % ct.default_int64_field_name}
+                             "but got a {<class 'float'>} instead." % ct.default_int64_field_name}
         collection_w.insert(data, check_task=CheckTasks.err_res, check_items=error)


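The rewritten case also explains the message change: the test now builds row dicts and plants 1.0 in the int64 primary-key field, and since 1.0 is a Python float, the server echoes `<class 'float'>` rather than `<class 'int'>`. A one-line standalone check of the type the error reports (field name illustrative):

    row = {"int64": 1.0}                # the value the test injects
    print(type(row["int64"]))           # <class 'float'> -- matches the new err_msg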
@@ -2616,6 +2616,7 @@ class TestQueryOperation(TestcaseBase):
                                           ct.default_search_params, limit, multi_exprs)
         assert(check_res == True)

+
 class TestQueryString(TestcaseBase):
     """
     ******************************************************************
@@ -2726,14 +2727,14 @@ class TestQueryString(TestcaseBase):
         time.sleep(1)
         collection_w.load()
         expression = 'varchar like "0%"'
-        result , _ = collection_w.query(expression, output_fields=['varchar'])
+        result, _ = collection_w.query(expression, output_fields=['varchar'])
         res_len = len(result)
         collection_w.release()
         collection_w.alter_index("bitmap_offset_cache", {'indexoffsetcache.enabled': True})
         collection_w.create_index("varchar", index_name="bitmap_offset_cache", index_params={"index_type": "BITMAP"})
         collection_w.load()
         expression = 'varchar like "0%"'
-        result , _ = collection_w.query(expression, output_fields=['varchar'])
+        result, _ = collection_w.query(expression, output_fields=['varchar'])
         res_len_new = len(result)
         assert res_len_new == res_len
         collection_w.release()
@@ -2741,7 +2742,7 @@ class TestQueryString(TestcaseBase):
         collection_w.create_index("varchar", index_name="bitmap_offset_cache", index_params={"index_type": "BITMAP"})
         collection_w.load()
         expression = 'varchar like "0%"'
-        result , _ = collection_w.query(expression, output_fields=['varchar'])
+        result, _ = collection_w.query(expression, output_fields=['varchar'])
         res_len_new = len(result)
         assert res_len_new == res_len
         collection_w.release()
@@ -2824,12 +2825,13 @@ class TestQueryString(TestcaseBase):
         collection_w, vectors = self.init_collection_general(prefix, insert_data=True,is_index=False,
                                                              primary_field=default_int_field_name)[0:2]

-        collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name="query_expr_pre_index")
+        collection_w.create_index(ct.default_float_vec_field_name, default_index_params,
+                                  index_name="query_expr_pre_index")
         collection_w.create_index("varchar", index_name="bitmap_auto_index", index_params={"index_type": "BITMAP"})
         time.sleep(1)
         collection_w.load()
         expression = 'varchar like "%0%"'
-        result , _ = collection_w.query(expression, output_fields=['varchar'])
+        result, _ = collection_w.query(expression, output_fields=['varchar'])
         res_len = len(result)
         collection_w.release()
         collection_w.drop_index(index_name="varchar_bitmap_index")
@@ -2838,7 +2840,6 @@ class TestQueryString(TestcaseBase):
         res_len_1 = len(result)
         assert res_len_1 == res_len

-
     @pytest.mark.tags(CaseLabel.L1)
     def test_query_string_with_invalid_prefix_expr(self):
         """
@@ -3062,6 +3063,8 @@ class TestQueryString(TestcaseBase):
         res, _ = collection_w.query(expr, output_fields=output_fields)

         assert len(res) == 4
+
+
 class TestQueryArray(TestcaseBase):

     @pytest.mark.tags(CaseLabel.L1)
@@ -1495,13 +1495,10 @@ class TestCollectionSearch(TestcaseBase):
         """
         # 1. initialize collection with random primary key
         collection_w, _vectors, _, insert_ids, time_stamp = \
-            self.init_collection_general(
-                prefix, True, 10, random_primary_key=random_primary_key)[0:5]
+            self.init_collection_general(prefix, True, 10, random_primary_key=random_primary_key,
+                                         language="Russian")[0:5]
         # 2. search
-        log.info("test_search_random_primary_key: searching collection %s" %
-                 collection_w.name)
         vectors = [[random.random() for _ in range(default_dim)]
                    for _ in range(default_nq)]
+        log.info("test_search_random_primary_key: searching collection %s" % collection_w.name)
         collection_w.search(vectors[:default_nq], default_search_field,
                             default_search_params, default_limit,
                             default_search_exp,
@@ -5272,6 +5269,8 @@ class TestSearchDSL(TestcaseBase):
                                  check_items={"nq": nq,
                                               "ids": insert_ids,
                                               "limit": ct.default_top_k})
+
+
 class TestSearchArray(TestcaseBase):

     @pytest.mark.tags(CaseLabel.L1)
@@ -5367,25 +5366,28 @@ class TestSearchString(TestcaseBase):
         # 1. initialize with data
         auto_id = True
         enable_dynamic_field = False
-        collection_w, _, _, insert_ids = \
-            self.init_collection_general(prefix, True, auto_id=auto_id, dim=default_dim,
-                                         enable_dynamic_field=enable_dynamic_field)[0:4]
+        collection_w, insert_data, _, insert_ids = \
+            self.init_collection_general(prefix, True, auto_id=auto_id, dim=default_dim, nb=1000,
+                                         enable_dynamic_field=enable_dynamic_field, language="Chinese")[0:4]
+        search_str = insert_data[0][default_string_field_name][1]
+        search_exp = f"{default_string_field_name} == '{search_str}'"
         # 2. search
-        log.info("test_search_string_field_not_primary: searching collection %s" %
-                 collection_w.name)
         vectors = [[random.random() for _ in range(default_dim)]
                    for _ in range(default_nq)]
+        log.info("test_search_string_field_not_primary: searching collection %s" % collection_w.name)
+        log.info("search expr: %s" % search_exp)
         output_fields = [default_string_field_name, default_float_field_name]
-        collection_w.search(vectors[:default_nq], default_search_field,
-                            default_search_params, default_limit,
-                            default_search_string_exp,
-                            output_fields=output_fields,
-                            _async=_async,
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": default_nq,
-                                         "ids": insert_ids,
-                                         "limit": default_limit,
-                                         "_async": _async})
+        res, _ = collection_w.search(vectors[:default_nq], default_search_field,
+                                     default_search_params, default_limit, search_exp,
+                                     output_fields=output_fields,
+                                     _async=_async,
+                                     check_task=CheckTasks.check_search_results,
+                                     check_items={"nq": default_nq,
+                                                  "ids": insert_ids,
+                                                  "limit": 1,
+                                                  "_async": _async})
+        if _async:
+            res.done()
+            res = res.result()
+        assert res[0][0].entity.varchar == search_str

     @pytest.mark.tags(CaseLabel.L2)
     def test_search_string_field_is_primary_true(self, _async):
@@ -5399,25 +5401,29 @@ class TestSearchString(TestcaseBase):
         # 1. initialize with data
         dim = 64
         enable_dynamic_field = True
-        collection_w, _, _, insert_ids = \
+        collection_w, insert_data, _, insert_ids = \
             self.init_collection_general(prefix, True, dim=dim, primary_field=ct.default_string_field_name,
-                                         enable_dynamic_field=enable_dynamic_field)[0:4]
+                                         enable_dynamic_field=enable_dynamic_field, language="English", nb=1000)[0:4]
+        search_str = insert_data[0][1][default_string_field_name]
+        search_exp = f"{default_string_field_name} == '{search_str}'"
         # 2. search
-        log.info("test_search_string_field_is_primary_true: searching collection %s" %
-                 collection_w.name)
-        vectors = [[random.random() for _ in range(dim)]
-                   for _ in range(default_nq)]
+        log.info("test_search_string_field_is_primary_true: searching collection %s" % collection_w.name)
+        log.info("search expr: %s" % search_exp)
+        vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
         output_fields = [default_string_field_name, default_float_field_name]
-        collection_w.search(vectors[:default_nq], default_search_field,
-                            default_search_params, default_limit,
-                            default_search_string_exp,
-                            output_fields=output_fields,
-                            _async=_async,
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": default_nq,
-                                         "ids": insert_ids,
-                                         "limit": default_limit,
-                                         "_async": _async})
+        res, _ = collection_w.search(vectors[:default_nq], default_search_field,
+                                     default_search_params, default_limit, search_exp,
+                                     output_fields=output_fields,
+                                     _async=_async,
+                                     check_task=CheckTasks.check_search_results,
+                                     check_items={"nq": default_nq,
+                                                  "ids": insert_ids,
+                                                  "limit": 1,
+                                                  "_async": _async})
+        if _async:
+            res.done()
+            res = res.result()
+        assert res[0][0].entity.varchar == search_str

     @pytest.mark.tags(CaseLabel.L2)
     def test_search_string_field_is_primary_true_multi_vector_fields(self, _async):
@@ -5435,7 +5441,7 @@ class TestSearchString(TestcaseBase):
         collection_w, _, _, insert_ids = \
             self.init_collection_general(prefix, True, dim=dim, primary_field=ct.default_string_field_name,
                                          enable_dynamic_field=enable_dynamic_field,
-                                         multiple_dim_array=multiple_dim_array)[0:4]
+                                         multiple_dim_array=multiple_dim_array, language="German")[0:4]
         # 2. search
         log.info("test_search_string_field_is_primary_true: searching collection %s" %
                  collection_w.name)
@@ -5926,6 +5932,48 @@ class TestSearchString(TestcaseBase):
                                          "limit": default_limit,
                                          "_async": _async})

+    @pytest.mark.tags(CaseLabel.L2)
+    def test_search_string_different_language(self):
+        """
+        target: test search with string expr using different language
+        method: create collection and insert data
+                create index and collection load
+                collection search uses string expr in string field
+        expected: Search successfully
+        """
+        # 1. initialize with data
+        _async = random.choice([True, False])
+        auto_id = random.choice([True, False])
+        enable_dynamic_field = random.choice([True, False])
+        all_language = ["English", "French", "Spanish", "German", "Italian", "Portuguese", "Russian", "Chinese",
+                        "Japanese", "Arabic", "Hindi"]
+        language = random.choice(all_language)
+        log.info(f"_async: {_async}, auto_id: {auto_id}, enable_dynamic_field: {enable_dynamic_field}, "
+                 f"language: {language}")
+        collection_w, insert_data, _, insert_ids = \
+            self.init_collection_general(prefix, True, auto_id=auto_id, nb=100,
+                                         enable_dynamic_field=enable_dynamic_field, language=language)[0:4]
+        search_str = insert_data[0][default_string_field_name][1] if not enable_dynamic_field \
+            else insert_data[0][1][default_string_field_name]
+        search_exp = f"{default_string_field_name} == '{search_str}'"
+        # 2. search
+        log.info("test_search_string_different_language: searching collection %s" % collection_w.name)
+        log.info("search expr: %s" % search_exp)
+        output_fields = [default_string_field_name, default_float_field_name]
+        res, _ = collection_w.search(vectors[:default_nq], default_search_field,
+                                     default_search_params, default_limit, search_exp,
+                                     output_fields=output_fields,
+                                     _async=_async,
+                                     check_task=CheckTasks.check_search_results,
+                                     check_items={"nq": default_nq,
+                                                  "ids": insert_ids,
+                                                  "limit": 1,
+                                                  "_async": _async})
+        if _async:
+            res.done()
+            res = res.result()
+        assert res[0][0].entity.varchar == search_str
+

 class TestSearchPagination(TestcaseBase):
     """ Test case of search pagination """
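Because the fixture returns column-style data for a static schema and row dicts when the dynamic field is enabled, the new test extracts its probe string two different ways before building the exact-match expression. A standalone sketch of the two shapes (values illustrative, not real fixture output):

    # Static schema: insert_data[0] maps field name -> column of values.
    static_data = [{"varchar": ["s0", "s1", "s2"]}]
    # Dynamic field enabled: insert_data[0] is a list of row dicts.
    dynamic_data = [[{"varchar": "s0"}, {"varchar": "s1"}, {"varchar": "s2"}]]

    enable_dynamic_field = False
    search_str = (static_data[0]["varchar"][1] if not enable_dynamic_field
                  else dynamic_data[0][1]["varchar"])
    print(f"varchar == '{search_str}'")  # exact-match expr; the test expects limit 1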
@@ -6802,19 +6850,16 @@ class TestSearchDiskann(TestcaseBase):
         enable_dynamic_field = True
         collection_w, _, _, ids = \
             self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_index=False,
-                                         enable_dynamic_field=enable_dynamic_field)[0:4]
+                                         enable_dynamic_field=enable_dynamic_field, language="French")[0:4]
         # 2. create index
         default_index = {"index_type": "DISKANN",
                          "metric_type": "COSINE", "params": {}}
-        collection_w.create_index(
-            ct.default_float_vec_field_name, default_index, index_name=index_name1)
+        collection_w.create_index(ct.default_float_vec_field_name, default_index, index_name=index_name1)
         if not enable_dynamic_field:
             index_params_one = {}
-            collection_w.create_index(
-                "float", index_params_one, index_name="a")
+            collection_w.create_index("float", index_params_one, index_name="a")
             index_param_two = {}
-            collection_w.create_index(
-                "varchar", index_param_two, index_name="b")
+            collection_w.create_index("varchar", index_param_two, index_name="b")

         collection_w.load()
         tmp_expr = f'{ct.default_int64_field_name} in {[0]}'
@@ -6826,12 +6871,9 @@ class TestSearchDiskann(TestcaseBase):
         assert del_res.delete_count == half_nb

         collection_w.delete(tmp_expr)
-        default_search_params = {
-            "metric_type": "COSINE", "params": {"search_list": 30}}
-        vectors = [[random.random() for _ in range(dim)]
-                   for _ in range(default_nq)]
-        output_fields = [default_int64_field_name,
-                         default_float_field_name, default_string_field_name]
+        default_search_params = {"metric_type": "COSINE", "params": {"search_list": 30}}
+        vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
+        output_fields = [default_int64_field_name, default_float_field_name, default_string_field_name]
         collection_w.search(vectors[:default_nq], default_search_field,
                             default_search_params, default_limit,
                             default_search_exp,
@@ -6841,8 +6883,7 @@ class TestSearchDiskann(TestcaseBase):
                             check_items={"nq": default_nq,
                                          "ids": ids,
                                          "limit": default_limit,
-                                         "_async": _async}
-                            )
+                                         "_async": _async})

     @pytest.mark.tags(CaseLabel.L1)
     def test_search_with_scalar_field(self, _async):
@@ -9754,7 +9795,7 @@ class TestCollectionSearchJSON(TestcaseBase):
         dim = 64
         collection_w, _, _, insert_ids, time_stamp = \
             self.init_collection_general(prefix, True, auto_id=True, dim=dim, is_flush=is_flush,
-                                         enable_dynamic_field=enable_dynamic_field)[0:5]
+                                         enable_dynamic_field=enable_dynamic_field, language="Hindi")[0:5]
         vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
         # 2. search after insert
         collection_w.search(vectors[:nq], default_search_field,