test: Add part of test cases for default and null support (#36130)

issue: #36129

Signed-off-by: binbin lv <binbin.lv@zilliz.com>
binbin 2024-09-11 10:59:07 +08:00 committed by GitHub
parent dbe03a6151
commit d9c8d1ea90
5 changed files with 516 additions and 79 deletions


@@ -242,7 +242,7 @@ class TestcaseBase(Base):
primary_field=ct.default_int64_field_name, is_flush=True, name=None,
enable_dynamic_field=False, with_json=True, random_primary_key=False,
multiple_dim_array=[], is_partition_key=None, vector_data_type="FLOAT_VECTOR",
**kwargs):
nullable_fields={}, default_value_fields={}, **kwargs):
"""
target: create specified collections
method: 1. create collections (binary/non-binary, default/all data type, auto_id or not)
@@ -250,6 +250,8 @@ class TestcaseBase(Base):
3. insert specified (binary/non-binary, default/all data type) data
into each partition if any
4. not load if specifying is_index as True
5. enable inserting null data: nullable_fields = {"nullable_fields_name": null data percent}
6. enable inserting default values: default_value_fields = {"default_fields_name": default value}
expected: return collection and raw data, insert ids
"""
log.info("Test case of search interface: initialize before test case")
@@ -258,6 +260,12 @@ class TestcaseBase(Base):
collection_name = cf.gen_unique_str(prefix)
if name is not None:
collection_name = name
if not isinstance(nullable_fields, dict):
log.error("nullable_fields should a dict like {'nullable_fields_name': null data percent}")
assert False
if not isinstance(default_value_fields, dict):
log.error("default_value_fields should a dict like {'default_fields_name': default value}")
assert False
vectors = []
binary_raw_vectors = []
insert_ids = []
@@ -267,21 +275,29 @@ class TestcaseBase(Base):
enable_dynamic_field=enable_dynamic_field,
with_json=with_json, multiple_dim_array=multiple_dim_array,
is_partition_key=is_partition_key,
vector_data_type=vector_data_type)
vector_data_type=vector_data_type,
nullable_fields=nullable_fields,
default_value_fields=default_value_fields)
if is_binary:
default_schema = cf.gen_default_binary_collection_schema(auto_id=auto_id, dim=dim,
primary_field=primary_field)
primary_field=primary_field,
nullable_fields=nullable_fields,
default_value_fields=default_value_fields)
if vector_data_type == ct.sparse_vector:
default_schema = cf.gen_default_sparse_schema(auto_id=auto_id, primary_field=primary_field,
enable_dynamic_field=enable_dynamic_field,
with_json=with_json,
multiple_dim_array=multiple_dim_array)
multiple_dim_array=multiple_dim_array,
nullable_fields=nullable_fields,
default_value_fields=default_value_fields)
if is_all_data_type:
default_schema = cf.gen_collection_schema_all_datatype(auto_id=auto_id, dim=dim,
primary_field=primary_field,
enable_dynamic_field=enable_dynamic_field,
with_json=with_json,
multiple_dim_array=multiple_dim_array)
multiple_dim_array=multiple_dim_array,
nullable_fields=nullable_fields,
default_value_fields=default_value_fields)
log.info("init_collection_general: collection creation")
collection_w = self.init_collection_wrap(name=collection_name, schema=default_schema, **kwargs)
vector_name_list = cf.extract_vector_field_name_list(collection_w)
@@ -294,7 +310,8 @@ class TestcaseBase(Base):
cf.insert_data(collection_w, nb, is_binary, is_all_data_type, auto_id=auto_id,
dim=dim, enable_dynamic_field=enable_dynamic_field, with_json=with_json,
random_primary_key=random_primary_key, multiple_dim_array=multiple_dim_array,
primary_field=primary_field, vector_data_type=vector_data_type)
primary_field=primary_field, vector_data_type=vector_data_type,
nullable_fields=nullable_fields)
if is_flush:
assert collection_w.is_empty is False
assert collection_w.num_entities == nb
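As a usage sketch of the two new parameters (values illustrative, not from this commit; prefix and ct as used elsewhere in this suite), a test can now ask for half of the float values to be inserted as null while the string field carries a schema-level default:

# hypothetical test snippet, not part of this commit
collection_w, _, _, insert_ids, time_stamp = \
    self.init_collection_general(prefix, True,
                                 nullable_fields={ct.default_float_field_name: 0.5},
                                 default_value_fields={ct.default_string_field_name: "milvus"})[0:5]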


@@ -347,34 +347,54 @@ def gen_sparse_vec_field(name=ct.default_sparse_vec_field_name, is_primary=False
def gen_default_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
auto_id=False, dim=ct.default_dim, enable_dynamic_field=False, with_json=True,
multiple_dim_array=[], is_partition_key=None, vector_data_type="FLOAT_VECTOR",
**kwargs):
if enable_dynamic_field:
if primary_field is ct.default_int64_field_name:
if is_partition_key is None:
fields = [gen_int64_field(), gen_float_vec_field(dim=dim, vector_data_type=vector_data_type)]
else:
fields = [gen_int64_field(is_partition_key=(is_partition_key == ct.default_int64_field_name)),
gen_float_vec_field(dim=dim, vector_data_type=vector_data_type)]
elif primary_field is ct.default_string_field_name:
if is_partition_key is None:
fields = [gen_string_field(), gen_float_vec_field(dim=dim, vector_data_type=vector_data_type)]
else:
fields = [gen_string_field(is_partition_key=(is_partition_key == ct.default_string_field_name)),
gen_float_vec_field(dim=dim, vector_data_type=vector_data_type)]
else:
log.error("Primary key only support int or varchar")
assert False
nullable_fields={}, default_value_fields={}, **kwargs):
# gen primary key field
if default_value_fields.get(ct.default_int64_field_name) is None:
int64_field = gen_int64_field(is_partition_key=(is_partition_key == ct.default_int64_field_name),
nullable=(ct.default_int64_field_name in nullable_fields))
else:
if is_partition_key is None:
int64_field = gen_int64_field()
vchar_field = gen_string_field()
int64_field = gen_int64_field(is_partition_key=(is_partition_key == ct.default_int64_field_name),
nullable=(ct.default_int64_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_int64_field_name))
if default_value_fields.get(ct.default_string_field_name) is None:
string_field = gen_string_field(is_partition_key=(is_partition_key == ct.default_string_field_name),
nullable=(ct.default_string_field_name in nullable_fields))
else:
string_field = gen_string_field(is_partition_key=(is_partition_key == ct.default_string_field_name),
nullable=(ct.default_string_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_string_field_name))
# gen vector field
if default_value_fields.get(ct.default_float_vec_field_name) is None:
float_vector_field = gen_float_vec_field(dim=dim, vector_data_type=vector_data_type,
nullable=(ct.default_float_vec_field_name in nullable_fields))
else:
float_vector_field = gen_float_vec_field(dim=dim, vector_data_type=vector_data_type,
nullable=(ct.default_float_vec_field_name in nullable_fields),
default_value=default_value_fields.get(
ct.default_float_vec_field_name))
if primary_field == ct.default_int64_field_name:
fields = [int64_field]
elif primary_field == ct.default_string_field_name:
fields = [string_field]
else:
log.error("Primary key only supports int or varchar")
assert False
if enable_dynamic_field:
fields.append(float_vector_field)
else:
if default_value_fields.get(ct.default_float_field_name) is None:
float_field = gen_float_field(nullable=(ct.default_float_field_name in nullable_fields))
else:
int64_field = gen_int64_field(is_partition_key=(is_partition_key == ct.default_int64_field_name))
vchar_field = gen_string_field(is_partition_key=(is_partition_key == ct.default_string_field_name))
fields = [int64_field, gen_float_field(), vchar_field, gen_json_field(),
gen_float_vec_field(dim=dim, vector_data_type=vector_data_type)]
float_field = gen_float_field(nullable=(ct.default_float_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_float_field_name))
if default_value_fields.get(ct.default_json_field_name) is None:
json_field = gen_json_field(nullable=(ct.default_json_field_name in nullable_fields))
else:
json_field = gen_json_field(nullable=(ct.default_json_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_json_field_name))
fields = [int64_field, float_field, string_field, json_field, float_vector_field]
if with_json is False:
fields.remove(gen_json_field())
fields.remove(json_field)
if len(multiple_dim_array) != 0:
for other_dim in multiple_dim_array:
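For reference, a hedged example of driving this generator with the new dicts (hypothetical call; only key membership matters for nullability at schema time, since the null-data percent is consumed later by the data generators):

# hypothetical call; field-name constants as used throughout this file
schema = gen_default_collection_schema(
    nullable_fields={ct.default_float_field_name: 0.5},
    default_value_fields={ct.default_string_field_name: "default"})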
@@ -496,32 +516,96 @@ def gen_multiple_json_default_collection_schema(description=ct.default_desc, pri
return schema
def gen_collection_schema_all_datatype(description=ct.default_desc,
primary_field=ct.default_int64_field_name,
auto_id=False, dim=ct.default_dim,
enable_dynamic_field=False, with_json=True, multiple_dim_array=[], **kwargs):
def gen_collection_schema_all_datatype(description=ct.default_desc, primary_field=ct.default_int64_field_name,
auto_id=False, dim=ct.default_dim, enable_dynamic_field=False, with_json=True,
multiple_dim_array=[], nullable_fields={}, default_value_fields={},
**kwargs):
# gen primary key field
if default_value_fields.get(ct.default_int64_field_name) is None:
int64_field = gen_int64_field()
else:
int64_field = gen_int64_field(default_value=default_value_fields.get(ct.default_int64_field_name))
if enable_dynamic_field:
fields = [gen_int64_field()]
else:
fields = [gen_int64_field(), gen_int32_field(), gen_int16_field(), gen_int8_field(),
gen_bool_field(), gen_float_field(), gen_double_field(), gen_string_field(),
gen_json_field()]
if default_value_fields.get(ct.default_int32_field_name) is None:
int32_field = gen_int32_field(nullable=(ct.default_int32_field_name in nullable_fields))
else:
int32_field = gen_int32_field(nullable=(ct.default_int32_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_int32_field_name))
if default_value_fields.get(ct.default_int16_field_name) is None:
int16_field = gen_int16_field(nullable=(ct.default_int16_field_name in nullable_fields))
else:
int16_field = gen_int16_field(nullable=(ct.default_int16_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_int16_field_name))
if default_value_fields.get(ct.default_int8_field_name) is None:
int8_field = gen_int8_field(nullable=(ct.default_int8_field_name in nullable_fields))
else:
int8_field = gen_int8_field(nullable=(ct.default_int8_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_int8_field_name))
if default_value_fields.get(ct.default_bool_field_name) is None:
bool_field = gen_bool_field(nullable=(ct.default_bool_field_name in nullable_fields))
else:
bool_field = gen_bool_field(nullable=(ct.default_bool_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_bool_field_name))
if default_value_fields.get(ct.default_float_field_name) is None:
float_field = gen_float_field(nullable=(ct.default_float_field_name in nullable_fields))
else:
float_field = gen_float_field(nullable=(ct.default_float_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_float_field_name))
if default_value_fields.get(ct.default_double_field_name) is None:
double_field = gen_double_field(nullable=(ct.default_double_field_name in nullable_fields))
else:
double_field = gen_double_field(nullable=(ct.default_double_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_double_field_name))
if default_value_fields.get(ct.default_string_field_name) is None:
string_field = gen_string_field(nullable=(ct.default_string_field_name in nullable_fields))
else:
string_field = gen_string_field(nullable=(ct.default_string_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_string_field_name))
if default_value_fields.get(ct.default_json_field_name) is None:
json_field = gen_json_field(nullable=(ct.default_json_field_name in nullable_fields))
else:
json_field = gen_json_field(nullable=(ct.default_json_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_json_field_name))
fields = [int64_field, int32_field, int16_field, int8_field, bool_field,
float_field, double_field, string_field, json_field]
if with_json is False:
fields.remove(gen_json_field())
fields.remove(json_field)
if len(multiple_dim_array) == 0:
fields.append(gen_float_vec_field(dim=dim))
# gen vector field
if default_value_fields.get(ct.default_float_vec_field_name) is None:
float_vector_field = gen_float_vec_field(dim=dim)
else:
float_vector_field = gen_float_vec_field(dim=dim,
default_value=default_value_fields.get(ct.default_float_vec_field_name))
fields.append(float_vector_field)
else:
multiple_dim_array.insert(0, dim)
for i in range(len(multiple_dim_array)):
if ct.append_vector_type[i%3] != ct.sparse_vector:
fields.append(gen_float_vec_field(name=f"multiple_vector_{ct.append_vector_type[i%3]}",
dim=multiple_dim_array[i],
vector_data_type=ct.append_vector_type[i%3]))
if default_value_fields.get(ct.append_vector_type[i%3]) is None:
vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.append_vector_type[i%3]}",
dim=multiple_dim_array[i],
vector_data_type=ct.append_vector_type[i%3])
else:
vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.append_vector_type[i%3]}",
dim=multiple_dim_array[i],
vector_data_type=ct.append_vector_type[i%3],
default_value=default_value_fields.get(ct.append_vector_type[i%3]))
fields.append(vector_field)
else:
# Sparse vector fields do not take a dim parameter
fields.append(gen_float_vec_field(name=f"multiple_vector_{ct.sparse_vector}",
vector_data_type=ct.sparse_vector))
if default_value_fields.get(ct.default_sparse_vec_field_name) is None:
sparse_vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.sparse_vector}",
vector_data_type=ct.sparse_vector)
else:
sparse_vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.sparse_vector}",
vector_data_type=ct.sparse_vector,
default_value=default_value_fields.get(ct.default_sparse_vec_field_name))
fields.append(sparse_vector_field)
schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
primary_field=primary_field, auto_id=auto_id,
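The repeated get-then-branch pattern above could be written more compactly; a minimal sketch under the same assumptions (the helper below is hypothetical, not part of the commit):

# hypothetical helper condensing the nullable/default_value plumbing
def _field_kwargs(name, nullable_fields, default_value_fields):
    # nullable is driven by key membership; default_value is only passed
    # through when the caller actually supplied one
    kwargs = {"nullable": name in nullable_fields}
    if default_value_fields.get(name) is not None:
        kwargs["default_value"] = default_value_fields[name]
    return kwargs

int64_field = gen_int64_field(**_field_kwargs(ct.default_int64_field_name,
                                              nullable_fields, default_value_fields))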
@@ -536,8 +620,29 @@ def gen_collection_schema(fields, primary_field=None, description=ct.default_des
def gen_default_binary_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
auto_id=False, dim=ct.default_dim, **kwargs):
fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_binary_vec_field(dim=dim)]
auto_id=False, dim=ct.default_dim, nullable_fields={}, default_value_fields={},
**kwargs):
if default_value_fields.get(ct.default_int64_field_name) is None:
int64_field = gen_int64_field(nullable=(ct.default_int64_field_name in nullable_fields))
else:
int64_field = gen_int64_field(nullable=(ct.default_int64_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_int64_field_name))
if default_value_fields.get(ct.default_float_field_name) is None:
float_field = gen_float_field(nullable=(ct.default_float_field_name in nullable_fields))
else:
float_field = gen_float_field(nullable=(ct.default_float_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_float_field_name))
if default_value_fields.get(ct.default_string_field_name) is None:
string_field = gen_string_field(nullable=(ct.default_string_field_name in nullable_fields))
else:
string_field = gen_string_field(nullable=(ct.default_string_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_string_field_name))
if default_value_fields.get(ct.default_binary_vec_field_name) is None:
binary_vec_field = gen_binary_vec_field(dim=dim, nullable=(ct.default_binary_vec_field_name in nullable_fields))
else:
binary_vec_field = gen_binary_vec_field(dim=dim, nullable=(ct.default_binary_vec_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_binary_vec_field_name))
fields = [int64_field, float_field, string_field, binary_vec_field]
binary_schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
primary_field=primary_field,
auto_id=auto_id, **kwargs)
@@ -545,11 +650,37 @@ def gen_default_binary_collection_schema(description=ct.default_desc, primary_fi
def gen_default_sparse_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
auto_id=False, with_json=False, multiple_dim_array=[], **kwargs):
auto_id=False, with_json=False, multiple_dim_array=[], nullable_fields={},
default_value_fields={}, **kwargs):
if default_value_fields.get(ct.default_int64_field_name) is None:
int64_field = gen_int64_field(nullable=(ct.default_int64_field_name in nullable_fields))
else:
int64_field = gen_int64_field(nullable=(ct.default_int64_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_int64_field_name))
if default_value_fields.get(ct.default_float_field_name) is None:
float_field = gen_float_field(nullable=(ct.default_float_field_name in nullable_fields))
else:
float_field = gen_float_field(nullable=(ct.default_float_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_float_field_name))
if default_value_fields.get(ct.default_string_field_name) is None:
string_field = gen_string_field(nullable=(ct.default_string_field_name in nullable_fields))
else:
string_field = gen_string_field(nullable=(ct.default_string_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_string_field_name))
if default_value_fields.get(ct.default_sparse_vec_field_name) is None:
sparse_vec_field = gen_sparse_vec_field(nullable=(ct.default_sparse_vec_field_name in nullable_fields))
else:
sparse_vec_field = gen_sparse_vec_field(nullable=(ct.default_sparse_vec_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_sparse_vec_field_name))
fields = [int64_field, float_field, string_field, sparse_vec_field]
fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_sparse_vec_field()]
if with_json:
fields.insert(-1, gen_json_field())
if default_value_fields.get(ct.default_json_field_name) is None:
json_field = gen_json_field(nullable=(ct.default_json_field_name in nullable_fields))
else:
json_field = gen_json_field(nullable=(ct.default_json_field_name in nullable_fields),
default_value=default_value_fields.get(ct.default_json_field_name))
fields.insert(-1, json_field)
if len(multiple_dim_array) != 0:
for i in range(len(multiple_dim_array)):
@@ -616,14 +747,36 @@ def gen_binary_vectors(num, dim):
def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True,
random_primary_key=False, multiple_dim_array=[], multiple_vector_field_name=[],
vector_data_type="FLOAT_VECTOR", auto_id=False, primary_field = ct.default_int64_field_name):
vector_data_type="FLOAT_VECTOR", auto_id=False,
primary_field = ct.default_int64_field_name, nullable_fields={}):
if not random_primary_key:
int_values = pd.Series(data=[i for i in range(start, start + nb)])
else:
int_values = pd.Series(data=random.sample(range(start, start + nb), nb))
float_values = pd.Series(data=[np.float32(i) for i in range(start, start + nb)], dtype="float32")
string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
float_data = [np.float32(i) for i in range(start, start + nb)]
float_values = pd.Series(data=float_data, dtype="float32")
if ct.default_float_field_name in nullable_fields:
null_number = int(nb*nullable_fields[ct.default_float_field_name])
null_data = [None for _ in range(null_number)]
float_data = float_data[:nb-null_number] + null_data
float_values = pd.Series(data=float_data, dtype=object)
string_data = [str(i) for i in range(start, start + nb)]
string_values = pd.Series(data=string_data, dtype="string")
if ct.default_string_field_name in nullable_fields:
null_number = int(nb*nullable_fields[ct.default_string_field_name])
null_data = [None for _ in range(null_number)]
string_data = string_data[:nb-null_number] + null_data
string_values = pd.Series(data=string_data, dtype=object)
json_values = [{"number": i, "float": i*1.0} for i in range(start, start + nb)]
if ct.default_json_field_name in nullable_fields:
null_number = int(nb * nullable_fields[ct.default_json_field_name])
null_data = [{"number": None, "float": None} for _ in range(null_number)]
json_values = json_values[:nb-null_number] + null_data
float_vec_values = gen_vectors(nb, dim, vector_data_type=vector_data_type)
df = pd.DataFrame({
ct.default_int64_field_name: int_values,
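The null handling above pads the tail of each column: the first nb - null_number values are kept and the rest become None. A worked example with illustrative numbers:

# tail-padding scheme used by the generators above (numbers illustrative)
nb, percent = 10, 0.3
null_number = int(nb * percent)                 # 3
data = [float(i) for i in range(nb)]
data = data[:nb - null_number] + [None] * null_number
# -> [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, None, None, None]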
@@ -655,15 +808,31 @@ def gen_general_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0,
def gen_general_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True,
random_primary_key=False, multiple_dim_array=[], multiple_vector_field_name=[],
vector_data_type="FLOAT_VECTOR", auto_id=False,
primary_field=ct.default_int64_field_name):
primary_field=ct.default_int64_field_name, nullable_fields={}):
insert_list = []
if not random_primary_key:
int_values = pd.Series(data=[i for i in range(start, start + nb)])
else:
int_values = pd.Series(data=random.sample(range(start, start + nb), nb))
float_values = pd.Series(data=[np.float32(i) for i in range(start, start + nb)], dtype="float32")
string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
float_data = [np.float32(i) for i in range(start, start + nb)]
float_values = pd.Series(data=float_data, dtype="float32")
if ct.default_float_field_name in nullable_fields:
null_number = int(nb * nullable_fields[ct.default_float_field_name])
null_data = [None for _ in range(null_number)]
float_data = float_data[:nb - null_number] + null_data
float_values = pd.Series(data=float_data, dtype=object)
string_data = [str(i) for i in range(start, start + nb)]
string_values = pd.Series(data=string_data, dtype="string")
if ct.default_string_field_name in nullable_fields:
null_number = int(nb * nullable_fields[ct.default_string_field_name])
null_data = [None for _ in range(null_number)]
string_data = string_data[:nb - null_number] + null_data
string_values = pd.Series(data=string_data, dtype=object)
json_values = [{"number": i, "float": i*1.0} for i in range(start, start + nb)]
if ct.default_json_field_name in nullable_fields:
null_number = int(nb * nullable_fields[ct.default_json_field_name])
null_data = [{"number": None, "float": None} for _ in range(null_number)]
json_values = json_values[:nb-null_number] + null_data
float_vec_values = gen_vectors(nb, dim, vector_data_type=vector_data_type)
insert_list = [int_values, float_values, string_values]
@@ -691,7 +860,7 @@ def gen_general_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0,
def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True, multiple_dim_array=[],
multiple_vector_field_name=[], vector_data_type="FLOAT_VECTOR", auto_id=False,
primary_field = ct.default_int64_field_name):
primary_field = ct.default_int64_field_name, nullable_fields={}):
array = []
for i in range(start, start + nb):
dict = {ct.default_int64_field_name: i,
@@ -712,6 +881,23 @@ def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_js
for i in range(len(multiple_dim_array)):
dict[multiple_vector_field_name[i]] = gen_vectors(1, multiple_dim_array[i],
vector_data_type=vector_data_type)[0]
if ct.default_int64_field_name in nullable_fields:
null_number = int(nb*nullable_fields[ct.default_int64_field_name])
for single_dict in array[-null_number:]:
single_dict[ct.default_int64_field_name] = None
if ct.default_float_field_name in nullable_fields:
null_number = int(nb * nullable_fields[ct.default_float_field_name])
for single_dict in array[-null_number:]:
single_dict[ct.default_float_field_name] = None
if ct.default_string_field_name in nullable_fields:
null_number = int(nb * nullable_fields[ct.default_string_field_name])
for single_dict in array[-null_number:]:
single_dict[ct.default_string_field_name] = None
if ct.default_json_field_name in nullable_fields:
null_number = int(nb * nullable_fields[ct.default_json_field_name])
for single_dict in array[-null_number:]:
single_dict[ct.default_string_field_name] = {"number": None, "float": None}
log.debug("generated default row data")
return array
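A hedged sketch of the resulting row data (hypothetical call; with nb=4 and a 0.5 ratio, the last int(4 * 0.5) == 2 rows carry None in the float field):

# hypothetical call, not part of this commit
rows = gen_default_rows_data(nb=4, with_json=False,
                             nullable_fields={ct.default_float_field_name: 0.5})
# rows[0] and rows[1] hold real float values; rows[2] and rows[3] hold None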
@@ -885,20 +1071,75 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, w
def gen_general_list_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True,
auto_id=False, random_primary_key=False, multiple_dim_array=[],
multiple_vector_field_name=[], primary_field=ct.default_int64_field_name):
multiple_vector_field_name=[], primary_field=ct.default_int64_field_name,
nullable_fields={}):
if not random_primary_key:
int64_values = pd.Series(data=[i for i in range(start, start + nb)])
else:
int64_values = pd.Series(data=random.sample(range(start, start + nb), nb))
int32_values = pd.Series(data=[np.int32(i) for i in range(start, start + nb)], dtype="int32")
int16_values = pd.Series(data=[np.int16(i) for i in range(start, start + nb)], dtype="int16")
int8_values = pd.Series(data=[np.int8(i) for i in range(start, start + nb)], dtype="int8")
bool_values = pd.Series(data=[np.bool_(i) for i in range(start, start + nb)], dtype="bool")
float_values = pd.Series(data=[np.float32(i) for i in range(start, start + nb)], dtype="float32")
double_values = pd.Series(data=[np.double(i) for i in range(start, start + nb)], dtype="double")
string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
int32_data = [np.int32(i) for i in range(start, start + nb)]
int32_values = pd.Series(data=int32_data, dtype="int32")
if ct.default_int32_field_name in nullable_fields:
null_number = int(nb * nullable_fields[ct.default_int32_field_name])
null_data = [None for _ in range(null_number)]
int32_data = int32_data[:nb - null_number] + null_data
int32_values = pd.Series(data=int32_data, dtype=object)
int16_data = [np.int16(i) for i in range(start, start + nb)]
int16_values = pd.Series(data=int16_data, dtype="int16")
if ct.default_int16_field_name in nullable_fields:
null_number = int(nb * nullable_fields[ct.default_int16_field_name])
null_data = [None for _ in range(null_number)]
int16_data = int16_data[:nb - null_number] + null_data
int16_values = pd.Series(data=int16_data, dtype=object)
int8_data = [np.int8(i) for i in range(start, start + nb)]
int8_values = pd.Series(data=int8_data, dtype="int8")
if ct.default_int8_field_name in nullable_fields:
null_number = int(nb * nullable_fields[ct.default_int8_field_name])
null_data = [None for _ in range(null_number)]
int8_data = int8_data[:nb - null_number] + null_data
int8_values = pd.Series(data=int8_data, dtype=object)
bool_data = [np.bool_(i) for i in range(start, start + nb)]
bool_values = pd.Series(data=bool_data, dtype="bool")
if ct.default_bool_field_name in nullable_fields:
null_number = int(nb * nullable_fields[ct.default_bool_field_name])
null_data = [None for _ in range(null_number)]
bool_data = bool_data[:nb - null_number] + null_data
bool_values = pd.Series(data=bool_data, dtype=object)
float_data = [np.float32(i) for i in range(start, start + nb)]
float_values = pd.Series(data=float_data, dtype="float32")
if ct.default_float_field_name in nullable_fields:
null_number = int(nb * nullable_fields[ct.default_float_field_name])
null_data = [None for _ in range(null_number)]
float_data = float_data[:nb - null_number] + null_data
float_values = pd.Series(data=float_data, dtype=object)
double_data = [np.double(i) for i in range(start, start + nb)]
double_values = pd.Series(data=double_data, dtype="double")
if ct.default_double_field_name in nullable_fields:
null_number = int(nb * nullable_fields[ct.default_double_field_name])
null_data = [None for _ in range(null_number)]
double_data = double_data[:nb - null_number] + null_data
double_values = pd.Series(data=double_data, dtype=object)
string_data = [str(i) for i in range(start, start + nb)]
string_values = pd.Series(data=string_data, dtype="string")
if ct.default_string_field_name in nullable_fields:
null_number = int(nb * nullable_fields[ct.default_string_field_name])
null_data = [None for _ in range(null_number)]
string_data = string_data[:nb - null_number] + null_data
string_values = pd.Series(data=string_data, dtype=object)
json_values = [{"number": i, "string": str(i), "bool": bool(i),
"list": [j for j in range(i, i + ct.default_json_list_length)]} for i in range(start, start + nb)]
if ct.default_json_field_name in nullable_fields:
null_number = int(nb * nullable_fields[ct.default_json_field_name])
null_data = [{"number": None, "string": None, "bool": None,
"list": [None for _ in range(i, i + ct.default_json_list_length)]} for i in range(null_number)]
json_values = json_values[:nb - null_number] + null_data
float_vec_values = gen_vectors(nb, dim)
insert_list = [int64_values, int32_values, int16_values, int8_values, bool_values, float_values, double_values,
string_values, json_values]
@@ -962,10 +1203,31 @@ def gen_default_rows_data_all_data_type(nb=ct.default_nb, dim=ct.default_dim, st
def gen_default_binary_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, auto_id=False,
primary_field=ct.default_int64_field_name):
int_values = pd.Series(data=[i for i in range(start, start + nb)])
float_values = pd.Series(data=[np.float32(i) for i in range(start, start + nb)], dtype="float32")
string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
primary_field=ct.default_int64_field_name, nullable_fields={}):
int_data = [i for i in range(start, start + nb)]
int_values = pd.Series(data=int_data)
if ct.default_int64_field_name in nullable_fields:
null_number = int(nb * nullable_fields[ct.default_int64_field_name])
null_data = [None for _ in range(null_number)]
int_data = int_data[:nb - null_number] + null_data
int_values = pd.Series(data=int_data, dtype=object)
float_data = [np.float32(i) for i in range(start, start + nb)]
float_values = pd.Series(data=float_data, dtype="float32")
if ct.default_float_field_name in nullable_fields:
null_number = int(nb * nullable_fields[ct.default_float_field_name])
null_data = [None for _ in range(null_number)]
float_data = float_data[:nb - null_number] + null_data
float_values = pd.Series(data=float_data, dtype=object)
string_data = [str(i) for i in range(start, start + nb)]
string_values = pd.Series(data=string_data, dtype="string")
if ct.default_string_field_name in nullable_fields:
null_number = int(nb * nullable_fields[ct.default_string_field_name])
null_data = [None for _ in range(null_number)]
string_data = string_data[:nb - null_number] + null_data
string_values = pd.Series(data=string_data, dtype=object)
binary_raw_values, binary_vec_values = gen_binary_vectors(nb, dim)
df = pd.DataFrame({
ct.default_int64_field_name: int_values,
@@ -2012,7 +2274,7 @@ def gen_partitions(collection_w, partition_num=1):
def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_type=False,
auto_id=False, dim=ct.default_dim, insert_offset=0, enable_dynamic_field=False, with_json=True,
random_primary_key=False, multiple_dim_array=[], primary_field=ct.default_int64_field_name,
vector_data_type="FLOAT_VECTOR"):
vector_data_type="FLOAT_VECTOR", nullable_fields={}):
"""
target: insert non-binary/binary data
method: insert non-binary/binary data into partitions if any
@@ -2039,21 +2301,24 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
multiple_dim_array=multiple_dim_array,
multiple_vector_field_name=vector_name_list,
vector_data_type=vector_data_type,
auto_id=auto_id, primary_field=primary_field)
auto_id=auto_id, primary_field=primary_field,
nullable_fields=nullable_fields)
elif vector_data_type in ct.append_vector_type:
default_data = gen_general_default_list_data(nb // num, dim=dim, start=start, with_json=with_json,
random_primary_key=random_primary_key,
multiple_dim_array=multiple_dim_array,
multiple_vector_field_name=vector_name_list,
vector_data_type=vector_data_type,
auto_id=auto_id, primary_field=primary_field)
auto_id=auto_id, primary_field=primary_field,
nullable_fields=nullable_fields)
else:
default_data = gen_default_rows_data(nb // num, dim=dim, start=start, with_json=with_json,
multiple_dim_array=multiple_dim_array,
multiple_vector_field_name=vector_name_list,
vector_data_type=vector_data_type,
auto_id=auto_id, primary_field=primary_field)
auto_id=auto_id, primary_field=primary_field,
nullable_fields=nullable_fields)
else:
if not enable_dynamic_field:
@@ -2062,13 +2327,15 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
random_primary_key=random_primary_key,
multiple_dim_array=multiple_dim_array,
multiple_vector_field_name=vector_name_list,
auto_id=auto_id, primary_field=primary_field)
auto_id=auto_id, primary_field=primary_field,
nullable_fields=nullable_fields)
elif vector_data_type == "FLOAT16_VECTOR" or "BFLOAT16_VECTOR":
default_data = gen_general_list_all_data_type(nb // num, dim=dim, start=start, with_json=with_json,
random_primary_key=random_primary_key,
multiple_dim_array=multiple_dim_array,
multiple_vector_field_name=vector_name_list,
auto_id=auto_id, primary_field=primary_field)
auto_id=auto_id, primary_field=primary_field,
nullable_fields=nullable_fields)
else:
if os.path.exists(ct.rows_all_data_type_file_path + f'_{i}' + f'_dim{dim}.txt'):
with open(ct.rows_all_data_type_file_path + f'_{i}' + f'_dim{dim}.txt', 'rb') as f:
@@ -2083,7 +2350,8 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
else:
default_data, binary_raw_data = gen_default_binary_dataframe_data(nb // num, dim=dim, start=start,
auto_id=auto_id,
primary_field=primary_field)
primary_field=primary_field,
nullable_fields=nullable_fields)
binary_raw_vectors.extend(binary_raw_data)
insert_res = collection_w.insert(default_data, par[i].name)[0]
log.info(f"inserted {nb // num} data into collection {collection_w.name}")


@@ -47,6 +47,7 @@ bfloat16_type = "BFLOAT16_VECTOR"
sparse_vector = "SPARSE_FLOAT_VECTOR"
append_vector_type = [float16_type, bfloat16_type, sparse_vector]
all_dense_vector_types = [float_type, float16_type, bfloat16_type]
all_vector_data_types = [float_type, float16_type, bfloat16_type, sparse_vector]
default_sparse_vec_field_name = "sparse_vector"
default_partition_name = "_default"
default_resource_group_name = '__default_resource_group'


@@ -4517,4 +4517,150 @@ class TestCollectionMmap(TestcaseBase):
collection_w.drop()
collection_w.set_properties({'mmap.enabled': True}, check_task=CheckTasks.err_res,
check_items={ct.err_code: 100,
ct.err_msg: f"collection not found"})
ct.err_msg: f"collection not found"})
class TestCollectionNullInvalid(TestcaseBase):
""" Test case of collection interface """
"""
******************************************************************
# The following are invalid cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("vector_type", ct.all_vector_data_types)
def test_create_collection_set_nullable_on_pk_field(self, vector_type):
"""
target: test creating a collection with nullable=True set on the pk field
method: create a collection whose primary key field is nullable
expected: raise exception
"""
self._connect()
int_fields = []
c_name = cf.gen_unique_str(prefix)
# build a minimal schema: a nullable primary key field plus one vector field
int_fields.append(cf.gen_int64_field(is_primary=True, nullable=True))
int_fields.append(cf.gen_float_vec_field(vector_data_type=vector_type))
schema = cf.gen_collection_schema(fields=int_fields)
error = {ct.err_code: 1100, ct.err_msg: "primary field not support null"}
self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("vector_type", ct.all_vector_data_types)
def test_create_collection_set_nullable_on_vector_field(self, vector_type):
"""
target: test creating a collection with nullable=True set on a vector field
method: create a collection whose vector field is nullable
expected: raise exception
"""
self._connect()
int_fields = []
c_name = cf.gen_unique_str(prefix)
# build a minimal schema: a primary key field plus one nullable vector field
int_fields.append(cf.gen_int64_field(is_primary=True))
int_fields.append(cf.gen_float_vec_field(vector_data_type=vector_type, nullable=True))
schema = cf.gen_collection_schema(fields=int_fields)
error = {ct.err_code: 1100, ct.err_msg: "vector type not support null"}
self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)
class TestCollectionDefaultValueInvalid(TestcaseBase):
""" Test case of collection interface """
"""
******************************************************************
# The following are invalid cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("vector_type", ct.all_vector_data_types)
def test_create_collection_default_value_on_pk_field(self, vector_type):
"""
target: test creating a collection with a default value set on the pk field
method: create a collection whose primary key field has a default value
expected: raise exception
"""
self._connect()
int_fields = []
c_name = cf.gen_unique_str(prefix)
# build a minimal schema: a primary key field with a default value plus one vector field
int_fields.append(cf.gen_int64_field(is_primary=True, default_value=10))
int_fields.append(cf.gen_float_vec_field(vector_data_type=vector_type))
schema = cf.gen_collection_schema(fields=int_fields)
error = {ct.err_code: 1100, ct.err_msg: "primary field not support default_value"}
self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("vector_type", ct.all_vector_data_types)
def test_create_collection_default_value_on_vector_field(self, vector_type):
"""
target: test creating a collection with a default value set on a vector field
method: create a collection whose vector field has a default value
expected: raise exception
"""
self._connect()
int_fields = []
c_name = cf.gen_unique_str(prefix)
# build a minimal schema: a primary key field plus one vector field with a default value
int_fields.append(cf.gen_int64_field(is_primary=True))
int_fields.append(cf.gen_float_vec_field(vector_data_type=vector_type, default_value=10))
schema = cf.gen_collection_schema(fields=int_fields)
error = {ct.err_code: 1100, ct.err_msg: "default value type mismatches field schema type"}
self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("scalar_type", ["JSON", "ARRAY"])
def test_create_collection_default_value_on_not_support_scalar_field(self, scalar_type):
"""
target: test creating a collection with a default value set on unsupported scalar fields
method: create a collection with a default value on a json or array field
expected: raise exception
"""
self._connect()
int_fields = []
c_name = cf.gen_unique_str(prefix)
# build a schema whose json or array field carries a default value
if scalar_type == "JSON":
int_fields.append(cf.gen_json_field(default_value=10))
if scalar_type == "ARRAY":
int_fields.append(cf.gen_array_field(default_value=10))
int_fields.append(cf.gen_int64_field(is_primary=True, default_value=10))
int_fields.append(cf.gen_float_vec_field())
schema = cf.gen_collection_schema(fields=int_fields)
error = {ct.err_code: 1100, ct.err_msg: "default value type mismatches field schema type"}
self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_create_collection_non_match_default_value(self):
"""
target: test creating a collection where a default value does not match the field data type
method: create a collection with an int8 field whose default value is a float
expected: raise exception
"""
self._connect()
int_fields = []
c_name = cf.gen_unique_str(prefix)
# build a minimal schema: an int8 field with a type-mismatched default value
int_fields.append(cf.gen_int64_field(is_primary=True))
int_fields.append(cf.gen_int8_field(default_value=10.0))
int_fields.append(cf.gen_float_vec_field())
schema = cf.gen_collection_schema(fields=int_fields)
error = {ct.err_code: 1100, ct.err_msg: "default value type mismatches field schema type"}
self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_create_collection_default_value_none(self):
"""
target: test creating a field with default_value=None when nullable is False or True
method: create a field with default_value=None
expected: 1. raise exception when nullable=False and default_value=None
2. create field successfully when nullable=True and default_value=None
"""
self._connect()
self.field_schema_wrap.init_field_schema(name="int8", dtype=DataType.INT8, nullable=True, default_value=None)
error = {ct.err_code: 1,
ct.err_msg: "Default value cannot be None for a field that is defined as nullable == false"}
self.field_schema_wrap.init_field_schema(name="int8_null", dtype=DataType.INT8, default_value=None,
check_task=CheckTasks.err_res, check_items=error)


@@ -1329,6 +1329,10 @@ class TestCollectionSearch(TestcaseBase):
def scalar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=[0, 0.5, 1])
def null_data_percent(self, request):
yield request.param
"""
******************************************************************
# The following are valid base cases
@@ -1336,7 +1340,7 @@ class TestCollectionSearch(TestcaseBase):
"""
@pytest.mark.tags(CaseLabel.L0)
def test_search_normal(self, nq, dim, auto_id, is_flush, enable_dynamic_field, vector_data_type):
def test_search_normal(self, nq, dim, auto_id, is_flush, enable_dynamic_field, vector_data_type, null_data_percent):
"""
target: test search normal case
method: create connection, collection, insert and search
@@ -1346,7 +1350,8 @@ class TestCollectionSearch(TestcaseBase):
collection_w, _, _, insert_ids, time_stamp = \
self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush,
enable_dynamic_field=enable_dynamic_field,
vector_data_type=vector_data_type)[0:5]
vector_data_type=vector_data_type,
nullable_fields={ct.default_float_field_name: null_data_percent})[0:5]
# 2. generate search data
vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type)
# 3. search after insert