From 6f7b2b4e759241ebe992d488050d213a47f47d45 Mon Sep 17 00:00:00 2001 From: congqixia Date: Fri, 24 Jan 2025 13:43:06 +0800 Subject: [PATCH] enhance: [2.5] Refine error msg for schema & index checking (#39533) (#39565) Cherry-pick from master pr: #39533 The error message was malformated or missing some meta info, say field name. This PR recitfies some message format and add field name in error message when type param check fails. --------- Signed-off-by: Congqi Xia --- internal/datacoord/index_service.go | 8 +++---- internal/proxy/task_index.go | 2 +- internal/proxy/util.go | 22 +++++++++---------- tests/go_client/testcases/collection_test.go | 10 +++++---- .../test_milvus_client_collection.py | 2 +- .../testcases/test_collection.py | 4 ++-- 6 files changed, 25 insertions(+), 23 deletions(-) diff --git a/internal/datacoord/index_service.go b/internal/datacoord/index_service.go index 893365d50d..087861de79 100644 --- a/internal/datacoord/index_service.go +++ b/internal/datacoord/index_service.go @@ -295,16 +295,16 @@ func ValidateIndexParams(index *model.Index) error { indexParams := funcutil.KeyValuePair2Map(index.IndexParams) userIndexParams := funcutil.KeyValuePair2Map(index.UserIndexParams) if err := indexparamcheck.ValidateMmapIndexParams(indexType, indexParams); err != nil { - return merr.WrapErrParameterInvalidMsg("invalid mmap index params", err.Error()) + return merr.WrapErrParameterInvalidMsg("invalid mmap index params: %s", err.Error()) } if err := indexparamcheck.ValidateMmapIndexParams(indexType, userIndexParams); err != nil { - return merr.WrapErrParameterInvalidMsg("invalid mmap user index params", err.Error()) + return merr.WrapErrParameterInvalidMsg("invalid mmap user index params: %s", err.Error()) } if err := indexparamcheck.ValidateOffsetCacheIndexParams(indexType, indexParams); err != nil { - return merr.WrapErrParameterInvalidMsg("invalid offset cache index params", err.Error()) + return merr.WrapErrParameterInvalidMsg("invalid offset cache index params: %s", err.Error()) } if err := indexparamcheck.ValidateOffsetCacheIndexParams(indexType, userIndexParams); err != nil { - return merr.WrapErrParameterInvalidMsg("invalid offset cache index params", err.Error()) + return merr.WrapErrParameterInvalidMsg("invalid offset cache index params: %s", err.Error()) } return nil } diff --git a/internal/proxy/task_index.go b/internal/proxy/task_index.go index c647e10591..5aa7961d86 100644 --- a/internal/proxy/task_index.go +++ b/internal/proxy/task_index.go @@ -195,7 +195,7 @@ func (cit *createIndexTask) parseIndexParams(ctx context.Context) error { if exist && specifyIndexType != "" { if err := indexparamcheck.ValidateMmapIndexParams(specifyIndexType, indexParamsMap); err != nil { log.Ctx(ctx).Warn("Invalid mmap type params", zap.String(common.IndexTypeKey, specifyIndexType), zap.Error(err)) - return merr.WrapErrParameterInvalidMsg("invalid mmap type params", err.Error()) + return merr.WrapErrParameterInvalidMsg("invalid mmap type params: %s", err.Error()) } checker, err := indexparamcheck.GetIndexCheckerMgrInstance().GetChecker(specifyIndexType) // not enable hybrid index for user, used in milvus internally diff --git a/internal/proxy/util.go b/internal/proxy/util.go index 88b93bf776..892ef989f1 100644 --- a/internal/proxy/util.go +++ b/internal/proxy/util.go @@ -324,12 +324,12 @@ func validateDimension(field *schemapb.FieldSchema) error { } if typeutil.IsSparseFloatVectorType(field.DataType) { if exist { - return fmt.Errorf("dim should not be specified for sparse vector field %s(%d)", field.Name, field.FieldID) + return fmt.Errorf("dim should not be specified for sparse vector field %s(%d)", field.GetName(), field.FieldID) } return nil } if !exist { - return errors.New("dimension is not defined in field type params, check type param `dim` for vector field") + return errors.Newf("dimension is not defined in field type params of field %s, check type param `dim` for vector field", field.GetName()) } if dim <= 1 { @@ -338,14 +338,14 @@ func validateDimension(field *schemapb.FieldSchema) error { if typeutil.IsFloatVectorType(field.DataType) { if dim > Params.ProxyCfg.MaxDimension.GetAsInt64() { - return fmt.Errorf("invalid dimension: %d. float vector dimension should be in range 2 ~ %d", dim, Params.ProxyCfg.MaxDimension.GetAsInt()) + return fmt.Errorf("invalid dimension: %d of field %s. float vector dimension should be in range 2 ~ %d", dim, field.GetName(), Params.ProxyCfg.MaxDimension.GetAsInt()) } } else { if dim%8 != 0 { - return fmt.Errorf("invalid dimension: %d. binary vector dimension should be multiple of 8. ", dim) + return fmt.Errorf("invalid dimension: %d of field %s. binary vector dimension should be multiple of 8. ", dim, field.GetName()) } if dim > Params.ProxyCfg.MaxDimension.GetAsInt64()*8 { - return fmt.Errorf("invalid dimension: %d. binary vector dimension should be in range 2 ~ %d", dim, Params.ProxyCfg.MaxDimension.GetAsInt()*8) + return fmt.Errorf("invalid dimension: %d of field %s. binary vector dimension should be in range 2 ~ %d", dim, field.GetName(), Params.ProxyCfg.MaxDimension.GetAsInt()*8) } } return nil @@ -365,13 +365,13 @@ func validateMaxLengthPerRow(collectionName string, field *schemapb.FieldSchema) defaultMaxVarCharLength := Params.ProxyCfg.MaxVarCharLength.GetAsInt64() if maxLengthPerRow > defaultMaxVarCharLength || maxLengthPerRow <= 0 { - return merr.WrapErrParameterInvalidMsg("the maximum length specified for a VarChar should be in (0, %d]", defaultMaxVarCharLength) + return merr.WrapErrParameterInvalidMsg("the maximum length specified for a VarChar field(%s) should be in (0, %d], but got %d instead", field.GetName(), defaultMaxVarCharLength, maxLengthPerRow) } exist = true } // if not exist type params max_length, return error if !exist { - return fmt.Errorf("type param(max_length) should be specified for varChar field of collection %s", collectionName) + return fmt.Errorf("type param(max_length) should be specified for varChar field(%s) of collection %s", field.GetName(), collectionName) } return nil @@ -386,7 +386,7 @@ func validateMaxCapacityPerRow(collectionName string, field *schemapb.FieldSchem maxCapacityPerRow, err := strconv.ParseInt(param.Value, 10, 64) if err != nil { - return fmt.Errorf("the value of %s must be an integer", common.MaxCapacityKey) + return fmt.Errorf("the value for %s of field %s must be an integer", common.MaxCapacityKey, field.GetName()) } if maxCapacityPerRow > defaultMaxArrayCapacity || maxCapacityPerRow <= 0 { return fmt.Errorf("the maximum capacity specified for a Array should be in (0, 4096]") @@ -395,7 +395,7 @@ func validateMaxCapacityPerRow(collectionName string, field *schemapb.FieldSchem } // if not exist type params max_length, return error if !exist { - return fmt.Errorf("type param(max_capacity) should be specified for array field of collection %s", collectionName) + return fmt.Errorf("type param(max_capacity) should be specified for array field %s of collection %s", field.GetName(), collectionName) } return nil @@ -410,7 +410,7 @@ func validateVectorFieldMetricType(field *schemapb.FieldSchema) error { return nil } } - return errors.New("vector float without metric_type") + return fmt.Errorf(`index param "metric_type" is not specified for index float vector %s`, field.GetName()) } func validateDuplicatedFieldName(fields []*schemapb.FieldSchema) error { @@ -418,7 +418,7 @@ func validateDuplicatedFieldName(fields []*schemapb.FieldSchema) error { for _, field := range fields { _, ok := names[field.Name] if ok { - return errors.New("duplicated field name") + return errors.Newf("duplicated field name %s found", field.GetName()) } names[field.Name] = true } diff --git a/tests/go_client/testcases/collection_test.go b/tests/go_client/testcases/collection_test.go index e5e009bb2b..37a71b4817 100644 --- a/tests/go_client/testcases/collection_test.go +++ b/tests/go_client/testcases/collection_test.go @@ -770,12 +770,14 @@ func TestCreateVectorWithoutDim(t *testing.T) { mc := createDefaultMilvusClient(ctx, t) collName := common.GenRandomString(prefix, 6) + vecFieldName := "vec" + schema := entity.NewSchema().WithField( entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true)).WithField( - entity.NewField().WithName("vec").WithDataType(entity.FieldTypeFloatVector), + entity.NewField().WithName(vecFieldName).WithDataType(entity.FieldTypeFloatVector), ).WithName(collName) err := mc.CreateCollection(ctx, client.NewCreateCollectionOption(collName, schema)) - common.CheckErr(t, err, false, "dimension is not defined in field type params, check type param `dim` for vector field") + common.CheckErr(t, err, false, fmt.Sprintf("dimension is not defined in field type params of field %s, check type param `dim` for vector field", vecFieldName)) } // specify dim for sparse vector -> error @@ -836,7 +838,7 @@ func TestCreateVarcharArrayInvalidLength(t *testing.T) { for _, invalidLength := range []int64{-1, 0, common.MaxLength + 1} { arrayVarcharField.WithMaxLength(invalidLength) err := mc.CreateCollection(ctx, client.NewCreateCollectionOption(collName, schema)) - common.CheckErr(t, err, false, "the maximum length specified for a VarChar should be in (0, 65535]") + common.CheckErr(t, err, false, fmt.Sprintf("the maximum length specified for a VarChar field(%s) should be in (0, 65535], but got %d instead: invalid parameter", arrayVarcharField.Name, invalidLength)) } } @@ -858,7 +860,7 @@ func TestCreateVarcharInvalidLength(t *testing.T) { for _, invalidLength := range []int64{-1, 0, common.MaxLength + 1} { varcharField.WithMaxLength(invalidLength) err := mc.CreateCollection(ctx, client.NewCreateCollectionOption(collName, schema)) - common.CheckErr(t, err, false, "the maximum length specified for a VarChar should be in (0, 65535]") + common.CheckErr(t, err, false, fmt.Sprintf("the maximum length specified for a VarChar field(%s) should be in (0, 65535], but got %d instead", varcharField.Name, invalidLength)) } } diff --git a/tests/python_client/milvus_client/test_milvus_client_collection.py b/tests/python_client/milvus_client/test_milvus_client_collection.py index da5f5fcd88..0c2ff3ff8c 100644 --- a/tests/python_client/milvus_client/test_milvus_client_collection.py +++ b/tests/python_client/milvus_client/test_milvus_client_collection.py @@ -103,7 +103,7 @@ class TestMilvusClientCollectionInvalid(TestMilvusClientV2Base): client = self._client() collection_name = cf.gen_unique_str(prefix) # 1. create collection - error = {ct.err_code: 65535, ct.err_msg: f"invalid dimension: {dim}. " + error = {ct.err_code: 65535, ct.err_msg: f"invalid dimension: {dim} of field {default_vector_field_name}. " f"float vector dimension should be in range 2 ~ 32768"} if dim < ct.min_dim: error = {ct.err_code: 65535, ct.err_msg: f"invalid dimension: {dim}. " diff --git a/tests/python_client/testcases/test_collection.py b/tests/python_client/testcases/test_collection.py index 7e5aeb1847..54b2fdc066 100644 --- a/tests/python_client/testcases/test_collection.py +++ b/tests/python_client/testcases/test_collection.py @@ -1608,7 +1608,7 @@ class TestCollectionCountBinary(TestcaseBase): collection_w = self.init_collection_wrap(schema=c_schema, check_task=CheckTasks.err_res, check_items={"err_code": 1, - "err_msg": f"invalid dimension: {dim}. binary vector dimension should be multiple of 8."}) + "err_msg": f"invalid dimension: {dim} of field {ct.default_binary_vec_field_name}. binary vector dimension should be multiple of 8."}) @pytest.mark.tags(CaseLabel.L2) def test_collection_count_no_entities(self): @@ -3878,7 +3878,7 @@ class TestCollectionString(TestcaseBase): max_length = 65535 + 1 string_field = cf.gen_string_field(max_length=max_length) schema = cf.gen_collection_schema([int_field, string_field, vec_field]) - error = {ct.err_code: 65535, ct.err_msg: "the maximum length specified for a VarChar should be in (0, 65535]"} + error = {ct.err_code: 65535, ct.err_msg: f"the maximum length specified for a VarChar field({ct.default_string_field_name}) should be in (0, 65535]"} self.collection_wrap.init_collection(name=c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)