package testcases

import (
	"context"
	"fmt"
	"strconv"
	"testing"
	"time"

	"github.com/stretchr/testify/require"
	"go.uber.org/zap"

	"github.com/milvus-io/milvus/client/v2/column"
	"github.com/milvus-io/milvus/client/v2/entity"
	client "github.com/milvus-io/milvus/client/v2/milvusclient"
	"github.com/milvus-io/milvus/pkg/v2/log"
	"github.com/milvus-io/milvus/tests/go_client/common"
	hp "github.com/milvus-io/milvus/tests/go_client/testcases/helper"
)
func TestUpsertAllFields(t *testing.T) {
	/*
		1. prepare create -> insert -> index -> load -> query
		2. upsert existing entities -> data updated -> query and verify
		3. delete some pks -> query and verify
		4. upsert a mix of deleted (not existing) pks and existing pks -> query and verify
		5. upsert all not-existing pks -> query and verify
	*/
	t.Parallel()
	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
	// connect
	mc := hp.CreateDefaultMilvusClient(ctx, t)

	// create -> insert [0, 3000) -> flush -> index -> load
	prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.AllFields), hp.TNewFieldsOption(), hp.TNewSchemaOption().TWithEnableDynamicField(true))
	prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), hp.TNewDataOption())
	prepare.FlushData(ctx, t, mc, schema.CollectionName)
	prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema))
	prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))

	upsertNb := 200
	baseOpt := hp.TNewDataOption().TWithNb(upsertNb)
	baseColumnOps := hp.TNewColumnOptions().WithColumnOption(common.DefaultDynamicFieldName, baseOpt)
	for _, field := range schema.Fields {
		baseColumnOps = baseColumnOps.WithColumnOption(field.Name, baseOpt)
	}
	// upsert existing entities [0, 200) -> query and verify
	columns, dynamicColumns := hp.GenColumnsBasedSchema(schema, baseColumnOps)
	upsertRes, err := mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(columns...).WithColumns(dynamicColumns...))
	common.CheckErr(t, err, true)
	require.EqualValues(t, upsertNb, upsertRes.UpsertCount)

	expr := fmt.Sprintf("%s < %d", common.DefaultInt64FieldName, upsertNb)
	resSet, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(expr).WithOutputFields("*").WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	common.CheckQueryResult(t, append(columns, hp.MergeColumnsToDynamic(upsertNb, dynamicColumns, common.DefaultDynamicFieldName)), resSet.Fields)

	// delete all upserted entities -> query and verify
	delRes, err := mc.Delete(ctx, client.NewDeleteOption(schema.CollectionName).WithExpr(expr))
	common.CheckErr(t, err, true)
	require.EqualValues(t, upsertNb, delRes.DeleteCount)

	resSet, err = mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(expr).WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	require.Zero(t, resSet.ResultCount)

	// upsert a mix of deleted (not existing) pks and existing pks [100, 500) -> query and verify the updated entities
	newUpsertNb := 400
	newUpsertStart := 100
	baseOpt = hp.TNewDataOption().TWithNb(newUpsertNb).TWithStart(newUpsertStart)
	baseColumnOps = hp.TNewColumnOptions().WithColumnOption(common.DefaultDynamicFieldName, baseOpt)
	for _, field := range schema.Fields {
		baseColumnOps = baseColumnOps.WithColumnOption(field.Name, hp.TNewDataOption().TWithNb(newUpsertNb).TWithStart(newUpsertStart))
	}
	columnsPart, dynamicColumnsPart := hp.GenColumnsBasedSchema(schema, baseColumnOps)
	upsertResPart, err := mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(columnsPart...).WithColumns(dynamicColumnsPart...))
	common.CheckErr(t, err, true)
	require.EqualValues(t, newUpsertNb, upsertResPart.UpsertCount)

	newExpr := fmt.Sprintf("%d <= %s < %d", newUpsertStart, common.DefaultInt64FieldName, newUpsertNb+newUpsertStart)
	resSetPart, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(newExpr).WithOutputFields("*").WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	common.CheckQueryResult(t, append(columnsPart, hp.MergeColumnsToDynamic(newUpsertNb, dynamicColumnsPart, common.DefaultDynamicFieldName)), resSetPart.Fields)

	// upsert all deleted (not existing) pks [0, 100)
	baseOpt = hp.TNewDataOption().TWithNb(newUpsertStart)
	baseColumnOps = hp.TNewColumnOptions().WithColumnOption(common.DefaultDynamicFieldName, baseOpt)
	for _, field := range schema.Fields {
		baseColumnOps = baseColumnOps.WithColumnOption(field.Name, hp.TNewDataOption().TWithNb(newUpsertStart))
	}
	columnsNot, dynamicColumnsNot := hp.GenColumnsBasedSchema(schema, baseColumnOps)
	upsertResNot, err := mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(columnsNot...).WithColumns(dynamicColumnsNot...))
	common.CheckErr(t, err, true)
	require.EqualValues(t, newUpsertStart, upsertResNot.UpsertCount)

	newExprNot := fmt.Sprintf("%s < %d", common.DefaultInt64FieldName, newUpsertStart)
	resSetNot, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(newExprNot).WithOutputFields("*").WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	common.CheckQueryResult(t, append(columnsNot, hp.MergeColumnsToDynamic(newUpsertStart, dynamicColumnsNot, common.DefaultDynamicFieldName)), resSetNot.Fields)
}
func TestUpsertAllFieldsFp32VecConversion(t *testing.T) {
	/*
		1. prepare create -> insert -> index -> load -> query
		2. upsert existing entities -> data updated -> query and verify
		3. delete some pks -> query and verify
		4. upsert a mix of deleted (not existing) pks and existing pks -> query and verify
		5. upsert all not-existing pks -> query and verify
	*/
	t.Parallel()
	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
	// connect
	mc := hp.CreateDefaultMilvusClient(ctx, t)

	// create -> insert [0, 3000) -> flush -> index -> load
	prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.AllFields), hp.TNewFieldsOption(), hp.TNewSchemaOption().TWithEnableDynamicField(true))
	prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), hp.TNewDataOption())
	prepare.FlushData(ctx, t, mc, schema.CollectionName)
	prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema))
	prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))

	upsertNb := 200
	baseColumnOps := hp.TNewColumnOptions()
	for _, field := range schema.Fields {
		baseColumnOps = baseColumnOps.WithColumnOption(field.Name, hp.TNewDataOption().TWithNb(upsertNb))
	}
	// upsert existing entities [0, 200) -> query and verify
	columns, dynamicColumns := hp.GenColumnsBasedSchemaWithFp32VecConversion(schema, hp.TNewDataOption().TWithNb(upsertNb))
	upsertRes, err := mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(columns...).WithColumns(dynamicColumns...))
	common.CheckErr(t, err, true)
	require.EqualValues(t, upsertNb, upsertRes.UpsertCount)

	expr := fmt.Sprintf("%s < %d", common.DefaultInt64FieldName, upsertNb)
	resSet, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(expr).WithOutputFields("*").WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	common.CheckQueryResult(t, append(columns, hp.MergeColumnsToDynamic(upsertNb, dynamicColumns, common.DefaultDynamicFieldName)), resSet.Fields)

	// delete all upserted entities -> query and verify
	delRes, err := mc.Delete(ctx, client.NewDeleteOption(schema.CollectionName).WithExpr(expr))
	common.CheckErr(t, err, true)
	require.EqualValues(t, upsertNb, delRes.DeleteCount)

	resSet, err = mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(expr).WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	require.Zero(t, resSet.ResultCount)

	// upsert a mix of deleted (not existing) pks and existing pks [100, 500) -> query and verify the updated entities
	newUpsertNb := 400
	newUpsertStart := 100
	columnsPart, dynamicColumnsPart := hp.GenColumnsBasedSchemaWithFp32VecConversion(schema, hp.TNewDataOption().TWithNb(newUpsertNb).TWithStart(newUpsertStart))
	upsertResPart, err := mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(columnsPart...).WithColumns(dynamicColumnsPart...))
	common.CheckErr(t, err, true)
	require.EqualValues(t, newUpsertNb, upsertResPart.UpsertCount)

	newExpr := fmt.Sprintf("%d <= %s < %d", newUpsertStart, common.DefaultInt64FieldName, newUpsertNb+newUpsertStart)
	resSetPart, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(newExpr).WithOutputFields("*").WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	common.CheckQueryResult(t, append(columnsPart, hp.MergeColumnsToDynamic(newUpsertNb, dynamicColumnsPart, common.DefaultDynamicFieldName)), resSetPart.Fields)

	// upsert all deleted (not existing) pks [0, 100)
	columnsNot, dynamicColumnsNot := hp.GenColumnsBasedSchemaWithFp32VecConversion(schema, hp.TNewDataOption().TWithNb(newUpsertStart))
	upsertResNot, err := mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(columnsNot...).WithColumns(dynamicColumnsNot...))
	common.CheckErr(t, err, true)
	require.EqualValues(t, newUpsertStart, upsertResNot.UpsertCount)

	newExprNot := fmt.Sprintf("%s < %d", common.DefaultInt64FieldName, newUpsertStart)
	resSetNot, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(newExprNot).WithOutputFields("*").WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	common.CheckQueryResult(t, append(columnsNot, hp.MergeColumnsToDynamic(newUpsertStart, dynamicColumnsNot, common.DefaultDynamicFieldName)), resSetNot.Fields)
}
func TestUpsertSparse(t *testing.T) {
	t.Skip("https://github.com/milvus-io/milvus-sdk-go/issues/769")
	/*
		1. prepare create -> insert -> index -> load -> query
		2. upsert existing entities -> data updated -> query and verify
		3. delete some pks -> query and verify
		4. upsert a mix of deleted (not existing) pks and existing pks -> query and verify
		5. upsert all not-existing pks -> query and verify
	*/
	t.Parallel()
	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
	// connect
	mc := hp.CreateDefaultMilvusClient(ctx, t)

	// create -> insert [0, 3000) -> flush -> index -> load
	prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.Int64VarcharSparseVec), hp.TNewFieldsOption(), hp.TNewSchemaOption().TWithEnableDynamicField(true))
	prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), hp.TNewDataOption().TWithSparseMaxLen(128).TWithNb(0))
	prepare.FlushData(ctx, t, mc, schema.CollectionName)

	upsertNb := 200

	// upsert existing entities [0, 200) -> query and verify
	baseColumnOps := hp.TNewColumnOptions()
	for _, field := range schema.Fields {
		baseColumnOps = baseColumnOps.WithColumnOption(field.Name, hp.TNewDataOption().TWithNb(upsertNb))
	}
	columns, dynamicColumns := hp.GenColumnsBasedSchema(schema, baseColumnOps)
	upsertRes, err := mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(columns...).WithColumns(dynamicColumns...))
	common.CheckErr(t, err, true)
	require.EqualValues(t, upsertNb, upsertRes.UpsertCount)

	prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema))
	prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))

	expr := fmt.Sprintf("%s < %d", common.DefaultInt64FieldName, upsertNb)
	resSet, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(expr).WithOutputFields("*").WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	common.CheckQueryResult(t, append(columns, hp.MergeColumnsToDynamic(upsertNb, dynamicColumns, common.DefaultDynamicFieldName)), resSet.Fields)

	// delete all upserted entities -> query and verify
	delRes, err := mc.Delete(ctx, client.NewDeleteOption(schema.CollectionName).WithExpr(expr))
	common.CheckErr(t, err, true)
	require.EqualValues(t, upsertNb, delRes.DeleteCount)

	resSet, err = mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(expr).WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	require.Zero(t, resSet.ResultCount)

	// upsert a mix of deleted (not existing) pks and existing pks [100, 500) -> query and verify the updated entities
	newUpsertNb := 400
	newUpsertStart := 100
	baseColumnOpsNew := hp.TNewColumnOptions()
	for _, field := range schema.Fields {
		baseColumnOpsNew = baseColumnOpsNew.WithColumnOption(field.Name, hp.TNewDataOption().TWithNb(newUpsertNb).TWithStart(newUpsertStart))
	}
	columnsPart, dynamicColumnsPart := hp.GenColumnsBasedSchema(schema, baseColumnOpsNew)
	upsertResPart, err := mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(columnsPart...).WithColumns(dynamicColumnsPart...))
	common.CheckErr(t, err, true)
	require.EqualValues(t, newUpsertNb, upsertResPart.UpsertCount)

	newExpr := fmt.Sprintf("%d <= %s < %d", newUpsertStart, common.DefaultInt64FieldName, newUpsertNb+newUpsertStart)
	resSetPart, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(newExpr).WithOutputFields("*").WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	common.CheckQueryResult(t, append(columnsPart, hp.MergeColumnsToDynamic(newUpsertNb, dynamicColumnsPart, common.DefaultDynamicFieldName)), resSetPart.Fields)

	// upsert all deleted (not existing) pks [0, 100)
	baseColumnOpsStart := hp.TNewColumnOptions()
	for _, field := range schema.Fields {
		baseColumnOpsStart = baseColumnOpsStart.WithColumnOption(field.Name, hp.TNewDataOption().TWithStart(newUpsertStart))
	}
	columnsNot, dynamicColumnsNot := hp.GenColumnsBasedSchema(schema, baseColumnOpsStart)
	upsertResNot, err := mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(columnsNot...).WithColumns(dynamicColumnsNot...))
	common.CheckErr(t, err, true)
	require.EqualValues(t, newUpsertStart, upsertResNot.UpsertCount)

	newExprNot := fmt.Sprintf("%s < %d", common.DefaultInt64FieldName, newUpsertStart)
	resSetNot, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(newExprNot).WithOutputFields("*").WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	common.CheckQueryResult(t, append(columnsNot, hp.MergeColumnsToDynamic(newUpsertStart, dynamicColumnsNot, common.DefaultDynamicFieldName)), resSetNot.Fields)
}
func TestUpsertVarcharPk(t *testing.T) {
	/*
		test upsert varchar pks
		upsert after query
		upsert "a" -> " a " -> actually new insert
	*/
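	// Varchar primary keys are matched exactly as stored (no trimming), so the
	// space-padded values upserted later in this test behave as brand-new entities
	// rather than updates of the original "0".."9" pks.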
	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
	mc := hp.CreateDefaultMilvusClient(ctx, t)

	// create -> insert [0, 3000) -> flush -> index -> load
	prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.VarcharBinary), hp.TNewFieldsOption(), hp.TNewSchemaOption())
	prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), hp.TNewDataOption())
	prepare.FlushData(ctx, t, mc, schema.CollectionName)
	prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema))
	prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))

	upsertNb := 10
	// upsert existing entities [0, 10), varchar pks: ["0", ..., "9"]
	genDataOpt := *hp.TNewDataOption()
	varcharColumn, binaryColumn := hp.GenColumnData(upsertNb, entity.FieldTypeVarChar, genDataOpt), hp.GenColumnData(upsertNb, entity.FieldTypeBinaryVector, genDataOpt)
	upsertRes, err := mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(varcharColumn, binaryColumn))
	common.CheckErr(t, err, true)
	common.EqualColumn(t, varcharColumn, upsertRes.IDs)

	// query and verify the updated entities
	expr := fmt.Sprintf("%s in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] ", common.DefaultVarcharFieldName)
	resSet1, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(expr).WithOutputFields("*").WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	common.CheckQueryResult(t, []column.Column{varcharColumn, binaryColumn}, resSet1.Fields)

	// upsert varchar pks with surrounding spaces: [" 0 ", ..., " 9 "]
	varcharValues := make([]string, 0, upsertNb)
	for i := 0; i < upsertNb; i++ {
		varcharValues = append(varcharValues, " "+strconv.Itoa(i)+" ")
	}
	varcharColumn1 := column.NewColumnVarChar(common.DefaultVarcharFieldName, varcharValues)
	binaryColumn1 := hp.GenColumnData(upsertNb, entity.FieldTypeBinaryVector, genDataOpt)
	upsertRes1, err1 := mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(varcharColumn1, binaryColumn1))
	common.CheckErr(t, err1, true)
	common.EqualColumn(t, varcharColumn1, upsertRes1.IDs)

	// query the old varchar pks (no spaces): ["0", ..., "9"] -> still present and unchanged
	resSet2, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(expr).WithOutputFields("*").WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	common.CheckQueryResult(t, []column.Column{varcharColumn, binaryColumn}, resSet2.Fields)

	// query the space-padded pks and verify the newly inserted entities
	exprNew := fmt.Sprintf("%s like ' %% ' ", common.DefaultVarcharFieldName)
	resSet3, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(exprNew).WithOutputFields("*").WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	common.CheckQueryResult(t, []column.Column{varcharColumn1, binaryColumn1}, resSet3.Fields)
}
// test upsert with partition
func TestUpsertMultiPartitions(t *testing.T) {
	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
	mc := hp.CreateDefaultMilvusClient(ctx, t)
	prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.AllFields), hp.TNewFieldsOption(), hp.TNewSchemaOption().TWithEnableDynamicField(true))
	parName := common.GenRandomString("p", 4)
	err := mc.CreatePartition(ctx, client.NewCreatePartitionOption(schema.CollectionName, parName))
	common.CheckErr(t, err, true)

	// insert [0, nb) into the default partition, insert [nb, nb*2) into the new partition
	prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), hp.TNewDataOption())
	prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema).TWithPartitionName(parName), hp.TNewDataOption().TWithStart(common.DefaultNb))
	prepare.FlushData(ctx, t, mc, schema.CollectionName)
	prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema))
	prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))

	// upsert into the new partition
	baseColumnOps := hp.TNewColumnOptions()
	for _, field := range schema.Fields {
		baseColumnOps = baseColumnOps.WithColumnOption(field.Name, hp.TNewDataOption().TWithStart(common.DefaultNb))
	}
	columns, dynamicColumns := hp.GenColumnsBasedSchema(schema, baseColumnOps)
	upsertRes, err := mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(columns...).WithColumns(dynamicColumns...).WithPartition(parName))
	common.CheckErr(t, err, true)
	require.EqualValues(t, common.DefaultNb, upsertRes.UpsertCount)
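	// The upsert above is routed to the new partition via WithPartition; pks
	// [DefaultNb, DefaultNb*2) were inserted into that partition, so those entities
	// are updated in place. The query below verifies the first 200 of them.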
	// query and verify
	expr := fmt.Sprintf("%d <= %s < %d", common.DefaultNb, common.DefaultInt64FieldName, common.DefaultNb+200)
	resSet3, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(expr).WithOutputFields("*").WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	expColumns := []column.Column{hp.MergeColumnsToDynamic(200, dynamicColumns, common.DefaultDynamicFieldName)}
	for _, c := range columns {
		expColumns = append(expColumns, c.Slice(0, 200))
	}
	common.CheckQueryResult(t, expColumns, resSet3.Fields)
}
func TestUpsertSamePksManyTimes(t *testing.T) {
	// upsert pks [0, 10) many times with different vectors
	// query -> gets the last upserted entities

	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout*2)
	mc := hp.CreateDefaultMilvusClient(ctx, t)

	// create and insert
	prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.AllFields), hp.TNewFieldsOption(), hp.TNewSchemaOption())
	prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), hp.TNewDataOption())

	var _columns []column.Column
	upsertNb := 10
	baseColumnOps := hp.TNewColumnOptions()
	for _, field := range schema.Fields {
		baseColumnOps = baseColumnOps.WithColumnOption(field.Name, hp.TNewDataOption().TWithNb(upsertNb))
	}
	for i := 0; i < 10; i++ {
		// upsert existing entities [0, 10)
		_columns, _ = hp.GenColumnsBasedSchema(schema, baseColumnOps)
		_, err := mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(_columns...))
		common.CheckErr(t, err, true)
	}
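	// _columns now holds the columns from the last loop iteration; every iteration
	// upserted the same pks [0, 10), so only that final data should remain, which the
	// float-vector comparison below verifies.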
	// flush -> index -> load
	prepare.FlushData(ctx, t, mc, schema.CollectionName)
	prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema))
	prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))

	// query and verify the updated entities
	resSet, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(fmt.Sprintf("%s < %d", common.DefaultInt64FieldName, upsertNb)).
		WithOutputFields(common.DefaultFloatVecFieldName).WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	for _, c := range _columns {
		if c.Name() == common.DefaultFloatVecFieldName {
			common.EqualColumn(t, c, resSet.GetColumn(common.DefaultFloatVecFieldName))
		}
	}
}
// test upsert autoID collection
func TestUpsertAutoID(t *testing.T) {
	/*
		prepare autoID collection
		upsert with existing pks -> the passed pks are deleted and new pks are auto-generated
		upsert with not-existing pks -> pks are still auto-generated
		upsert without pks -> error
	*/
	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
	mc := hp.CreateDefaultMilvusClient(ctx, t)
	nb := 100

	prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.Int64Vec), hp.TNewFieldsOption().TWithAutoID(true), hp.TNewSchemaOption())
	prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema))
	prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
	_, insertRes := prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), hp.TNewDataOption().TWithNb(nb))

	// upsert autoID collection with existing pks -> actually deletes the passed pks and auto-generates new pks
	vecColumn := hp.GenColumnData(nb, entity.FieldTypeFloatVector, *hp.TNewDataOption())
	upsertRes, err := mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(insertRes.IDs, vecColumn))
	common.CheckErr(t, err, true)
	log.Debug("upsertRes", zap.Any("len", upsertRes.IDs.(*column.ColumnInt64).Data()))

	// insertRes pks were deleted
	expr := fmt.Sprintf("%s <= %d", common.DefaultInt64FieldName, insertRes.IDs.(*column.ColumnInt64).Data()[nb-1])
	log.Debug("expr", zap.String("expr", expr))
	resSet, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithConsistencyLevel(entity.ClStrong).WithOutputFields(common.DefaultFloatVecFieldName).WithFilter(expr))
	common.CheckErr(t, err, true)
	require.EqualValues(t, 0, resSet.ResultCount)

	exprUpsert := fmt.Sprintf("%s <= %d", common.DefaultInt64FieldName, upsertRes.IDs.(*column.ColumnInt64).Data()[nb-1])
	log.Debug("expr", zap.String("expr", exprUpsert))
	resSet1, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithConsistencyLevel(entity.ClStrong).WithOutputFields(common.DefaultFloatVecFieldName).WithFilter(exprUpsert))
	common.CheckErr(t, err, true)
	common.EqualColumn(t, vecColumn, resSet1.GetColumn(common.DefaultFloatVecFieldName))

	// upsert with not-existing pks -> ids are still auto-generated
	pkColumn := hp.GenColumnData(100, entity.FieldTypeInt64, *hp.TNewDataOption())
	upsertRes, err1 := mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(pkColumn, vecColumn))
	common.CheckErr(t, err1, true)
	require.EqualValues(t, nb, upsertRes.UpsertCount)
	// the actual pks are auto-generated, not the passed ones
	require.NotContains(t, upsertRes.IDs.(*column.ColumnInt64).Data(), int64(0))

	// query and verify upsert result
	upsertPks := upsertRes.IDs.(*column.ColumnInt64).Data()
	resSet, err = mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithConsistencyLevel(entity.ClStrong).WithOutputFields(common.DefaultFloatVecFieldName).
		WithFilter(fmt.Sprintf("%d <= %s", upsertPks[0], common.DefaultInt64FieldName)))
	common.CheckErr(t, err, true)
	common.EqualColumn(t, vecColumn, resSet.GetColumn(common.DefaultFloatVecFieldName))

	// upsert without pks -> error
	vecColumn = hp.GenColumnData(nb, entity.FieldTypeFloatVector, *hp.TNewDataOption())
	_, err = mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(vecColumn))
	common.CheckErr(t, err, false, "must assign pk when upsert")
}
// test upsert autoID collection with row-based data
func TestUpsertAutoIDRows(t *testing.T) {
	t.Skip("https://github.com/milvus-io/milvus/issues/40816")
	/*
		prepare autoID collection
		upsert with existing pks -> the passed pks are deleted and new pks are auto-generated
		upsert with not-existing pks -> pks are still auto-generated
		upsert without pks -> error
	*/
	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
	mc := hp.CreateDefaultMilvusClient(ctx, t)
	nb := 100

	prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.Int64Vec), hp.TNewFieldsOption().TWithAutoID(true), hp.TNewSchemaOption())
	prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema))
	prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
	_, insertRes := prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema).TWithIsRows(true), hp.TNewDataOption().TWithNb(nb))

	// upsert autoID collection with existing pks -> actually deletes the passed pks and auto-generates new pks
	vecFloatValues := make([][]float32, 0, nb)
	for i := 0; i < nb; i++ {
		vec := common.GenFloatVector(common.DefaultDim)
		vecFloatValues = append(vecFloatValues, vec)
	}
	vecColumn := column.NewColumnFloatVector(common.DefaultFloatVecFieldName, common.DefaultDim, vecFloatValues)
	rows := make([]interface{}, 0, nb)
	for i := 0; i < nb; i++ {
		idValue, _ := insertRes.IDs.GetAsInt64(i)
		baseRow := hp.BaseRow{
			Int64:    idValue,
			FloatVec: vecFloatValues[i],
		}
		rows = append(rows, &baseRow)
	}
	upsertRes, err := mc.Upsert(ctx, client.NewRowBasedInsertOption(schema.CollectionName, rows...))
	common.CheckErr(t, err, true)
	log.Debug("upsertRes", zap.Any("len", upsertRes.IDs.(*column.ColumnInt64).Data()))

	// insertRes pks were deleted
	expr := fmt.Sprintf("%s <= %d", common.DefaultInt64FieldName, insertRes.IDs.(*column.ColumnInt64).Data()[nb-1])
	log.Debug("expr", zap.String("expr", expr))
	resSet, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithConsistencyLevel(entity.ClStrong).WithOutputFields(common.DefaultFloatVecFieldName).WithFilter(expr))
	common.CheckErr(t, err, true)
	require.EqualValues(t, 0, resSet.ResultCount)

	exprUpsert := fmt.Sprintf("%s <= %d", common.DefaultInt64FieldName, upsertRes.IDs.(*column.ColumnInt64).Data()[nb-1])
	log.Debug("expr", zap.String("expr", exprUpsert))
	resSet1, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithConsistencyLevel(entity.ClStrong).WithOutputFields(common.DefaultFloatVecFieldName).WithFilter(exprUpsert))
	common.CheckErr(t, err, true)
	common.EqualColumn(t, vecColumn, resSet1.GetColumn(common.DefaultFloatVecFieldName))

	// upsert with not-existing pks -> ids are still auto-generated
	rowsWithPk := hp.GenInt64VecRows(nb, false, false, *hp.TNewDataOption().TWithStart(0))
	upsertRes, err1 := mc.Upsert(ctx, client.NewRowBasedInsertOption(schema.CollectionName, rowsWithPk...))
	common.CheckErr(t, err1, true)
	require.EqualValues(t, nb, upsertRes.UpsertCount)
	// the actual pks are auto-generated, not the passed ones
	require.NotContains(t, upsertRes.IDs.(*column.ColumnInt64).Data(), int64(0))

	// query and verify upsert result
	upsertPks := upsertRes.IDs.(*column.ColumnInt64).Data()
	resSet, err = mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithConsistencyLevel(entity.ClStrong).WithOutputFields(common.DefaultFloatVecFieldName).
		WithFilter(fmt.Sprintf("%d <= %s", upsertPks[0], common.DefaultInt64FieldName)))
	common.CheckErr(t, err, true)
	common.EqualColumn(t, vecColumn, resSet.GetColumn(common.DefaultFloatVecFieldName))

	// upsert without pks -> error
	rowsWithoutPk := hp.GenInt64VecRows(nb, false, true, *hp.TNewDataOption())
	_, err = mc.Upsert(ctx, client.NewRowBasedInsertOption(schema.CollectionName, rowsWithoutPk...))
	common.CheckErr(t, err, false, "has no corresponding fieldData pass in: invalid parameter")
}
// test upsert with invalid collection / partition name
func TestUpsertNotExistCollectionPartition(t *testing.T) {
	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
	mc := hp.CreateDefaultMilvusClient(ctx, t)

	// upsert into a not-existing collection
	_, errUpsert := mc.Upsert(ctx, client.NewColumnBasedInsertOption("aaa"))
	common.CheckErr(t, errUpsert, false, "can't find collection")

	// create default collection
	_, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.Int64Vec), hp.TNewFieldsOption(), hp.TNewSchemaOption())

	_, errUpsert = mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithPartition("aaa"))
	common.CheckErr(t, errUpsert, false, "num_rows should be greater than 0")

	// upsert into a not-existing partition
	opt := *hp.TNewDataOption()
	pkColumn, vecColumn := hp.GenColumnData(10, entity.FieldTypeInt64, opt), hp.GenColumnData(10, entity.FieldTypeFloatVector, opt)
	_, errUpsert = mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithPartition("aaa").WithColumns(pkColumn, vecColumn))
	common.CheckErr(t, errUpsert, false, "partition not found[partition=aaa]")
}
// test upsert with invalid column data
func TestUpsertInvalidColumnData(t *testing.T) {
	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
	mc := hp.CreateDefaultMilvusClient(ctx, t)

	// create collection
	_, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.Int64Vec), hp.TNewFieldsOption(), hp.TNewSchemaOption())

	upsertNb := 10
	// 1. upsert with a missing column
	opt := *hp.TNewDataOption()
	pkColumn, vecColumn := hp.GenColumnData(upsertNb, entity.FieldTypeInt64, opt), hp.GenColumnData(upsertNb, entity.FieldTypeFloatVector, opt)
	_, err := mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(pkColumn))
	common.CheckErr(t, err, false, fmt.Sprintf("fieldSchema(%s) has no corresponding fieldData pass in", common.DefaultFloatVecFieldName))

	// 2. upsert with an extra, duplicated column
	_, err = mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(pkColumn, vecColumn, vecColumn))
	common.CheckErr(t, err, false, fmt.Sprintf("duplicated column %s found", common.DefaultFloatVecFieldName))

	// 3. upsert a vector column with a different dim
	dimColumn := hp.GenColumnData(upsertNb, entity.FieldTypeFloatVector, *hp.TNewDataOption().TWithDim(64))
	_, err = mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(pkColumn, dimColumn))
	common.CheckErr(t, err, false, fmt.Sprintf("params column %s vector dim 64 not match collection definition, which has dim of %d",
		common.DefaultFloatVecFieldName, common.DefaultDim))

	// 4. columns have different lengths
	diffLenColumn := hp.GenColumnData(upsertNb+1, entity.FieldTypeFloatVector, opt)
	_, err = mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(pkColumn, diffLenColumn))
	common.CheckErr(t, err, false, "column size not match")

	// 5. column type differs from the schema
	varColumn := hp.GenColumnData(upsertNb, entity.FieldTypeVarChar, opt)
	_, err = mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(pkColumn, varColumn, vecColumn))
	common.CheckErr(t, err, false, "field varchar does not exist in collection")

	// 6. both columns empty
	pkColumnEmpty, vecColumnEmpty := hp.GenColumnData(0, entity.FieldTypeInt64, opt), hp.GenColumnData(0, entity.FieldTypeFloatVector, opt)
	_, err = mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(pkColumnEmpty, vecColumnEmpty))
	common.CheckErr(t, err, false, "num_rows should be greater than 0")

	// 7. empty pk column with a non-empty vector column
	pkColumnEmpty, vecColumnEmpty = hp.GenColumnData(0, entity.FieldTypeInt64, opt), hp.GenColumnData(10, entity.FieldTypeFloatVector, opt)
	_, err = mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(pkColumnEmpty, vecColumnEmpty))
	common.CheckErr(t, err, false, "invalid parameter[expected=need long int array][actual=got nil]")

	// 8. non-empty pk column with an empty vector column
	pkColumnEmpty, vecColumnEmpty = hp.GenColumnData(10, entity.FieldTypeInt64, opt), hp.GenColumnData(0, entity.FieldTypeFloatVector, opt)
	_, err = mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(pkColumnEmpty, vecColumnEmpty))
	common.CheckErr(t, err, false, "column size not match")
}
func TestUpsertDynamicField(t *testing.T) {
	// enable dynamic field and insert dynamic column
	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
	mc := hp.CreateDefaultMilvusClient(ctx, t)

	// create -> insert [0, 3000) -> index -> load
	prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.Int64Vec), hp.TNewFieldsOption(), hp.TNewSchemaOption().TWithEnableDynamicField(true))
	prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), hp.TNewDataOption())
	prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema))
	prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))
	// verify that the dynamic field exists
	upsertNb := 10
	resSet, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(fmt.Sprintf("%s < %d", common.DefaultDynamicNumberField, upsertNb)).
		WithOutputFields(common.DefaultDynamicFieldName).WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	require.Equal(t, upsertNb, resSet.GetColumn(common.DefaultDynamicFieldName).Len())

	// 1. upsert existing pks without dynamic columns
	opt := *hp.TNewDataOption()
	pkColumn, vecColumn := hp.GenColumnData(upsertNb, entity.FieldTypeInt64, opt), hp.GenColumnData(upsertNb, entity.FieldTypeFloatVector, opt)
	_, err = mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(pkColumn, vecColumn))
	common.CheckErr(t, err, true)
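	// Upsert replaces the whole row, so omitting the dynamic columns here clears the
	// previously stored dynamic values for these pks; the dynamicNumber filter below
	// is expected to match nothing.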
	// query again and get an empty result
	resSet, err = mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(fmt.Sprintf("%s < %d", common.DefaultDynamicNumberField, upsertNb)).
		WithOutputFields(common.DefaultDynamicFieldName).WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	require.Equal(t, 0, resSet.GetColumn(common.DefaultDynamicFieldName).Len())

	// 2. upsert not-existing pks with dynamic columns (dynamicNumber is not a schema field; it is stored in the dynamic field)
	opt.TWithStart(common.DefaultNb)
	pkColumn2, vecColumn2 := hp.GenColumnData(upsertNb, entity.FieldTypeInt64, opt), hp.GenColumnData(upsertNb, entity.FieldTypeFloatVector, opt)
	dynamicColumns := hp.GenDynamicColumnData(common.DefaultNb, upsertNb)
	_, err = mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(pkColumn2, vecColumn2).WithColumns(dynamicColumns...))
	common.CheckErr(t, err, true)
	// query and get the dynamic field
	resSet, err = mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(fmt.Sprintf("%s >= %d", common.DefaultDynamicNumberField, common.DefaultNb)).
		WithOutputFields(common.DefaultDynamicFieldName).WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	common.EqualColumn(t, hp.MergeColumnsToDynamic(upsertNb, dynamicColumns, common.DefaultDynamicFieldName), resSet.GetColumn(common.DefaultDynamicFieldName))
}
func TestUpsertWithoutLoading(t *testing.T) {
	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
	mc := hp.CreateDefaultMilvusClient(ctx, t)

	// create and insert
	prepare, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, hp.NewCreateCollectionParams(hp.Int64VecJSON), hp.TNewFieldsOption(), hp.TNewSchemaOption())
	prepare.InsertData(ctx, t, mc, hp.NewInsertParams(schema), hp.TNewDataOption())

	// upsert
	upsertNb := 10
	opt := *hp.TNewDataOption()
	pkColumn, jsonColumn, vecColumn := hp.GenColumnData(upsertNb, entity.FieldTypeInt64, opt), hp.GenColumnData(upsertNb, entity.FieldTypeJSON, opt), hp.GenColumnData(upsertNb, entity.FieldTypeFloatVector, opt)
	_, err := mc.Upsert(ctx, client.NewColumnBasedInsertOption(schema.CollectionName).WithColumns(pkColumn, jsonColumn, vecColumn))
	common.CheckErr(t, err, true)
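	// The upsert is accepted even though the collection has not been indexed or loaded
	// yet; the data only becomes queryable after the index and load steps below.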
	// index -> load
	prepare.CreateIndex(ctx, t, mc, hp.TNewIndexParams(schema))
	prepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName))

	// query and verify
	resSet, err := mc.Query(ctx, client.NewQueryOption(schema.CollectionName).WithFilter(fmt.Sprintf("%s < %d", common.DefaultInt64FieldName, upsertNb)).
		WithOutputFields("*").WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)
	common.CheckQueryResult(t, []column.Column{pkColumn, jsonColumn, vecColumn}, resSet.Fields)
}

func TestUpsertPartitionKeyCollection(t *testing.T) {
	t.Skip("waiting gen partition key field")
}
func TestUpsertNullableFieldBehavior(t *testing.T) {
	/*
		Test nullable field behavior for Upsert operation:
		1. Insert data with nullable field having a value
		2. Upsert the same entity without providing the nullable field
		3. Verify that the nullable field is set to null (upsert replaces all fields)
	*/
	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
	mc := hp.CreateDefaultMilvusClient(ctx, t)

	// Create collection with nullable field using custom schema
	collName := common.GenRandomString("upsert_nullable", 6)

	// Create fields including nullable field
	pkField := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true)
	vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim)
	nullableField := entity.NewField().WithName("nullable_varchar").WithDataType(entity.FieldTypeVarChar).WithMaxLength(100).WithNullable(true)

	fields := []*entity.Field{pkField, vecField, nullableField}
	schema := hp.GenSchema(hp.TNewSchemaOption().TWithName(collName).TWithDescription("test nullable field behavior for upsert").TWithFields(fields))

	// Create collection using schema
	err := mc.CreateCollection(ctx, client.NewCreateCollectionOption(collName, schema))
	common.CheckErr(t, err, true)

	// Cleanup
	t.Cleanup(func() {
		ctx, cancel := context.WithTimeout(context.WithoutCancel(ctx), time.Second*10)
		defer cancel()
		err := mc.DropCollection(ctx, client.NewDropCollectionOption(collName))
		common.CheckErr(t, err, true)
	})

	// Insert initial data with nullable field having a value
	pkColumn := column.NewColumnInt64(common.DefaultInt64FieldName, []int64{1, 2, 3})
	vecColumn := hp.GenColumnData(3, entity.FieldTypeFloatVector, *hp.TNewDataOption())
	nullableColumn := column.NewColumnVarChar("nullable_varchar", []string{"original_1", "original_2", "original_3"})

	_, err = mc.Insert(ctx, client.NewColumnBasedInsertOption(collName).WithColumns(pkColumn, vecColumn, nullableColumn))
	common.CheckErr(t, err, true)

	// Use prepare pattern for remaining operations
	prepare := &hp.CollectionPrepare{}

	// Flush data
	prepare.FlushData(ctx, t, mc, collName)

	// Create index for vector field
	indexParams := hp.TNewIndexParams(schema)
	prepare.CreateIndex(ctx, t, mc, indexParams)

	// Load collection
	loadParams := hp.NewLoadParams(collName)
	prepare.Load(ctx, t, mc, loadParams)

	// Wait for loading to complete
	time.Sleep(time.Second * 5)

	// Upsert entities without providing nullable field (should set to null)
	upsertPkColumn := column.NewColumnInt64(common.DefaultInt64FieldName, []int64{1, 2})
	upsertVecColumn := hp.GenColumnData(2, entity.FieldTypeFloatVector, *hp.TNewDataOption().TWithStart(100))

	upsertRes, err := mc.Upsert(ctx, client.NewColumnBasedInsertOption(collName).WithColumns(upsertPkColumn, upsertVecColumn))
	common.CheckErr(t, err, true)
	require.EqualValues(t, 2, upsertRes.UpsertCount)

	// Wait for consistency
	time.Sleep(time.Second * 3)

	// Query to verify nullable field is set to null
	resSet, err := mc.Query(ctx, client.NewQueryOption(collName).WithFilter(fmt.Sprintf("%s in [1, 2]", common.DefaultInt64FieldName)).WithOutputFields("*").WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)

	// Verify results - nullable field should be null
	require.Equal(t, 2, resSet.GetColumn("nullable_varchar").Len())
	nullableResults := resSet.GetColumn("nullable_varchar").(*column.ColumnVarChar).Data()
	require.Equal(t, "", nullableResults[0]) // null value is represented as empty string
	require.Equal(t, "", nullableResults[1]) // null value is represented as empty string

	// Query entity that was not upserted to verify original value is preserved
	resSet3, err := mc.Query(ctx, client.NewQueryOption(collName).WithFilter(fmt.Sprintf("%s == 3", common.DefaultInt64FieldName)).WithOutputFields("*").WithConsistencyLevel(entity.ClStrong))
	common.CheckErr(t, err, true)

	require.Equal(t, 1, resSet3.GetColumn("nullable_varchar").Len())
	nullableResult3 := resSet3.GetColumn("nullable_varchar").(*column.ColumnVarChar).Data()
	require.Equal(t, "original_3", nullableResult3[0])
}