fix: [StorageV2] index/stats task level storage v2 fs (#42191)
related: #39173

Signed-off-by: shaoting-huang <shaoting.huang@zilliz.com>

parent fd6e2b52ff
commit 89c3afb12e
@@ -239,8 +239,6 @@ CreateIndex(CIndex* res_index,
        milvus::storage::FileManagerContext fileManagerContext(
            field_meta, index_meta, chunk_manager);

        auto fs = milvus::storage::InitArrowFileSystem(storage_config);

        auto index =
            milvus::indexbuilder::IndexFactory::GetInstance().CreateIndex(
                field_type, config, fileManagerContext);
@@ -254,13 +252,11 @@ CreateIndex(CIndex* res_index,
        auto status = CStatus();
        status.error_code = e.get_error_code();
        status.error_msg = strdup(e.what());
        milvus_storage::ArrowFileSystemSingleton::GetInstance().Release();
        return status;
    } catch (std::exception& e) {
        auto status = CStatus();
        status.error_code = UnexpectedError;
        status.error_msg = strdup(e.what());
        milvus_storage::ArrowFileSystemSingleton::GetInstance().Release();
        return status;
    }
}

@@ -25,6 +25,52 @@
#include "common/EasyAssert.h"
#include "common/type_c.h"

CStatus
NewPackedReaderWithStorageConfig(char** paths,
                                 int64_t num_paths,
                                 struct ArrowSchema* schema,
                                 const int64_t buffer_size,
                                 CStorageConfig c_storage_config,
                                 CPackedReader* c_packed_reader) {
    try {
        auto truePaths = std::vector<std::string>(paths, paths + num_paths);

        milvus_storage::ArrowFileSystemConfig conf;
        conf.address = std::string(c_storage_config.address);
        conf.bucket_name = std::string(c_storage_config.bucket_name);
        conf.access_key_id = std::string(c_storage_config.access_key_id);
        conf.access_key_value = std::string(c_storage_config.access_key_value);
        conf.root_path = std::string(c_storage_config.root_path);
        conf.storage_type = std::string(c_storage_config.storage_type);
        conf.cloud_provider = std::string(c_storage_config.cloud_provider);
        conf.iam_endpoint = std::string(c_storage_config.iam_endpoint);
        conf.log_level = std::string(c_storage_config.log_level);
        conf.region = std::string(c_storage_config.region);
        conf.useSSL = c_storage_config.useSSL;
        conf.sslCACert = std::string(c_storage_config.sslCACert);
        conf.useIAM = c_storage_config.useIAM;
        conf.useVirtualHost = c_storage_config.useVirtualHost;
        conf.requestTimeoutMs = c_storage_config.requestTimeoutMs;
        conf.gcp_credential_json =
            std::string(c_storage_config.gcp_credential_json);
        conf.use_custom_part_upload = c_storage_config.use_custom_part_upload;
        milvus_storage::ArrowFileSystemSingleton::GetInstance().Init(conf);
        auto trueFs = milvus_storage::ArrowFileSystemSingleton::GetInstance()
                          .GetArrowFileSystem();
        if (!trueFs) {
            return milvus::FailureCStatus(milvus::ErrorCode::FileReadFailed,
                                          "Failed to get filesystem");
        }
        auto trueSchema = arrow::ImportSchema(schema).ValueOrDie();
        auto reader = std::make_unique<milvus_storage::PackedRecordBatchReader>(
            trueFs, truePaths, trueSchema, buffer_size);
        *c_packed_reader = reader.release();
        return milvus::SuccessCStatus();
    } catch (std::exception& e) {
        return milvus::FailureCStatus(&e);
    }
}

CStatus
NewPackedReader(char** paths,
                int64_t num_paths,

@@ -25,6 +25,14 @@ typedef void* CPackedReader;
typedef void* CArrowArray;
typedef void* CArrowSchema;

CStatus
NewPackedReaderWithStorageConfig(char** paths,
                                 int64_t num_paths,
                                 struct ArrowSchema* schema,
                                 const int64_t buffer_size,
                                 CStorageConfig c_storage_config,
                                 CPackedReader* c_packed_reader);

/**
 * @brief Open a packed reader to read needed columns in the specified path.
 *

@@ -24,6 +24,69 @@
#include "common/EasyAssert.h"
#include "common/type_c.h"

CStatus
NewPackedWriterWithStorageConfig(struct ArrowSchema* schema,
                                 const int64_t buffer_size,
                                 char** paths,
                                 int64_t num_paths,
                                 int64_t part_upload_size,
                                 CColumnGroups column_groups,
                                 CStorageConfig c_storage_config,
                                 CPackedWriter* c_packed_writer) {
    try {
        auto truePaths = std::vector<std::string>(paths, paths + num_paths);

        auto storage_config = milvus_storage::StorageConfig();
        storage_config.part_size = part_upload_size;

        milvus_storage::ArrowFileSystemConfig conf;
        conf.address = std::string(c_storage_config.address);
        conf.bucket_name = std::string(c_storage_config.bucket_name);
        conf.access_key_id = std::string(c_storage_config.access_key_id);
        conf.access_key_value = std::string(c_storage_config.access_key_value);
        conf.root_path = std::string(c_storage_config.root_path);
        conf.storage_type = std::string(c_storage_config.storage_type);
        conf.cloud_provider = std::string(c_storage_config.cloud_provider);
        conf.iam_endpoint = std::string(c_storage_config.iam_endpoint);
        conf.log_level = std::string(c_storage_config.log_level);
        conf.region = std::string(c_storage_config.region);
        conf.useSSL = c_storage_config.useSSL;
        conf.sslCACert = std::string(c_storage_config.sslCACert);
        conf.useIAM = c_storage_config.useIAM;
        conf.useVirtualHost = c_storage_config.useVirtualHost;
        conf.requestTimeoutMs = c_storage_config.requestTimeoutMs;
        conf.gcp_credential_json =
            std::string(c_storage_config.gcp_credential_json);
        conf.use_custom_part_upload = c_storage_config.use_custom_part_upload;
        milvus_storage::ArrowFileSystemSingleton::GetInstance().Init(conf);
        auto trueFs = milvus_storage::ArrowFileSystemSingleton::GetInstance()
                          .GetArrowFileSystem();
        if (!trueFs) {
            return milvus::FailureCStatus(milvus::ErrorCode::FileReadFailed,
                                          "Failed to get filesystem");
        }

        auto trueSchema = arrow::ImportSchema(schema).ValueOrDie();

        auto columnGroups =
            *static_cast<std::vector<std::vector<int>>*>(column_groups);

        auto writer = std::make_unique<milvus_storage::PackedRecordBatchWriter>(
            trueFs,
            truePaths,
            trueSchema,
            storage_config,
            columnGroups,
            buffer_size);
        AssertInfo(writer, "write must not be null");

        *c_packed_writer = writer.release();
        return milvus::SuccessCStatus();
    } catch (std::exception& e) {
        return milvus::FailureCStatus(&e);
    }
}

CStatus
NewPackedWriter(struct ArrowSchema* schema,
                const int64_t buffer_size,

@@ -24,6 +24,16 @@ extern "C" {

typedef void* CPackedWriter;

CStatus
NewPackedWriterWithStorageConfig(struct ArrowSchema* schema,
                                 const int64_t buffer_size,
                                 char** paths,
                                 int64_t num_paths,
                                 int64_t part_upload_size,
                                 CColumnGroups column_groups,
                                 CStorageConfig c_storage_config,
                                 CPackedWriter* c_packed_writer);

CStatus
NewPackedWriter(struct ArrowSchema* schema,
                const int64_t buffer_size,

@@ -20,6 +20,7 @@

#include "common/Common.h"
#include "common/FieldData.h"
#include "common/Types.h"
#include "log/Log.h"
#include "storage/Util.h"
#include "storage/FileManager.h"
@@ -137,13 +138,13 @@ MemFileManagerImpl::CacheRawDataToMemory(const Config& config) {
        index::GetValueFromConfig<int64_t>(config, STORAGE_VERSION_KEY)
            .value_or(0);
    if (storage_version == STORAGE_V2) {
-       return cache_row_data_to_memory_storage_v2(config);
+       return cache_raw_data_to_memory_storage_v2(config);
    }
-   return cache_row_data_to_memory_internal(config);
+   return cache_raw_data_to_memory_internal(config);
}

std::vector<FieldDataPtr>
-MemFileManagerImpl::cache_row_data_to_memory_internal(const Config& config) {
+MemFileManagerImpl::cache_raw_data_to_memory_internal(const Config& config) {
    auto insert_files = index::GetValueFromConfig<std::vector<std::string>>(
        config, INSERT_FILES_KEY);
    AssertInfo(insert_files.has_value(),
@@ -180,7 +181,7 @@ MemFileManagerImpl::cache_row_data_to_memory_internal(const Config& config) {
}

std::vector<FieldDataPtr>
-MemFileManagerImpl::cache_row_data_to_memory_storage_v2(const Config& config) {
+MemFileManagerImpl::cache_raw_data_to_memory_storage_v2(const Config& config) {
    auto data_type = index::GetValueFromConfig<DataType>(config, DATA_TYPE_KEY);
    AssertInfo(data_type.has_value(), "data type is empty when build index");
    auto dim = index::GetValueFromConfig<int64_t>(config, DIM_KEY).value_or(0);

@@ -81,10 +81,10 @@ class MemFileManagerImpl : public FileManagerImpl {
    AddBinarySet(const BinarySet& binary_set, const std::string& prefix);

    std::vector<FieldDataPtr>
-   cache_row_data_to_memory_internal(const Config& config);
+   cache_raw_data_to_memory_internal(const Config& config);

    std::vector<FieldDataPtr>
-   cache_row_data_to_memory_storage_v2(const Config& config);
+   cache_raw_data_to_memory_storage_v2(const Config& config);

    std::unordered_map<int64_t, std::vector<std::vector<uint32_t>>>
    cache_opt_field_memory(const Config& config);

@@ -301,7 +301,8 @@ TEST(CStringIndexTest, All) {
#endif

TEST(CreateIndexTest, StorageV2) {
-   GTEST_SKIP() << "TODO: after index/stats task level fs is finished, should fix shutdown sdk api in test";
+   GTEST_SKIP() << "TODO: after index/stats task level fs is finished, should "
+                   "fix shutdown sdk api in test";
    auto build_index_info =
        std::make_unique<milvus::proto::indexcgo::BuildIndexInfo>();

@@ -163,6 +163,7 @@ func (st *statsTask) PreExecute(ctx context.Context) error {
        zap.Int64("partitionID", st.req.GetPartitionID()),
        zap.Int64("segmentID", st.req.GetSegmentID()),
        zap.Int64("preExecuteRecordSpan(ms)", preExecuteRecordSpan.Milliseconds()),
        zap.Any("storageConfig", st.req.StorageConfig),
    )
    return nil
}
@@ -189,6 +190,7 @@ func (st *statsTask) sort(ctx context.Context) ([]*datapb.FieldBinlog, error) {
            return st.binlogIO.Upload(ctx, kvs)
        }),
        storage.WithVersion(st.req.GetStorageVersion()),
        storage.WithStorageConfig(st.req.GetStorageConfig()),
    )
    if err != nil {
        log.Ctx(ctx).Warn("sort segment wrong, unable to init segment writer",
@@ -234,6 +236,7 @@ func (st *statsTask) sort(ctx context.Context) ([]*datapb.FieldBinlog, error) {
        storage.WithVersion(st.req.StorageVersion),
        storage.WithDownloader(st.binlogIO.Download),
        storage.WithBucketName(st.req.StorageConfig.BucketName),
        storage.WithStorageConfig(st.req.GetStorageConfig()),
    )
    if err != nil {
        log.Warn("error creating insert binlog reader", zap.Error(err))

@@ -139,7 +139,7 @@ func (bw *BulkPackWriterV2) writeInserts(ctx context.Context, pack *SyncPack) (m

    bucketName := paramtable.Get().ServiceParam.MinioCfg.BucketName.GetValue()

-   w, err := storage.NewPackedRecordWriter(bucketName, paths, bw.schema, bw.bufferSize, bw.multiPartUploadSize, columnGroups)
+   w, err := storage.NewPackedRecordWriter(bucketName, paths, bw.schema, bw.bufferSize, bw.multiPartUploadSize, columnGroups, nil)
    if err != nil {
        return nil, err
    }

@@ -30,6 +30,7 @@ import (
    "github.com/milvus-io/milvus/internal/storagecommon"
    "github.com/milvus-io/milvus/internal/storagev2/packed"
    "github.com/milvus-io/milvus/pkg/v2/proto/datapb"
    "github.com/milvus-io/milvus/pkg/v2/proto/indexpb"
    "github.com/milvus-io/milvus/pkg/v2/util/merr"
    "github.com/milvus-io/milvus/pkg/v2/util/paramtable"
)
@@ -52,6 +53,7 @@ type rwOptions struct {
    multiPartUploadSize int64
    columnGroups        []storagecommon.ColumnGroup
    bucketName          string
    storageConfig       *indexpb.StorageConfig
}

type RwOption func(*rwOptions)
@@ -105,6 +107,12 @@ func WithColumnGroups(columnGroups []storagecommon.ColumnGroup) RwOption {
    }
}

func WithStorageConfig(storageConfig *indexpb.StorageConfig) RwOption {
    return func(options *rwOptions) {
        options.storageConfig = storageConfig
    }
}

func GuessStorageVersion(binlogs []*datapb.FieldBinlog, schema *schemapb.CollectionSchema) int64 {
    if len(binlogs) == len(schema.Fields) {
        return StorageV1
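For context, a hedged sketch of how a caller can thread the per-task storage config through these options. Only WithVersion, WithDownloader, WithBucketName, WithStorageConfig, and NewBinlogRecordReader come from this diff; req, binlogIO, binlogs, and schema are placeholder names, and the exact NewBinlogRecordReader parameter list is assumed.

// Illustrative only: pass the task-level storage config through the rw options
// instead of relying on the process-wide filesystem singleton.
reader, err := storage.NewBinlogRecordReader(ctx, binlogs, schema,
    storage.WithVersion(req.GetStorageVersion()),
    storage.WithDownloader(binlogIO.Download),
    storage.WithBucketName(req.GetStorageConfig().GetBucketName()),
    storage.WithStorageConfig(req.GetStorageConfig()), // consumed by newPackedRecordReader on the storage v2 path
)
if err != nil {
    return err
}
defer reader.Close()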
@@ -212,7 +220,7 @@ func NewBinlogRecordReader(ctx context.Context, binlogs []*datapb.FieldBinlog, s
                paths[j] = append(paths[j], logPath)
            }
        }
-       return newPackedRecordReader(paths, schema, rwOptions.bufferSize)
+       return newPackedRecordReader(paths, schema, rwOptions.bufferSize, rwOptions.storageConfig)
    }
    return nil, merr.WrapErrServiceInternal(fmt.Sprintf("unsupported storage version %d", rwOptions.version))
}
@@ -241,6 +249,7 @@ func NewBinlogRecordWriter(ctx context.Context, collectionID, partitionID, segme
        return newPackedBinlogRecordWriter(collectionID, partitionID, segmentID, schema,
            blobsWriter, allocator, chunkSize, bucketName, rootPath, maxRowNum,
            rwOptions.bufferSize, rwOptions.multiPartUploadSize, rwOptions.columnGroups,
            rwOptions.storageConfig,
        )
    }
    return nil, merr.WrapErrServiceInternal(fmt.Sprintf("unsupported storage version %d", rwOptions.version))

@@ -34,6 +34,7 @@ import (
    "github.com/milvus-io/milvus/pkg/v2/log"
    "github.com/milvus-io/milvus/pkg/v2/proto/datapb"
    "github.com/milvus-io/milvus/pkg/v2/proto/etcdpb"
    "github.com/milvus-io/milvus/pkg/v2/proto/indexpb"
    "github.com/milvus-io/milvus/pkg/v2/util/merr"
    "github.com/milvus-io/milvus/pkg/v2/util/metautil"
    "github.com/milvus-io/milvus/pkg/v2/util/paramtable"
@@ -48,6 +49,7 @@ type packedRecordReader struct {
    bufferSize    int64
    arrowSchema   *arrow.Schema
    field2Col     map[FieldID]int
    storageConfig *indexpb.StorageConfig
}

var _ RecordReader = (*packedRecordReader)(nil)
@@ -63,7 +65,7 @@ func (pr *packedRecordReader) iterateNextBatch() error {
        return io.EOF
    }

-   reader, err := packed.NewPackedReader(pr.paths[pr.chunk], pr.arrowSchema, pr.bufferSize)
+   reader, err := packed.NewPackedReader(pr.paths[pr.chunk], pr.arrowSchema, pr.bufferSize, pr.storageConfig)
    pr.chunk++
    if err != nil {
        return merr.WrapErrParameterInvalid("New binlog record packed reader error: %s", err.Error())
@@ -100,7 +102,7 @@ func (pr *packedRecordReader) Close() error {
    return nil
}

-func newPackedRecordReader(paths [][]string, schema *schemapb.CollectionSchema, bufferSize int64,
+func newPackedRecordReader(paths [][]string, schema *schemapb.CollectionSchema, bufferSize int64, storageConfig *indexpb.StorageConfig,
) (*packedRecordReader, error) {
    arrowSchema, err := ConvertToArrowSchema(schema.Fields)
    if err != nil {
@@ -115,13 +117,14 @@ func newPackedRecordReader(paths [][]string, schema *schemapb.CollectionSchema,
        bufferSize:    bufferSize,
        arrowSchema:   arrowSchema,
        field2Col:     field2Col,
        storageConfig: storageConfig,
    }, nil
}

func NewPackedDeserializeReader(paths [][]string, schema *schemapb.CollectionSchema,
    bufferSize int64,
) (*DeserializeReaderImpl[*Value], error) {
-   reader, err := newPackedRecordReader(paths, schema, bufferSize)
+   reader, err := newPackedRecordReader(paths, schema, bufferSize, nil)
    if err != nil {
        return nil, err
    }
@@ -144,6 +147,7 @@ type packedRecordWriter struct {
    rowNum                  int64
    writtenUncompressed     uint64
    columnGroupUncompressed []uint64
    storageConfig           *indexpb.StorageConfig
}

func (pw *packedRecordWriter) Write(r Record) error {
@@ -196,7 +200,7 @@ func (pw *packedRecordWriter) Close() error {
    return nil
}

-func NewPackedRecordWriter(bucketName string, paths []string, schema *schemapb.CollectionSchema, bufferSize int64, multiPartUploadSize int64, columnGroups []storagecommon.ColumnGroup) (*packedRecordWriter, error) {
+func NewPackedRecordWriter(bucketName string, paths []string, schema *schemapb.CollectionSchema, bufferSize int64, multiPartUploadSize int64, columnGroups []storagecommon.ColumnGroup, storageConfig *indexpb.StorageConfig) (*packedRecordWriter, error) {
    arrowSchema, err := ConvertToArrowSchema(schema.Fields)
    if err != nil {
        return nil, merr.WrapErrServiceInternal(
@@ -210,7 +214,7 @@ func NewPackedRecordWriter(bucketName string, paths []string, schema *schemapb.C
        }
        return path.Join(bucketName, p)
    })
-   writer, err := packed.NewPackedWriter(truePaths, arrowSchema, bufferSize, multiPartUploadSize, columnGroups)
+   writer, err := packed.NewPackedWriter(truePaths, arrowSchema, bufferSize, multiPartUploadSize, columnGroups, storageConfig)
    if err != nil {
        return nil, merr.WrapErrServiceInternal(
            fmt.Sprintf("can not new packed record writer %s", err.Error()))
@@ -225,13 +229,14 @@ func NewPackedRecordWriter(bucketName string, paths []string, schema *schemapb.C
        paths:                   paths,
        columnGroups:            columnGroups,
        columnGroupUncompressed: columnGroupUncompressed,
        storageConfig:           storageConfig,
    }, nil
}

func NewPackedSerializeWriter(bucketName string, paths []string, schema *schemapb.CollectionSchema, bufferSize int64,
    multiPartUploadSize int64, columnGroups []storagecommon.ColumnGroup, batchSize int,
) (*SerializeWriterImpl[*Value], error) {
-   PackedBinlogRecordWriter, err := NewPackedRecordWriter(bucketName, paths, schema, bufferSize, multiPartUploadSize, columnGroups)
+   PackedBinlogRecordWriter, err := NewPackedRecordWriter(bucketName, paths, schema, bufferSize, multiPartUploadSize, columnGroups, nil)
    if err != nil {
        return nil, merr.WrapErrServiceInternal(
            fmt.Sprintf("can not new packed record writer %s", err.Error()))
@@ -259,6 +264,7 @@ type PackedBinlogRecordWriter struct {
    bufferSize          int64
    multiPartUploadSize int64
    columnGroups        []storagecommon.ColumnGroup
    storageConfig       *indexpb.StorageConfig

    // writer and stats generated at runtime
    writer *packedRecordWriter
@@ -358,7 +364,7 @@ func (pw *PackedBinlogRecordWriter) initWriters(r Record) error {
        paths = append(paths, path)
        logIdStart++
    }
-   pw.writer, err = NewPackedRecordWriter(pw.bucketName, paths, pw.schema, pw.bufferSize, pw.multiPartUploadSize, pw.columnGroups)
+   pw.writer, err = NewPackedRecordWriter(pw.bucketName, paths, pw.schema, pw.bufferSize, pw.multiPartUploadSize, pw.columnGroups, pw.storageConfig)
    if err != nil {
        return merr.WrapErrServiceInternal(fmt.Sprintf("can not new packed record writer %s", err.Error()))
    }
@@ -525,6 +531,7 @@ func (pw *PackedBinlogRecordWriter) GetBufferUncompressed() uint64 {

func newPackedBinlogRecordWriter(collectionID, partitionID, segmentID UniqueID, schema *schemapb.CollectionSchema,
    blobsWriter ChunkedBlobsWriter, allocator allocator.Interface, chunkSize uint64, bucketName, rootPath string, maxRowNum int64, bufferSize, multiPartUploadSize int64, columnGroups []storagecommon.ColumnGroup,
    storageConfig *indexpb.StorageConfig,
) (*PackedBinlogRecordWriter, error) {
    arrowSchema, err := ConvertToArrowSchema(schema.Fields)
    if err != nil {
@@ -567,5 +574,6 @@ func newPackedBinlogRecordWriter(collectionID, partitionID, segmentID UniqueID,
        columnGroups:  columnGroups,
        pkstats:       stats,
        bm25Stats:     bm25Stats,
        storageConfig: storageConfig,
    }, nil
}

@@ -31,9 +31,11 @@ import (

    "github.com/apache/arrow/go/v17/arrow"
    "github.com/apache/arrow/go/v17/arrow/cdata"

    "github.com/milvus-io/milvus/pkg/v2/proto/indexpb"
)

-func NewPackedReader(filePaths []string, schema *arrow.Schema, bufferSize int64) (*PackedReader, error) {
+func NewPackedReader(filePaths []string, schema *arrow.Schema, bufferSize int64, storageConfig *indexpb.StorageConfig) (*PackedReader, error) {
    cFilePaths := make([]*C.char, len(filePaths))
    for i, path := range filePaths {
        cFilePaths[i] = C.CString(path)
@@ -50,7 +52,45 @@ func NewPackedReader(filePaths []string, schema *arrow.Schema, bufferSize int64)
    cBufferSize := C.int64_t(bufferSize)

    var cPackedReader C.CPackedReader
-   status := C.NewPackedReader(cFilePathsArray, cNumPaths, cSchema, cBufferSize, &cPackedReader)
+   var status C.CStatus

    if storageConfig != nil {
        cStorageConfig := C.CStorageConfig{
            address:                C.CString(storageConfig.GetAddress()),
            bucket_name:            C.CString(storageConfig.GetBucketName()),
            access_key_id:          C.CString(storageConfig.GetAccessKeyID()),
            access_key_value:       C.CString(storageConfig.GetSecretAccessKey()),
            root_path:              C.CString(storageConfig.GetRootPath()),
            storage_type:           C.CString(storageConfig.GetStorageType()),
            cloud_provider:         C.CString(storageConfig.GetCloudProvider()),
            iam_endpoint:           C.CString(storageConfig.GetIAMEndpoint()),
            log_level:              C.CString("warn"),
            useSSL:                 C.bool(storageConfig.GetUseSSL()),
            sslCACert:              C.CString(storageConfig.GetSslCACert()),
            useIAM:                 C.bool(storageConfig.GetUseIAM()),
            region:                 C.CString(storageConfig.GetRegion()),
            useVirtualHost:         C.bool(storageConfig.GetUseVirtualHost()),
            requestTimeoutMs:       C.int64_t(storageConfig.GetRequestTimeoutMs()),
            gcp_credential_json:    C.CString(storageConfig.GetGcpCredentialJSON()),
            use_custom_part_upload: true,
        }
        defer C.free(unsafe.Pointer(cStorageConfig.address))
        defer C.free(unsafe.Pointer(cStorageConfig.bucket_name))
        defer C.free(unsafe.Pointer(cStorageConfig.access_key_id))
        defer C.free(unsafe.Pointer(cStorageConfig.access_key_value))
        defer C.free(unsafe.Pointer(cStorageConfig.root_path))
        defer C.free(unsafe.Pointer(cStorageConfig.storage_type))
        defer C.free(unsafe.Pointer(cStorageConfig.cloud_provider))
        defer C.free(unsafe.Pointer(cStorageConfig.iam_endpoint))
        defer C.free(unsafe.Pointer(cStorageConfig.log_level))
        defer C.free(unsafe.Pointer(cStorageConfig.sslCACert))
        defer C.free(unsafe.Pointer(cStorageConfig.region))
        defer C.free(unsafe.Pointer(cStorageConfig.gcp_credential_json))

        status = C.NewPackedReaderWithStorageConfig(cFilePathsArray, cNumPaths, cSchema, cBufferSize, cStorageConfig, &cPackedReader)
    } else {
        status = C.NewPackedReader(cFilePathsArray, cNumPaths, cSchema, cBufferSize, &cPackedReader)
    }
    if err := ConsumeCStatusIntoError(&status); err != nil {
        return nil, err
    }

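As a quick usage sketch (not part of the diff): a non-nil *indexpb.StorageConfig makes NewPackedReader go through NewPackedReaderWithStorageConfig and a task-level filesystem, while nil keeps the original C.NewPackedReader path. The Go field names below are assumed from the getters used above, and all values, paths, and arrowSchema are placeholders.

// Assumed proto field names (Address, BucketName, ...) inferred from the getters above.
cfg := &indexpb.StorageConfig{
    Address:     "localhost:9000",
    BucketName:  "a-bucket",
    RootPath:    "files",
    StorageType: "minio",
}

// Task-level filesystem built from cfg.
reader, err := packed.NewPackedReader(paths, arrowSchema, int64(10*1024*1024), cfg)
if err != nil {
    return err
}

// nil falls back to the pre-existing singleton-backed path.
legacyReader, err := packed.NewPackedReader(paths, arrowSchema, int64(10*1024*1024), nil)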
@@ -82,7 +82,7 @@ func (suite *PackedTestSuite) TestPackedOneFile() {
    columnGroups := []storagecommon.ColumnGroup{{Columns: []int{0, 1, 2}}}
    bufferSize := int64(10 * 1024 * 1024) // 10MB
    multiPartUploadSize := int64(0)
-   pw, err := NewPackedWriter(paths, suite.schema, bufferSize, multiPartUploadSize, columnGroups)
+   pw, err := NewPackedWriter(paths, suite.schema, bufferSize, multiPartUploadSize, columnGroups, nil)
    suite.NoError(err)
    for i := 0; i < batches; i++ {
        err = pw.WriteRecordBatch(suite.rec)
@@ -91,7 +91,7 @@ func (suite *PackedTestSuite) TestPackedOneFile() {
    err = pw.Close()
    suite.NoError(err)

-   reader, err := NewPackedReader(paths, suite.schema, bufferSize)
+   reader, err := NewPackedReader(paths, suite.schema, bufferSize, nil)
    suite.NoError(err)
    rr, err := reader.ReadNext()
    suite.NoError(err)
@@ -134,7 +134,7 @@ func (suite *PackedTestSuite) TestPackedMultiFiles() {
    columnGroups := []storagecommon.ColumnGroup{{Columns: []int{2}}, {Columns: []int{0, 1}}}
    bufferSize := int64(10 * 1024 * 1024) // 10MB
    multiPartUploadSize := int64(0)
-   pw, err := NewPackedWriter(paths, suite.schema, bufferSize, multiPartUploadSize, columnGroups)
+   pw, err := NewPackedWriter(paths, suite.schema, bufferSize, multiPartUploadSize, columnGroups, nil)
    suite.NoError(err)
    for i := 0; i < batches; i++ {
        err = pw.WriteRecordBatch(rec)
@@ -143,7 +143,7 @@ func (suite *PackedTestSuite) TestPackedMultiFiles() {
    err = pw.Close()
    suite.NoError(err)

-   reader, err := NewPackedReader(paths, suite.schema, bufferSize)
+   reader, err := NewPackedReader(paths, suite.schema, bufferSize, nil)
    suite.NoError(err)
    var rows int64 = 0
    var rr arrow.Record

@@ -33,9 +33,10 @@ import (
    "github.com/cockroachdb/errors"

    "github.com/milvus-io/milvus/internal/storagecommon"
    "github.com/milvus-io/milvus/pkg/v2/proto/indexpb"
)

-func NewPackedWriter(filePaths []string, schema *arrow.Schema, bufferSize int64, multiPartUploadSize int64, columnGroups []storagecommon.ColumnGroup) (*PackedWriter, error) {
+func NewPackedWriter(filePaths []string, schema *arrow.Schema, bufferSize int64, multiPartUploadSize int64, columnGroups []storagecommon.ColumnGroup, storageConfig *indexpb.StorageConfig) (*PackedWriter, error) {
    cFilePaths := make([]*C.char, len(filePaths))
    for i, path := range filePaths {
        cFilePaths[i] = C.CString(path)
@@ -68,7 +69,44 @@ func NewPackedWriter(filePaths []string, schema *arrow.Schema, bufferSize int64,
    }

    var cPackedWriter C.CPackedWriter
-   status := C.NewPackedWriter(cSchema, cBufferSize, cFilePathsArray, cNumPaths, cMultiPartUploadSize, cColumnGroups, &cPackedWriter)
+   var status C.CStatus

    if storageConfig != nil {
        cStorageConfig := C.CStorageConfig{
            address:                C.CString(storageConfig.GetAddress()),
            bucket_name:            C.CString(storageConfig.GetBucketName()),
            access_key_id:          C.CString(storageConfig.GetAccessKeyID()),
            access_key_value:       C.CString(storageConfig.GetSecretAccessKey()),
            root_path:              C.CString(storageConfig.GetRootPath()),
            storage_type:           C.CString(storageConfig.GetStorageType()),
            cloud_provider:         C.CString(storageConfig.GetCloudProvider()),
            iam_endpoint:           C.CString(storageConfig.GetIAMEndpoint()),
            log_level:              C.CString("warn"),
            useSSL:                 C.bool(storageConfig.GetUseSSL()),
            sslCACert:              C.CString(storageConfig.GetSslCACert()),
            useIAM:                 C.bool(storageConfig.GetUseIAM()),
            region:                 C.CString(storageConfig.GetRegion()),
            useVirtualHost:         C.bool(storageConfig.GetUseVirtualHost()),
            requestTimeoutMs:       C.int64_t(storageConfig.GetRequestTimeoutMs()),
            gcp_credential_json:    C.CString(storageConfig.GetGcpCredentialJSON()),
            use_custom_part_upload: true,
        }
        defer C.free(unsafe.Pointer(cStorageConfig.address))
        defer C.free(unsafe.Pointer(cStorageConfig.bucket_name))
        defer C.free(unsafe.Pointer(cStorageConfig.access_key_id))
        defer C.free(unsafe.Pointer(cStorageConfig.access_key_value))
        defer C.free(unsafe.Pointer(cStorageConfig.root_path))
        defer C.free(unsafe.Pointer(cStorageConfig.storage_type))
        defer C.free(unsafe.Pointer(cStorageConfig.cloud_provider))
        defer C.free(unsafe.Pointer(cStorageConfig.iam_endpoint))
        defer C.free(unsafe.Pointer(cStorageConfig.log_level))
        defer C.free(unsafe.Pointer(cStorageConfig.sslCACert))
        defer C.free(unsafe.Pointer(cStorageConfig.region))
        defer C.free(unsafe.Pointer(cStorageConfig.gcp_credential_json))
        status = C.NewPackedWriterWithStorageConfig(cSchema, cBufferSize, cFilePathsArray, cNumPaths, cMultiPartUploadSize, cColumnGroups, cStorageConfig, &cPackedWriter)
    } else {
        status = C.NewPackedWriter(cSchema, cBufferSize, cFilePathsArray, cNumPaths, cMultiPartUploadSize, cColumnGroups, &cPackedWriter)
    }
    if err := ConsumeCStatusIntoError(&status); err != nil {
        return nil, err
    }

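The writer side mirrors the updated tests; a minimal sketch assuming paths, arrowSchema, and a prepared record batch rec already exist, with nil keeping the old behaviour and a non-nil *indexpb.StorageConfig (as above) switching to the task-level filesystem.

columnGroups := []storagecommon.ColumnGroup{{Columns: []int{0, 1, 2}}}
bufferSize := int64(10 * 1024 * 1024) // 10MB
multiPartUploadSize := int64(0)

// nil storage config: same behaviour as before this change.
pw, err := packed.NewPackedWriter(paths, arrowSchema, bufferSize, multiPartUploadSize, columnGroups, nil)
if err != nil {
    return err
}
if err := pw.WriteRecordBatch(rec); err != nil {
    return err
}
if err := pw.Close(); err != nil {
    return err
}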
@@ -187,7 +187,7 @@ func InitRemoteArrowFileSystem(params *paramtable.ComponentParam) error {
    }

    status := C.InitRemoteArrowFileSystemSingleton(storageConfig)
-   return HandleCStatus(&status, "InitRemoteChunkManagerSingleton failed")
+   return HandleCStatus(&status, "InitRemoteArrowFileSystemSingleton failed")
}

func InitRemoteChunkManager(params *paramtable.ComponentParam) error {