mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
enhance: [StorageV2] Use compressed size as log file size (#44402)
Related to #39173 backlog issue that memory size and log size shared same value. This patch add `GetFileSize` api to get remote compressed binlog size as meta log file size to calculate usage more accurate. --------- Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
parent
691a8df953
commit
6f7318a731
@ -12,6 +12,7 @@
|
|||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "common/common_type_c.h"
|
||||||
#include "parquet/encryption/encryption.h"
|
#include "parquet/encryption/encryption.h"
|
||||||
#include "parquet/properties.h"
|
#include "parquet/properties.h"
|
||||||
#include "parquet/types.h"
|
#include "parquet/types.h"
|
||||||
@ -22,6 +23,7 @@
|
|||||||
#include "milvus-storage/filesystem/fs.h"
|
#include "milvus-storage/filesystem/fs.h"
|
||||||
#include "storage/PluginLoader.h"
|
#include "storage/PluginLoader.h"
|
||||||
#include "storage/KeyRetriever.h"
|
#include "storage/KeyRetriever.h"
|
||||||
|
#include "storage/Util.h"
|
||||||
|
|
||||||
#include <arrow/c/bridge.h>
|
#include <arrow/c/bridge.h>
|
||||||
#include <arrow/filesystem/filesystem.h>
|
#include <arrow/filesystem/filesystem.h>
|
||||||
@ -262,3 +264,80 @@ CloseWriter(CPackedWriter c_packed_writer) {
|
|||||||
return milvus::FailureCStatus(&e);
|
return milvus::FailureCStatus(&e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CStatus
|
||||||
|
GetFileSize(const char* path, int64_t* size) {
|
||||||
|
SCOPE_CGO_CALL_METRIC();
|
||||||
|
|
||||||
|
try {
|
||||||
|
auto trueFs = milvus_storage::ArrowFileSystemSingleton::GetInstance()
|
||||||
|
.GetArrowFileSystem();
|
||||||
|
if (!trueFs) {
|
||||||
|
return milvus::FailureCStatus(
|
||||||
|
milvus::ErrorCode::FileReadFailed,
|
||||||
|
"[StorageV2] Failed to get filesystem");
|
||||||
|
}
|
||||||
|
|
||||||
|
auto result = trueFs->GetFileInfo(path);
|
||||||
|
if (!result.ok()) {
|
||||||
|
return milvus::FailureCStatus(
|
||||||
|
milvus::ErrorCode::FileReadFailed,
|
||||||
|
"[StorageV2] Failed to get file info");
|
||||||
|
}
|
||||||
|
*size = result.ValueOrDie().size();
|
||||||
|
|
||||||
|
return milvus::SuccessCStatus();
|
||||||
|
} catch (std::exception& e) {
|
||||||
|
return milvus::FailureCStatus(&e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CStatus
|
||||||
|
GetFileSizeWithStorageConfig(const char* path,
|
||||||
|
int64_t* size,
|
||||||
|
CStorageConfig c_storage_config) {
|
||||||
|
SCOPE_CGO_CALL_METRIC();
|
||||||
|
|
||||||
|
try {
|
||||||
|
milvus_storage::ArrowFileSystemConfig conf;
|
||||||
|
conf.address = std::string(c_storage_config.address);
|
||||||
|
conf.bucket_name = std::string(c_storage_config.bucket_name);
|
||||||
|
conf.access_key_id = std::string(c_storage_config.access_key_id);
|
||||||
|
conf.access_key_value = std::string(c_storage_config.access_key_value);
|
||||||
|
conf.root_path = std::string(c_storage_config.root_path);
|
||||||
|
conf.storage_type = std::string(c_storage_config.storage_type);
|
||||||
|
conf.cloud_provider = std::string(c_storage_config.cloud_provider);
|
||||||
|
conf.iam_endpoint = std::string(c_storage_config.iam_endpoint);
|
||||||
|
conf.log_level = std::string(c_storage_config.log_level);
|
||||||
|
conf.region = std::string(c_storage_config.region);
|
||||||
|
conf.useSSL = c_storage_config.useSSL;
|
||||||
|
conf.sslCACert = std::string(c_storage_config.sslCACert);
|
||||||
|
conf.useIAM = c_storage_config.useIAM;
|
||||||
|
conf.useVirtualHost = c_storage_config.useVirtualHost;
|
||||||
|
conf.requestTimeoutMs = c_storage_config.requestTimeoutMs;
|
||||||
|
conf.gcp_credential_json =
|
||||||
|
std::string(c_storage_config.gcp_credential_json);
|
||||||
|
conf.use_custom_part_upload = c_storage_config.use_custom_part_upload;
|
||||||
|
milvus_storage::ArrowFileSystemSingleton::GetInstance().Init(conf);
|
||||||
|
auto trueFs = milvus_storage::ArrowFileSystemSingleton::GetInstance()
|
||||||
|
.GetArrowFileSystem();
|
||||||
|
|
||||||
|
if (!trueFs) {
|
||||||
|
return milvus::FailureCStatus(
|
||||||
|
milvus::ErrorCode::FileReadFailed,
|
||||||
|
"[StorageV2] Failed to get filesystem");
|
||||||
|
}
|
||||||
|
|
||||||
|
auto result = trueFs->GetFileInfo(path);
|
||||||
|
if (!result.ok()) {
|
||||||
|
return milvus::FailureCStatus(
|
||||||
|
milvus::ErrorCode::FileReadFailed,
|
||||||
|
"[StorageV2] Failed to get file info");
|
||||||
|
}
|
||||||
|
*size = result.ValueOrDie().size();
|
||||||
|
|
||||||
|
return milvus::SuccessCStatus();
|
||||||
|
} catch (std::exception& e) {
|
||||||
|
return milvus::FailureCStatus(&e);
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -14,6 +14,7 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "common/common_type_c.h"
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
@ -54,6 +55,14 @@ WriteRecordBatch(CPackedWriter c_packed_writer,
|
|||||||
CStatus
|
CStatus
|
||||||
CloseWriter(CPackedWriter c_packed_writer);
|
CloseWriter(CPackedWriter c_packed_writer);
|
||||||
|
|
||||||
|
CStatus
|
||||||
|
GetFileSize(const char* path, int64_t* size);
|
||||||
|
|
||||||
|
CStatus
|
||||||
|
GetFileSizeWithStorageConfig(const char* path,
|
||||||
|
int64_t* size,
|
||||||
|
CStorageConfig c_storage_config);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -181,6 +181,10 @@ func (bw *BulkPackWriterV2) writeInserts(ctx context.Context, pack *SyncPack) (m
|
|||||||
if err = w.Write(rec); err != nil {
|
if err = w.Write(rec); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
// close first to get compressed size
|
||||||
|
if err = w.Close(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
for _, columnGroup := range columnGroups {
|
for _, columnGroup := range columnGroups {
|
||||||
columnGroupID := columnGroup.GroupID
|
columnGroupID := columnGroup.GroupID
|
||||||
logs[columnGroupID] = &datapb.FieldBinlog{
|
logs[columnGroupID] = &datapb.FieldBinlog{
|
||||||
@ -188,7 +192,7 @@ func (bw *BulkPackWriterV2) writeInserts(ctx context.Context, pack *SyncPack) (m
|
|||||||
ChildFields: columnGroup.Fields,
|
ChildFields: columnGroup.Fields,
|
||||||
Binlogs: []*datapb.Binlog{
|
Binlogs: []*datapb.Binlog{
|
||||||
{
|
{
|
||||||
LogSize: int64(w.GetColumnGroupWrittenUncompressed(columnGroup.GroupID)),
|
LogSize: int64(w.GetColumnGroupWrittenCompressed(columnGroup.GroupID)),
|
||||||
MemorySize: int64(w.GetColumnGroupWrittenUncompressed(columnGroup.GroupID)),
|
MemorySize: int64(w.GetColumnGroupWrittenUncompressed(columnGroup.GroupID)),
|
||||||
LogPath: w.GetWrittenPaths(columnGroupID),
|
LogPath: w.GetWrittenPaths(columnGroupID),
|
||||||
EntriesNum: w.GetWrittenRowNum(),
|
EntriesNum: w.GetWrittenRowNum(),
|
||||||
@ -198,9 +202,6 @@ func (bw *BulkPackWriterV2) writeInserts(ctx context.Context, pack *SyncPack) (m
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err = w.Close(); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return logs, nil
|
return logs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -156,6 +156,7 @@ type packedRecordWriter struct {
|
|||||||
rowNum int64
|
rowNum int64
|
||||||
writtenUncompressed uint64
|
writtenUncompressed uint64
|
||||||
columnGroupUncompressed map[typeutil.UniqueID]uint64
|
columnGroupUncompressed map[typeutil.UniqueID]uint64
|
||||||
|
columnGroupCompressed map[typeutil.UniqueID]uint64
|
||||||
storageConfig *indexpb.StorageConfig
|
storageConfig *indexpb.StorageConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -199,6 +200,13 @@ func (pw *packedRecordWriter) GetColumnGroupWrittenUncompressed(columnGroup type
|
|||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (pw *packedRecordWriter) GetColumnGroupWrittenCompressed(columnGroup typeutil.UniqueID) uint64 {
|
||||||
|
if size, ok := pw.columnGroupCompressed[columnGroup]; ok {
|
||||||
|
return size
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
func (pw *packedRecordWriter) GetWrittenPaths(columnGroup typeutil.UniqueID) string {
|
func (pw *packedRecordWriter) GetWrittenPaths(columnGroup typeutil.UniqueID) string {
|
||||||
if path, ok := pw.pathsMap[columnGroup]; ok {
|
if path, ok := pw.pathsMap[columnGroup]; ok {
|
||||||
return path
|
return path
|
||||||
@ -212,7 +220,18 @@ func (pw *packedRecordWriter) GetWrittenRowNum() int64 {
|
|||||||
|
|
||||||
func (pw *packedRecordWriter) Close() error {
|
func (pw *packedRecordWriter) Close() error {
|
||||||
if pw.writer != nil {
|
if pw.writer != nil {
|
||||||
return pw.writer.Close()
|
err := pw.writer.Close()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
for id, fpath := range pw.pathsMap {
|
||||||
|
truePath := path.Join(pw.bucketName, fpath)
|
||||||
|
size, err := packed.GetFileSize(truePath, pw.storageConfig)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
pw.columnGroupCompressed[id] = uint64(size)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -242,6 +261,7 @@ func NewPackedRecordWriter(bucketName string, paths []string, schema *schemapb.C
|
|||||||
fmt.Sprintf("can not new packed record writer %s", err.Error()))
|
fmt.Sprintf("can not new packed record writer %s", err.Error()))
|
||||||
}
|
}
|
||||||
columnGroupUncompressed := make(map[typeutil.UniqueID]uint64)
|
columnGroupUncompressed := make(map[typeutil.UniqueID]uint64)
|
||||||
|
columnGroupCompressed := make(map[typeutil.UniqueID]uint64)
|
||||||
pathsMap := make(map[typeutil.UniqueID]string)
|
pathsMap := make(map[typeutil.UniqueID]string)
|
||||||
if len(paths) != len(columnGroups) {
|
if len(paths) != len(columnGroups) {
|
||||||
return nil, merr.WrapErrParameterInvalid(len(paths), len(columnGroups),
|
return nil, merr.WrapErrParameterInvalid(len(paths), len(columnGroups),
|
||||||
@ -249,6 +269,7 @@ func NewPackedRecordWriter(bucketName string, paths []string, schema *schemapb.C
|
|||||||
}
|
}
|
||||||
for i, columnGroup := range columnGroups {
|
for i, columnGroup := range columnGroups {
|
||||||
columnGroupUncompressed[columnGroup.GroupID] = 0
|
columnGroupUncompressed[columnGroup.GroupID] = 0
|
||||||
|
columnGroupCompressed[columnGroup.GroupID] = 0
|
||||||
pathsMap[columnGroup.GroupID] = paths[i]
|
pathsMap[columnGroup.GroupID] = paths[i]
|
||||||
}
|
}
|
||||||
return &packedRecordWriter{
|
return &packedRecordWriter{
|
||||||
@ -260,6 +281,7 @@ func NewPackedRecordWriter(bucketName string, paths []string, schema *schemapb.C
|
|||||||
pathsMap: pathsMap,
|
pathsMap: pathsMap,
|
||||||
columnGroups: columnGroups,
|
columnGroups: columnGroups,
|
||||||
columnGroupUncompressed: columnGroupUncompressed,
|
columnGroupUncompressed: columnGroupUncompressed,
|
||||||
|
columnGroupCompressed: columnGroupCompressed,
|
||||||
storageConfig: storageConfig,
|
storageConfig: storageConfig,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
@ -402,6 +424,11 @@ func (pw *PackedBinlogRecordWriter) GetWrittenUncompressed() uint64 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (pw *PackedBinlogRecordWriter) Close() error {
|
func (pw *PackedBinlogRecordWriter) Close() error {
|
||||||
|
if pw.writer != nil {
|
||||||
|
if err := pw.writer.Close(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
pw.finalizeBinlogs()
|
pw.finalizeBinlogs()
|
||||||
if err := pw.writeStats(); err != nil {
|
if err := pw.writeStats(); err != nil {
|
||||||
return err
|
return err
|
||||||
@ -409,11 +436,6 @@ func (pw *PackedBinlogRecordWriter) Close() error {
|
|||||||
if err := pw.writeBm25Stats(); err != nil {
|
if err := pw.writeBm25Stats(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if pw.writer != nil {
|
|
||||||
if err := pw.writer.Close(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -434,7 +456,7 @@ func (pw *PackedBinlogRecordWriter) finalizeBinlogs() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
pw.fieldBinlogs[columnGroupID].Binlogs = append(pw.fieldBinlogs[columnGroupID].Binlogs, &datapb.Binlog{
|
pw.fieldBinlogs[columnGroupID].Binlogs = append(pw.fieldBinlogs[columnGroupID].Binlogs, &datapb.Binlog{
|
||||||
LogSize: int64(pw.writer.GetColumnGroupWrittenUncompressed(columnGroupID)),
|
LogSize: int64(pw.writer.GetColumnGroupWrittenCompressed(columnGroupID)),
|
||||||
MemorySize: int64(pw.writer.GetColumnGroupWrittenUncompressed(columnGroupID)),
|
MemorySize: int64(pw.writer.GetColumnGroupWrittenUncompressed(columnGroupID)),
|
||||||
LogPath: pw.writer.GetWrittenPaths(columnGroupID),
|
LogPath: pw.writer.GetWrittenPaths(columnGroupID),
|
||||||
EntriesNum: pw.writer.GetWrittenRowNum(),
|
EntriesNum: pw.writer.GetWrittenRowNum(),
|
||||||
|
|||||||
@ -22,6 +22,7 @@ package packed
|
|||||||
#include "common/type_c.h"
|
#include "common/type_c.h"
|
||||||
#include "common/protobuf_utils_c.h"
|
#include "common/protobuf_utils_c.h"
|
||||||
#include "segcore/segment_c.h"
|
#include "segcore/segment_c.h"
|
||||||
|
#include "segcore/packed_writer_c.h"
|
||||||
#include "storage/storage_c.h"
|
#include "storage/storage_c.h"
|
||||||
*/
|
*/
|
||||||
import "C"
|
import "C"
|
||||||
@ -29,6 +30,7 @@ import "C"
|
|||||||
import (
|
import (
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/pkg/v2/proto/indexpb"
|
||||||
"github.com/milvus-io/milvus/pkg/v2/util/merr"
|
"github.com/milvus-io/milvus/pkg/v2/util/merr"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -41,3 +43,57 @@ func ConsumeCStatusIntoError(status *C.CStatus) error {
|
|||||||
C.free(unsafe.Pointer(status.error_msg))
|
C.free(unsafe.Pointer(status.error_msg))
|
||||||
return merr.SegcoreError(int32(errorCode), errorMsg)
|
return merr.SegcoreError(int32(errorCode), errorMsg)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GetFileSize(path string, storageConfig *indexpb.StorageConfig) (int64, error) {
|
||||||
|
cPath := C.CString(path)
|
||||||
|
defer C.free(unsafe.Pointer(cPath))
|
||||||
|
|
||||||
|
var fileSize int64
|
||||||
|
if storageConfig == nil {
|
||||||
|
status := C.GetFileSize(cPath, (*C.int64_t)(unsafe.Pointer(&fileSize)))
|
||||||
|
return fileSize, ConsumeCStatusIntoError(&status)
|
||||||
|
} else {
|
||||||
|
cStorageConfig := GetCStorageConfig(storageConfig)
|
||||||
|
defer DeleteCStorageConfig(cStorageConfig)
|
||||||
|
status := C.GetFileSizeWithStorageConfig(cPath, (*C.int64_t)(unsafe.Pointer(&fileSize)), cStorageConfig)
|
||||||
|
return fileSize, ConsumeCStatusIntoError(&status)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetCStorageConfig(storageConfig *indexpb.StorageConfig) C.CStorageConfig {
|
||||||
|
cStorageConfig := C.CStorageConfig{
|
||||||
|
address: C.CString(storageConfig.GetAddress()),
|
||||||
|
bucket_name: C.CString(storageConfig.GetBucketName()),
|
||||||
|
access_key_id: C.CString(storageConfig.GetAccessKeyID()),
|
||||||
|
access_key_value: C.CString(storageConfig.GetSecretAccessKey()),
|
||||||
|
root_path: C.CString(storageConfig.GetRootPath()),
|
||||||
|
storage_type: C.CString(storageConfig.GetStorageType()),
|
||||||
|
cloud_provider: C.CString(storageConfig.GetCloudProvider()),
|
||||||
|
iam_endpoint: C.CString(storageConfig.GetIAMEndpoint()),
|
||||||
|
log_level: C.CString("warn"),
|
||||||
|
useSSL: C.bool(storageConfig.GetUseSSL()),
|
||||||
|
sslCACert: C.CString(storageConfig.GetSslCACert()),
|
||||||
|
useIAM: C.bool(storageConfig.GetUseIAM()),
|
||||||
|
region: C.CString(storageConfig.GetRegion()),
|
||||||
|
useVirtualHost: C.bool(storageConfig.GetUseVirtualHost()),
|
||||||
|
requestTimeoutMs: C.int64_t(storageConfig.GetRequestTimeoutMs()),
|
||||||
|
gcp_credential_json: C.CString(storageConfig.GetGcpCredentialJSON()),
|
||||||
|
use_custom_part_upload: true,
|
||||||
|
}
|
||||||
|
return cStorageConfig
|
||||||
|
}
|
||||||
|
|
||||||
|
func DeleteCStorageConfig(cStorageConfig C.CStorageConfig) {
|
||||||
|
C.free(unsafe.Pointer(cStorageConfig.address))
|
||||||
|
C.free(unsafe.Pointer(cStorageConfig.bucket_name))
|
||||||
|
C.free(unsafe.Pointer(cStorageConfig.access_key_id))
|
||||||
|
C.free(unsafe.Pointer(cStorageConfig.access_key_value))
|
||||||
|
C.free(unsafe.Pointer(cStorageConfig.root_path))
|
||||||
|
C.free(unsafe.Pointer(cStorageConfig.storage_type))
|
||||||
|
C.free(unsafe.Pointer(cStorageConfig.cloud_provider))
|
||||||
|
C.free(unsafe.Pointer(cStorageConfig.iam_endpoint))
|
||||||
|
C.free(unsafe.Pointer(cStorageConfig.log_level))
|
||||||
|
C.free(unsafe.Pointer(cStorageConfig.sslCACert))
|
||||||
|
C.free(unsafe.Pointer(cStorageConfig.region))
|
||||||
|
C.free(unsafe.Pointer(cStorageConfig.gcp_credential_json))
|
||||||
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user