mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 09:08:43 +08:00
This PR adds support for reading data from StorageV2 using manifest files and the Loon FFI interface during index building, providing an alternative to the traditional segment insert files approach.

Key changes:

Core C++ changes:
- Add SEGMENT_MANIFEST_KEY and LOON_FFI_PROPERTIES_KEY constants for manifest handling
- Extend FileManagerContext to carry loon_ffi_properties for FFI operations
- Update index_c.cpp to pass manifest and loon properties to file managers for all index types (vector, JSON key, text)
- Implement GetFieldDatasFromManifest() in Util.cpp using Arrow C Stream interface:
  * Create Arrow schema from field metadata
  * Initialize FFI reader with manifest content and storage properties
  * Import record batches from C data interface
  * Convert to FieldData for index building
- Update DiskFileManagerImpl and MemFileManagerImpl to support manifest-based data reading with fallback to traditional paths

Loon FFI utilities (internal/core/src/storage/loon_ffi/):
- Add ToCStorageConfig() to convert StorageConfig to C-compatible structure
- Implement GetManifest() to parse manifest JSON and retrieve column groups via FFI
- Enhance MakePropertiesFromStorageConfig() integration

Storage V2 integration:
- Update milvus-storage dependency from 0883026 to 302143c for latest FFI support

Protobuf changes:
- Add manifest field to BuildIndexInfo for passing manifest path to C++ layer

Configuration:
- Add common.storageV2.useLoonFFI config option (default: false) for feature toggle

This change is part of issue #44956 to integrate the StorageV2 FFI interface as the unified storage layer. The implementation maintains backward compatibility by checking for manifest presence and falling back to existing segment insert files approach when manifest is not provided.

Related issue: #44956

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
139 lines
3.3 KiB
Protocol Buffer
139 lines
3.3 KiB
Protocol Buffer
// Schema for the milvus.proto.indexcgo package; Go code is generated into
// the indexcgopb package named by go_package below.
syntax = "proto3";

package milvus.proto.indexcgo;

option go_package = "github.com/milvus-io/milvus/pkg/v2/proto/indexcgopb";

import "common.proto";
import "schema.proto";
|
|
|
|
// TypeParams wraps a list of key/value parameters.
// Per the note elsewhere in this file, it is slated to be replaced by
// MapParams.
message TypeParams {
  // Parameter entries, one common.KeyValuePair per parameter.
  repeated common.KeyValuePair params = 1;
}
|
|
|
|
// IndexParams wraps a list of key/value parameters.
// Per the note elsewhere in this file, it is slated to be replaced by
// MapParams.
message IndexParams {
  // Parameter entries, one common.KeyValuePair per parameter.
  repeated common.KeyValuePair params = 1;
}
|
|
|
|
// MapParams wraps a list of key/value parameters.
// TypeParams & IndexParams will be replaced by MapParams later.
message MapParams {
  // Parameter entries, one common.KeyValuePair per parameter.
  repeated common.KeyValuePair params = 1;
}
|
|
|
|
// MapParamsV2 carries parameters as a native string-to-string map instead of
// a repeated list of key/value pairs.
message MapParamsV2 {
  // Parameter name -> parameter value. Map iteration order is not defined
  // by protobuf, so consumers must not depend on entry order.
  map<string, string> params = 1;
}
|
|
|
|
// Binary is a single named blob: a string key paired with raw bytes.
message Binary {
  // Name identifying this blob.
  string key = 1;
  // Raw payload bytes.
  bytes value = 2;
}
|
|
|
|
// BinarySet is a collection of Binary entries.
message BinarySet {
  // The contained key/bytes entries.
  repeated Binary datas = 1;
}
|
|
|
|
// FieldInsertFiles lists the insert file paths belonging to one field
// (SegmentInsertFiles aggregates one of these per field).
message FieldInsertFiles {
  // Paths of the insert files.
  repeated string file_paths = 1;
}
|
|
|
|
// SegmentInsertFiles includes all field insert files of a segment.
message SegmentInsertFiles {
  // One FieldInsertFiles entry per field.
  repeated FieldInsertFiles field_insert_files = 1;
}
|
|
|
|
// StorageConfig holds the settings used to reach the storage backend.
// Keep in sync with StorageConfig in index_coord.proto: any change made here
// must be made there as well.
// NOTE(review): per-field remarks below are inferred from the field names
// only; confirm against the code that fills in this message.
message StorageConfig {
  // Endpoint and credentials.
  string address = 1;
  string access_keyID = 2;
  string secret_access_key = 3;
  bool useSSL = 4;

  // Location of the data within the store.
  string bucket_name = 5;
  string root_path = 6;

  // IAM-related settings.
  bool useIAM = 7;
  string IAMEndpoint = 8;

  // Backend selection and addressing.
  string storage_type = 9;
  bool use_virtual_host = 10;
  string region = 11;
  string cloud_provider = 12;

  // Request timeout; presumably milliseconds given the `_ms` suffix.
  int64 request_timeout_ms = 13;

  // Additional TLS / credential material.
  string sslCACert = 14;
  string GcpCredentialJSON = 15;

  // Upper bound on connections (unsigned, so never negative).
  uint32 max_connections = 16;
}
|
|
|
|
// OptionalFieldInfo describes one optional field supplied alongside an index
// build (see BuildIndexInfo.opt_fields).
// Keep in sync with OptionalFieldInfo in index_coord.proto: any change made
// here must be made there as well.
message OptionalFieldInfo {
  // Numeric ID and name of the field.
  int64 fieldID = 1;
  string field_name = 2;
  // Field type as a raw int32.
  // NOTE(review): presumably a schema data-type enum value; confirm.
  int32 field_type = 3;
  // Paths of the data files for this field.
  repeated string data_paths = 4;
  // Element type as a raw int32.
  // NOTE(review): presumably only meaningful for container-typed fields;
  // confirm against the producer.
  int32 element_type = 5;
}
|
|
|
|
// BuildIndexInfo bundles the inputs of an index build.
// NOTE(review): unless stated otherwise, the grouping remarks below are
// inferred from field names; confirm against the consumers of this message.
message BuildIndexInfo {
  // Identifiers locating the build and the data it covers.
  string clusterID = 1;
  int64 buildID = 2;
  int64 collectionID = 3;
  int64 partitionID = 4;
  int64 segmentID = 5;

  // Version information.
  int64 index_version = 6;
  int32 current_index_version = 7;

  // Size of the data set.
  int64 num_rows = 8;
  int64 dim = 9;

  // Input/output file locations.
  string index_file_prefix = 10;
  repeated string insert_files = 11;
  // repeated int64 data_ids = 12;

  // Schema of the field being indexed. Uses field number 12; the
  // commented-out data_ids line above carries the same number.
  schema.FieldSchema field_schema = 12;
  // Settings for reaching the storage backend.
  StorageConfig storage_config = 13;

  // Index and type parameters as key/value pairs.
  repeated common.KeyValuePair index_params = 14;
  repeated common.KeyValuePair type_params = 15;

  // Store locations and version.
  string store_path = 16;
  int64 store_version = 17;
  string index_store_path = 18;

  // Optional fields supplied alongside the indexed field.
  repeated OptionalFieldInfo opt_fields = 19;
  bool partition_key_isolation = 20;
  int32 current_scalar_index_version = 21;
  int64 json_key_stats_tantivy_memory = 22;
  int64 lack_binlog_rows = 23;
  int64 storage_version = 24;

  // Insert files of the segment, grouped per field.
  SegmentInsertFiles segment_insert_files = 25;
  // Context handed to the storage plugin (encryption zone, collection, key).
  StoragePluginContext storage_plugin_context = 26;

  // JSON stats tuning knobs.
  int64 json_stats_max_shredding_columns = 27;
  double json_stats_shredding_ratio_threshold = 28;
  int64 json_stats_write_batch_size = 29;

  // Manifest passed to the C++ layer for StorageV2 / Loon FFI reads. Per the
  // change description, when no manifest is provided the build falls back to
  // the segment insert files approach.
  string manifest = 30;
}
|
|
|
|
// StoragePluginContext carries the encryption-related context referenced by
// BuildIndexInfo.storage_plugin_context.
message StoragePluginContext {
  // ID of the encryption zone.
  int64 encryption_zone_id = 1;
  // ID of the collection.
  int64 collection_id = 2;
  // Encryption key, as a string.
  // NOTE(review): encoding/format is not visible here; confirm with the
  // producer before logging or persisting this value.
  string encryption_key = 3;
}
|
|
|
|
// LoadTextIndexInfo bundles the inputs for loading a text index.
// NOTE(review): per-field remarks are inferred from the field names; confirm
// against the loader.
message LoadTextIndexInfo {
  // Field, index version and build the index belongs to.
  int64 FieldID = 1;
  int64 version = 2;
  int64 buildID = 3;

  // Index files to load.
  repeated string files = 4;
  // Schema of the indexed field.
  schema.FieldSchema schema = 5;

  // Owning collection and partition.
  int64 collectionID = 6;
  int64 partitionID = 7;

  // Priority of this load.
  common.LoadPriority load_priority = 8;
  // Whether mmap is enabled for this load.
  bool enable_mmap = 9;
  // Size of the index; unit is not stated here (presumably bytes -- confirm).
  int64 index_size = 10;
}
|
|
|
|
// LoadJsonKeyIndexInfo bundles the inputs for loading a JSON key index.
// Fields 1-9 match LoadTextIndexInfo in name, type and number; fields 10-11
// are specific to this message.
// NOTE(review): per-field remarks are inferred from the field names; confirm
// against the loader.
message LoadJsonKeyIndexInfo {
  // Field, index version and build the index belongs to.
  int64 FieldID = 1;
  int64 version = 2;
  int64 buildID = 3;

  // Index files to load.
  repeated string files = 4;
  // Schema of the indexed field.
  schema.FieldSchema schema = 5;

  // Owning collection and partition.
  int64 collectionID = 6;
  int64 partitionID = 7;

  // Priority of this load.
  common.LoadPriority load_priority = 8;
  // Whether mmap is enabled for this load.
  bool enable_mmap = 9;
  // Directory path used for mmap.
  string mmap_dir_path = 10;
  // Size of the stats; unit is not stated here (presumably bytes -- confirm).
  int64 stats_size = 11;
}
|