milvus/pkg/proto/index_cgo_msg.proto
aoiasd ee216877bb
enhance: support compaction with file resource in ref mode (#46399)
Add support for DataNode compaction using file resources in ref mode.
SortCompaction and StatsJobs will build text indexes, which may use file
resources.
relate: https://github.com/milvus-io/milvus/issues/43687

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
- Core invariant: file resources (analyzer binaries/metadata) are only
fetched, downloaded and used when the node is configured in Ref mode
(fileresource.IsRefMode via CommonCfg.QNFileResourceMode /
DNFileResourceMode); Sync now carries a version and managers track
per-resource versions/resource IDs so newer resource sets win and older
entries are pruned (RefManager/SynchManager resource maps).
- Logic removed / simplified: component-specific FileResourceMode flags
and an indirection through a long-lived BinlogIO wrapper were
consolidated — file-resource mode moved to CommonCfg, Sync/Download APIs
became version- and context-aware, and compaction/index tasks accept a
ChunkManager directly (binlog IO wrapper creation inlined). This
eliminates duplicated config checks and wrapper indirection while
preserving the same chunk/IO semantics.
- Why no data loss or behavior regression: all file-resource code paths
are gated by the configured mode (default remains "sync"); when not in
ref-mode or when no resources exist, compaction and stats flows follow
existing code paths unchanged. Versioned Sync + resourceID maps ensure
newly synced sets replace older ones and RefManager prunes stale files;
GetFileResources returns an error if requested IDs are missing (prevents
silent use of wrong resources). Analyzer naming/parameter changes add
analyzer_extra_info but default-callers pass "" so existing analyzers
and index contents remain unchanged.
- New capability: DataNode compaction and StatsJobs can now build text
indexes using external file resources in Ref mode — DataCoord exposes
GetFileResources and populates CompactionPlan.file_resources;
SortCompaction/StatsTask download resources via fileresource.Manager,
produce an analyzer_extra_info JSON (storage + resource->id map) via
analyzer.BuildExtraResourceInfo, and propagate analyzer_extra_info into
BuildIndexInfo so the tantivy bindings can load custom analyzers during
text index creation.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

Signed-off-by: aoiasd <zhicheng.yue@zilliz.com>
2026-01-06 16:31:31 +08:00

140 lines
3.4 KiB
Protocol Buffer

syntax = "proto3";

package milvus.proto.indexcgo;

import "common.proto";
import "schema.proto";

option go_package = "github.com/milvus-io/milvus/pkg/v2/proto/indexcgopb";
// TypeParams wraps a list of common.KeyValuePair entries.
// This file notes that TypeParams and IndexParams are slated to be replaced
// by MapParams.
// NOTE(review): presumably these are a field's type parameters, judging by
// the message name — confirm against callers.
message TypeParams {
  // Key/value entries, in the order the writer added them.
  repeated common.KeyValuePair params = 1;
}
// IndexParams wraps a list of common.KeyValuePair entries.
// This file notes that TypeParams and IndexParams are slated to be replaced
// by MapParams.
// NOTE(review): presumably these are index build parameters, judging by the
// message name — confirm against callers.
message IndexParams {
  // Key/value entries, in the order the writer added them.
  repeated common.KeyValuePair params = 1;
}
// MapParams wraps a list of common.KeyValuePair entries.
// TypeParams & IndexParams will be replaced by MapParams later.
message MapParams {
  // Key/value entries, in the order the writer added them.
  repeated common.KeyValuePair params = 1;
}
// MapParamsV2 carries string-to-string parameters as a protobuf map rather
// than a repeated list of key/value pairs.
message MapParamsV2 {
  // Parameter name -> parameter value. Map iteration order is not defined by
  // protobuf, so readers must not rely on it.
  map<string, string> params = 1;
}
// Binary pairs a string key with an opaque byte payload.
message Binary {
  // Identifier for the payload.
  string key = 1;
  // Raw payload bytes.
  bytes value = 2;
}
// BinarySet is a collection of Binary entries.
message BinarySet {
  // The keyed binary payloads that make up the set.
  repeated Binary datas = 1;
}
// FieldInsertFiles lists file paths as strings.
// NOTE(review): presumably the insert files of a single field, judging by
// the message name and its use in SegmentInsertFiles — confirm.
message FieldInsertFiles {
  // File paths, one entry per file.
  repeated string file_paths = 1;
}
// SegmentInsertFiles: segment insert files include all field insert files.
message SegmentInsertFiles {
  // One FieldInsertFiles entry per group of field insert files.
  repeated FieldInsertFiles field_insert_files = 1;
}
// StorageConfig must be kept in sync with StorageConfig in the
// index_coord.proto file: modify both definitions together.
// NOTE(review): field semantics below are inferred from field names only;
// confirm against the code that populates this message.
message StorageConfig {
  // Storage endpoint and credentials (presumably; verify against caller).
  string address = 1;
  string access_keyID = 2;
  string secret_access_key = 3;
  bool useSSL = 4;

  // Bucket and path prefix (presumably; verify against caller).
  string bucket_name = 5;
  string root_path = 6;

  // IAM-related settings (presumably; verify against caller).
  bool useIAM = 7;
  string IAMEndpoint = 8;

  // Backend selection and addressing options.
  string storage_type = 9;
  bool use_virtual_host = 10;
  string region = 11;
  string cloud_provider = 12;

  // Request timeout; the `_ms` suffix indicates milliseconds.
  int64 request_timeout_ms = 13;

  string sslCACert = 14;
  string GcpCredentialJSON = 15;
  uint32 max_connections = 16;
}
// OptionalFieldInfo must be kept in sync with OptionalFieldInfo in the
// index_coord.proto file: modify both definitions together.
message OptionalFieldInfo {
  // Numeric ID and name of the field.
  int64 fieldID = 1;
  string field_name = 2;

  // Field type, carried as a plain int32 rather than an enum type.
  // NOTE(review): presumably a schema.DataType value — confirm.
  int32 field_type = 3;

  // Paths to the field's data files.
  repeated string data_paths = 4;

  // Element type, carried as a plain int32 rather than an enum type.
  // NOTE(review): presumably only meaningful for container-typed fields —
  // confirm.
  int32 element_type = 5;
}
// BuildIndexInfo bundles identifiers, input file lists, storage settings and
// tuning parameters into one message.
// NOTE(review): presumably the single argument handed to the native index
// builder across the cgo boundary, judging by the package name — confirm.
// Field-level semantics below are inferred from names unless stated.
message BuildIndexInfo {
  // Identifiers.
  string clusterID = 1;
  int64 buildID = 2;
  int64 collectionID = 3;
  int64 partitionID = 4;
  int64 segmentID = 5;

  // Versioning.
  int64 index_version = 6;
  int32 current_index_version = 7;

  // Data shape.
  int64 num_rows = 8;
  int64 dim = 9;

  // Input and output locations.
  string index_file_prefix = 10;
  repeated string insert_files = 11;

  // NOTE(review): the commented-out declaration below names field number 12,
  // which is now assigned to field_schema; it must not be re-enabled as-is.
  // repeated int64 data_ids = 12;
  schema.FieldSchema field_schema = 12;
  StorageConfig storage_config = 13;

  // Key/value parameter lists.
  repeated common.KeyValuePair index_params = 14;
  repeated common.KeyValuePair type_params = 15;

  string store_path = 16;
  int64 store_version = 17;
  string index_store_path = 18;
  repeated OptionalFieldInfo opt_fields = 19;
  bool partition_key_isolation = 20;
  int32 current_scalar_index_version = 21;
  int64 json_key_stats_tantivy_memory = 22;
  int64 lack_binlog_rows = 23;
  int64 storage_version = 24;
  SegmentInsertFiles segment_insert_files = 25;
  StoragePluginContext storage_plugin_context = 26;

  // JSON stats tuning knobs.
  int64 json_stats_max_shredding_columns = 27;
  double json_stats_shredding_ratio_threshold = 28;
  int64 json_stats_write_batch_size = 29;

  string manifest = 30;

  // NOTE(review): presumably a JSON document describing extra analyzer
  // resources used when building text indexes; empty when none apply —
  // confirm against the producer.
  string analyzer_extra_info = 31;
}
// StoragePluginContext carries an encryption zone ID, a collection ID and an
// encryption key.
// NOTE(review): presumably consumed by a storage-layer encryption plugin,
// judging by the message name — confirm. The key travels as a plain string,
// so treat serialized instances of this message as sensitive.
message StoragePluginContext {
  int64 encryption_zone_id = 1;
  int64 collection_id = 2;
  string encryption_key = 3;
}
// LoadTextIndexInfo names a field, the files to read and loading options.
// NOTE(review): presumably describes how to load a text index, judging by
// the message name — confirm.
// Fields 1-9 have the same names, types and numbers as those in
// LoadJsonKeyIndexInfo; field 10 differs (int64 index_size here).
message LoadTextIndexInfo {
  // Identity of the index being loaded.
  int64 FieldID = 1;
  int64 version = 2;
  int64 buildID = 3;

  // Files to load and the schema of the field.
  repeated string files = 4;
  schema.FieldSchema schema = 5;

  int64 collectionID = 6;
  int64 partitionID = 7;

  // Loading options.
  common.LoadPriority load_priority = 8;
  bool enable_mmap = 9;

  // NOTE(review): unit is not stated in this file (presumably bytes) —
  // confirm.
  int64 index_size = 10;
}
// LoadJsonKeyIndexInfo names a field, the files to read and loading options.
// NOTE(review): presumably describes how to load JSON key index/stats data,
// judging by the message name — confirm.
// Fields 1-9 have the same names, types and numbers as those in
// LoadTextIndexInfo; field 10 differs (string mmap_dir_path here).
message LoadJsonKeyIndexInfo {
  // Identity of the index being loaded.
  int64 FieldID = 1;
  int64 version = 2;
  int64 buildID = 3;

  // Files to load and the schema of the field.
  repeated string files = 4;
  schema.FieldSchema schema = 5;

  int64 collectionID = 6;
  int64 partitionID = 7;

  // Loading options.
  common.LoadPriority load_priority = 8;
  bool enable_mmap = 9;
  string mmap_dir_path = 10;

  // NOTE(review): unit is not stated in this file (presumably bytes) —
  // confirm.
  int64 stats_size = 11;
}