Migrate the ability to upload and download binlog to cpp (#22984)

Signed-off-by: xige-16 <xi.ge@zilliz.com>

commit 04082b3de2 (parent 6a1eff3487)
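This change moves binlog upload/download into the C++ segcore: IndexBase gains Build(config), Load(config), and Upload() entry points, and each index type owns a file manager (MemFileManagerImpl for in-memory indexes, DiskFileManagerImpl for DiskANN) that pulls insert binlogs from remote storage and pushes serialized index files back. A minimal sketch of the resulting lifecycle, assuming the interfaces introduced in the hunks below; it only compiles inside the milvus tree, and the binlog/index path strings are hypothetical placeholders, not real object keys:

#include <string>
#include <vector>
#include "index/IndexFactory.h"  // IndexBase, Config come from the milvus tree

// Not the authoritative call sequence; a sketch of the new C++-side flow.
void BuildUploadLoadSketch(milvus::index::IndexBase& index) {
    milvus::Config build_config;
    build_config["insert_files"] =
        std::vector<std::string>{"insert_log/1/100/0"};  // hypothetical binlog key
    index.Build(build_config);      // the index downloads binlogs and builds itself

    auto remote = index.Upload();   // serialize + upload; returns remote paths
    // `remote` maps each remote file name to its byte size; the data pointers
    // are null because the payload already lives in remote storage.

    milvus::Config load_config;
    load_config["index_files"] =
        std::vector<std::string>{"index_files/1/100/0"};  // hypothetical index key
    index.Load(load_config);        // download, reassemble slices, deserialize
}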
@@ -19,27 +19,27 @@
 namespace milvus {
 
-int64_t index_file_slice_size = DEFAULT_INDEX_FILE_SLICE_SIZE;
-int64_t thread_core_coefficient = DEFAULT_THREAD_CORE_COEFFICIENT;
-int cpu_num = DEFAULT_CPU_NUM;
+int64_t FILE_SLICE_SIZE = DEFAULT_INDEX_FILE_SLICE_SIZE;
+int64_t THREAD_CORE_COEFFICIENT = DEFAULT_THREAD_CORE_COEFFICIENT;
+int CPU_NUM = DEFAULT_CPU_NUM;
 
 void
 SetIndexSliceSize(const int64_t size) {
-    index_file_slice_size = size;
-    LOG_SEGCORE_DEBUG_ << "set config index slice size: "
-                       << index_file_slice_size;
+    FILE_SLICE_SIZE = size << 20;
+    LOG_SEGCORE_DEBUG_ << "set config index slice size (byte): "
+                       << FILE_SLICE_SIZE;
 }
 
 void
 SetThreadCoreCoefficient(const int64_t coefficient) {
-    thread_core_coefficient = coefficient;
+    THREAD_CORE_COEFFICIENT = coefficient;
     LOG_SEGCORE_DEBUG_ << "set thread pool core coefficient: "
-                       << thread_core_coefficient;
+                       << THREAD_CORE_COEFFICIENT;
 }
 
 void
 SetCpuNum(const int num) {
-    cpu_num = num;
+    CPU_NUM = num;
 }
 
 } // namespace milvus
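SetIndexSliceSize still receives megabytes from the caller, but the global now caches the byte value (size << 20), matching the reworked DEFAULT_INDEX_FILE_SLICE_SIZE below. A self-contained check of that conversion:

#include <cassert>
#include <cstdint>

int main() {
    const int64_t size_mb = 4;                      // configured slice size, in MB
    const int64_t file_slice_size = size_mb << 20;  // what SetIndexSliceSize stores
    assert(file_slice_size == 4194304);             // the new byte-valued default
    return 0;
}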
@@ -21,9 +21,9 @@
 namespace milvus {
 
-extern int64_t index_file_slice_size;
-extern int64_t thread_core_coefficient;
-extern int cpu_num;
+extern int64_t FILE_SLICE_SIZE;
+extern int64_t THREAD_CORE_COEFFICIENT;
+extern int CPU_NUM;
 
 void
 SetIndexSliceSize(const int64_t size);
@@ -39,10 +39,10 @@ const char INDEX_BUILD_ID_KEY[] = "indexBuildID";
 const char INDEX_ROOT_PATH[] = "index_files";
 const char RAWDATA_ROOT_PATH[] = "raw_datas";
 
 const int64_t DEFAULT_DISK_INDEX_MAX_MEMORY_LIMIT = 67108864;  // bytes
 const int64_t DEFAULT_FIELD_MAX_MEMORY_LIMIT = 67108864;       // bytes
 const int64_t DEFAULT_THREAD_CORE_COEFFICIENT = 50;
 
-const int64_t DEFAULT_INDEX_FILE_SLICE_SIZE = 4;        // megabytes
+const int64_t DEFAULT_INDEX_FILE_SLICE_SIZE = 4194304;  // bytes
 
 const int DEFAULT_CPU_NUM = 1;
@@ -18,19 +18,24 @@
 #include <map>
 #include <string>
 #include <vector>
 
 #include "Types.h"
 #include "common/CDataType.h"
 
 // NOTE: field_id can be system field
 // NOTE: Refer to common/SystemProperty.cpp for details
 // TODO: use arrow to pass field data instead of proto
-struct LoadFieldDataInfo {
+struct FieldBinlogInfo {
     int64_t field_id;
-    // const void* blob = nullptr;
-    const milvus::DataArray* field_data;
-    int64_t row_count{-1};
-    const char* mmap_dir_path{nullptr};
+    int64_t row_count = -1;
+    std::vector<std::string> insert_files;
+};
+
+struct LoadFieldDataInfo {
+    std::map<int64_t, FieldBinlogInfo> field_infos;
+    // Set null to disable mmap,
+    // mmap file path will be {mmap_dir_path}/{segment_id}/{field_id}
+    std::string mmap_dir_path = "";
 };
 
 struct LoadDeletedRecordInfo {
@@ -20,11 +20,10 @@
 namespace milvus {
 
-static const char* INDEX_FILE_SLICE_META = "SLICE_META";
-static const char* META = "meta";
-static const char* NAME = "name";
-static const char* SLICE_NUM = "slice_num";
-static const char* TOTAL_LEN = "total_len";
+std::string
+GenSlicedFileName(const std::string& prefix, size_t slice_num) {
+    return prefix + "_" + std::to_string(slice_num);
+}
 
 void
 Slice(const std::string& prefix,
@@ -42,8 +41,7 @@ Slice(const std::string& prefix,
         auto size = static_cast<size_t>(ri - i);
         auto slice_i = std::shared_ptr<uint8_t[]>(new uint8_t[size]);
         memcpy(slice_i.get(), data_src->data.get() + i, size);
-        binarySet.Append(
-            prefix + "_" + std::to_string(slice_num), slice_i, ri - i);
+        binarySet.Append(GenSlicedFileName(prefix, slice_num), slice_i, ri - i);
         i = ri;
     }
     ret[NAME] = prefix;
@@ -68,7 +66,7 @@ Assemble(BinarySet& binarySet) {
     auto p_data = std::shared_ptr<uint8_t[]>(new uint8_t[total_len]);
     int64_t pos = 0;
     for (auto i = 0; i < slice_num; ++i) {
-        auto slice_i_sp = binarySet.Erase(prefix + "_" + std::to_string(i));
+        auto slice_i_sp = binarySet.Erase(GenSlicedFileName(prefix, i));
         memcpy(p_data.get() + pos,
                slice_i_sp->data.get(),
                static_cast<size_t>(slice_i_sp->size));
@@ -90,17 +88,15 @@ Disassemble(BinarySet& binarySet) {
         }
     }
 
-    const int64_t slice_size_in_byte = index_file_slice_size << 20;
     std::vector<std::string> slice_key_list;
     for (auto& kv : binarySet.binary_map_) {
-        if (kv.second->size > slice_size_in_byte) {
+        if (kv.second->size > FILE_SLICE_SIZE) {
             slice_key_list.push_back(kv.first);
         }
     }
     for (auto& key : slice_key_list) {
         Config slice_i;
-        Slice(
-            key, binarySet.Erase(key), slice_size_in_byte, binarySet, slice_i);
+        Slice(key, binarySet.Erase(key), FILE_SLICE_SIZE, binarySet, slice_i);
         meta_info[META].emplace_back(slice_i);
     }
     if (!slice_key_list.empty()) {
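With GenSlicedFileName and the byte-valued FILE_SLICE_SIZE, Disassemble splits any oversized binary into ceil(size / FILE_SLICE_SIZE) pieces named prefix_0, prefix_1, and so on. A self-contained illustration of the naming and slice count (the "IVF" prefix and the blob size are made up):

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

int main() {
    const int64_t kFileSliceSize = int64_t(4) << 20;  // 4 MiB, the new default
    const int64_t blob_size = int64_t(10) << 20;      // a 10 MiB index blob

    // Slice() walks the blob in kFileSliceSize steps and appends each piece
    // under GenSlicedFileName(prefix, n) == prefix + "_" + std::to_string(n).
    std::vector<std::string> keys;
    for (int64_t i = 0, n = 0; i < blob_size; i += kFileSliceSize, ++n) {
        keys.push_back("IVF_" + std::to_string(n));
    }
    // Three slices: IVF_0 and IVF_1 hold 4 MiB each, IVF_2 the last 2 MiB.
    for (const auto& k : keys) {
        std::cout << k << "\n";
    }
    return 0;
}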
@@ -20,6 +20,16 @@
 namespace milvus {
 
+// used for disassemble and assemble index data
+const char INDEX_FILE_SLICE_META[] = "SLICE_META";
+const char META[] = "meta";
+const char NAME[] = "name";
+const char SLICE_NUM[] = "slice_num";
+const char TOTAL_LEN[] = "total_len";
+
+std::string
+GenSlicedFileName(const std::string& prefix, size_t slice_num);
+
 void
 Assemble(BinarySet& binarySet);
@@ -28,7 +28,6 @@
 #include "common/FieldMeta.h"
 #include "common/LoadInfo.h"
 #include "common/Types.h"
-#include "config/ConfigChunkManager.h"
 #include "exceptions/EasyAssert.h"
 #include "knowhere/dataset.h"
 #include "knowhere/expected.h"
@@ -209,263 +208,24 @@ MatchKnowhereError(knowhere::Status status) {
     }
 }
 
-inline size_t
-GetDataSize(const FieldMeta& field, size_t row_count, const DataArray* data) {
-    auto data_type = field.get_data_type();
-    if (datatype_is_variable(data_type)) {
-        switch (data_type) {
-            case DataType::VARCHAR:
-            case DataType::STRING: {
-                ssize_t size{};
-                for (auto& data : FIELD_DATA(data, string)) {
-                    size += data.size();
-                }
-                return size;
-            }
-            case DataType::JSON: {
-                ssize_t size{};
-                for (auto& data : FIELD_DATA(data, json)) {
-                    size += data.size();
-                }
-                return size;
-            }
-            default:
-                PanicInfo(fmt::format("not supported data type {}",
-                                      datatype_name(data_type)));
-        }
-    }
-
-    return field.get_sizeof() * row_count;
-}
+inline std::vector<IndexType>
+DISK_INDEX_LIST() {
+    static std::vector<IndexType> ret{
+        knowhere::IndexEnum::INDEX_DISKANN,
+    };
+    return ret;
+}
 
-inline void*
-FillField(DataType data_type,
-          size_t size,
-          const LoadFieldDataInfo& info,
-          void* dst) {
-    auto data = info.field_data;
-    switch (data_type) {
-        case DataType::BOOL: {
-            return memcpy(dst, FIELD_DATA(data, bool).data(), size);
-        }
-        case DataType::INT8: {
-            auto src_data = FIELD_DATA(data, int);
-            std::vector<int8_t> data_raw(src_data.size());
-            std::copy_n(src_data.data(), src_data.size(), data_raw.data());
-            return memcpy(dst, data_raw.data(), size);
-        }
-        case DataType::INT16: {
-            auto src_data = FIELD_DATA(data, int);
-            std::vector<int16_t> data_raw(src_data.size());
-            std::copy_n(src_data.data(), src_data.size(), data_raw.data());
-            return memcpy(dst, data_raw.data(), size);
-        }
-        case DataType::INT32: {
-            return memcpy(dst, FIELD_DATA(data, int).data(), size);
-        }
-        case DataType::INT64: {
-            return memcpy(dst, FIELD_DATA(data, long).data(), size);
-        }
-        case DataType::FLOAT: {
-            return memcpy(dst, FIELD_DATA(data, float).data(), size);
-        }
-        case DataType::DOUBLE: {
-            return memcpy(dst, FIELD_DATA(data, double).data(), size);
-        }
-        case DataType::VARCHAR: {
-            char* dest = reinterpret_cast<char*>(dst);
-            for (auto& data : FIELD_DATA(data, string)) {
-                memcpy(dest, data.data(), data.size());
-                dest += data.size();
-            }
-            return dst;
-        }
-
-        case DataType::JSON: {
-            char* dest = reinterpret_cast<char*>(dst);
-            for (auto& data : FIELD_DATA(data, json)) {
-                memcpy(dest, data.data(), data.size());
-                dest += data.size();
-            }
-            return dst;
-        }
-
-        case DataType::VECTOR_FLOAT:
-            return memcpy(dst, VEC_FIELD_DATA(data, float).data(), size);
-
-        case DataType::VECTOR_BINARY:
-            return memcpy(dst, VEC_FIELD_DATA(data, binary), size);
-
-        default: {
-            PanicInfo("unsupported");
-        }
-    }
-}
+template <typename T>
+inline bool
+is_in_list(const T& t, std::function<std::vector<T>()> list_func) {
+    auto l = list_func();
+    return std::find(l.begin(), l.end(), t) != l.end();
+}
 
-inline ssize_t
-WriteFieldData(int fd, DataType data_type, const DataArray* data, size_t size) {
-    switch (data_type) {
-        case DataType::BOOL: {
-            return write(fd, FIELD_DATA(data, bool).data(), size);
-        }
-        case DataType::INT8: {
-            auto src_data = FIELD_DATA(data, int);
-            std::vector<int8_t> data_raw(src_data.size());
-            std::copy_n(src_data.data(), src_data.size(), data_raw.data());
-            return write(fd, data_raw.data(), size);
-        }
-        case DataType::INT16: {
-            auto src_data = FIELD_DATA(data, int);
-            std::vector<int16_t> data_raw(src_data.size());
-            std::copy_n(src_data.data(), src_data.size(), data_raw.data());
-            return write(fd, data_raw.data(), size);
-        }
-        case DataType::INT32: {
-            return write(fd, FIELD_DATA(data, int).data(), size);
-        }
-        case DataType::INT64: {
-            return write(fd, FIELD_DATA(data, long).data(), size);
-        }
-        case DataType::FLOAT: {
-            return write(fd, FIELD_DATA(data, float).data(), size);
-        }
-        case DataType::DOUBLE: {
-            return write(fd, FIELD_DATA(data, double).data(), size);
-        }
-        case DataType::VARCHAR: {
-            ssize_t total_written{0};
-            for (auto& str : FIELD_DATA(data, string)) {
-                ssize_t written = write(fd, str.data(), str.size());
-                if (written < str.size()) {
-                    break;
-                }
-                total_written += written;
-            }
-            return total_written;
-        }
-        case DataType::JSON: {
-            ssize_t total_written{0};
-            for (auto& json : FIELD_DATA(data, json)) {
-                ssize_t written = write(fd, json.data(), json.size());
-                if (written < json.size()) {
-                    break;
-                }
-                total_written += written;
-            }
-            return total_written;
-        }
-        case DataType::VECTOR_FLOAT:
-            return write(fd, VEC_FIELD_DATA(data, float).data(), size);
-
-        case DataType::VECTOR_BINARY:
-            return write(fd, VEC_FIELD_DATA(data, binary), size);
-
-        default: {
-            PanicInfo("unsupported");
-        }
-    }
-}
-
-// CreateMap creates a memory mapping,
-// if mmap enabled, this writes field data to disk and create a map to the file,
-// otherwise this just alloc memory
-inline void*
-CreateMap(int64_t segment_id,
-          const FieldMeta& field_meta,
-          const LoadFieldDataInfo& info) {
-    static int mmap_flags = MAP_PRIVATE;
-#ifdef MAP_POPULATE
-    // macOS doesn't support MAP_POPULATE
-    mmap_flags |= MAP_POPULATE;
-#endif
-
-    // simdjson requires a padding following the json data
-    size_t padding = field_meta.get_data_type() == DataType::JSON
-                         ? simdjson::SIMDJSON_PADDING
-                         : 0;
-    // Allocate memory
-    if (info.mmap_dir_path == nullptr) {
-        auto data_type = field_meta.get_data_type();
-        auto data_size =
-            GetDataSize(field_meta, info.row_count, info.field_data);
-        if (data_size == 0)
-            return nullptr;
-
-        // Use anon mapping so we are able to free these memory with munmap only
-        void* map = mmap(nullptr,
-                         data_size + padding,
-                         PROT_READ | PROT_WRITE,
-                         mmap_flags | MAP_ANON,
-                         -1,
-                         0);
-        AssertInfo(
-            map != MAP_FAILED,
-            fmt::format("failed to create anon map, err: {}", strerror(errno)));
-        FillField(data_type, data_size, info, map);
-        return map;
-    }
-
-    auto filepath = std::filesystem::path(info.mmap_dir_path) /
-                    std::to_string(segment_id) / std::to_string(info.field_id);
-    auto dir = filepath.parent_path();
-    std::filesystem::create_directories(dir);
-
-    int fd =
-        open(filepath.c_str(), O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR);
-    AssertInfo(fd != -1,
-               fmt::format("failed to create mmap file {}", filepath.c_str()));
-
-    auto data_type = field_meta.get_data_type();
-    size_t size = field_meta.get_sizeof() * info.row_count;
-    auto written = WriteFieldData(fd, data_type, info.field_data, size);
-    AssertInfo(
-        written == size ||
-            written != -1 && datatype_is_variable(field_meta.get_data_type()),
-        fmt::format(
-            "failed to write data file {}, written {} but total {}, err: {}",
-            filepath.c_str(),
-            written,
-            size,
-            strerror(errno)));
-    int ok = fsync(fd);
-    AssertInfo(ok == 0,
-               fmt::format("failed to fsync mmap data file {}, err: {}",
-                           filepath.c_str(),
-                           strerror(errno)));
-
-    // Empty field
-    if (written == 0) {
-        return nullptr;
-    }
-
-    auto map = mmap(nullptr, written + padding, PROT_READ, mmap_flags, fd, 0);
-    AssertInfo(map != MAP_FAILED,
-               fmt::format("failed to create map for data file {}, err: {}",
-                           filepath.c_str(),
-                           strerror(errno)));
-
-#ifndef MAP_POPULATE
-    // Manually access the mapping to populate it
-    const size_t page_size = getpagesize();
-    char* begin = (char*)map;
-    char* end = begin + written;
-    for (char* page = begin; page < end; page += page_size) {
-        char value = page[0];
-    }
-#endif
-    // unlink this data file so
-    // then it will be auto removed after we don't need it again
-    ok = unlink(filepath.c_str());
-    AssertInfo(ok == 0,
-               fmt::format("failed to unlink mmap data file {}, err: {}",
-                           filepath.c_str(),
-                           strerror(errno)));
-    ok = close(fd);
-    AssertInfo(ok == 0,
-               fmt::format("failed to close data file {}, err: {}",
-                           filepath.c_str(),
-                           strerror(errno)));
-    return map;
-}
+inline bool
+is_in_disk_list(const IndexType& index_type) {
+    return is_in_list<IndexType>(index_type, DISK_INDEX_LIST);
+}
 
 } // namespace milvus
@@ -20,36 +20,24 @@
 #include "common/init_c.h"
 
 #include <string>
-#include "config/ConfigChunkManager.h"
 #include "common/Slice.h"
 #include "common/Common.h"
 #include "common/Tracer.h"
 #include "log/Log.h"
 
-std::once_flag flag1, flag2, flag3, flag4;
+std::once_flag flag1, flag2, flag3;
 std::once_flag traceFlag;
 
-void
-InitLocalRootPath(const char* root_path) {
-    std::string local_path_root(root_path);
-    std::call_once(
-        flag1,
-        [](std::string path) {
-            milvus::ChunkMangerConfig::SetLocalRootPath(path);
-        },
-        local_path_root);
-}
-
 void
 InitIndexSliceSize(const int64_t size) {
     std::call_once(
-        flag2, [](int64_t size) { milvus::SetIndexSliceSize(size); }, size);
+        flag1, [](int64_t size) { milvus::SetIndexSliceSize(size); }, size);
 }
 
 void
 InitThreadCoreCoefficient(const int64_t value) {
     std::call_once(
-        flag3,
+        flag2,
         [](int64_t value) { milvus::SetThreadCoreCoefficient(value); },
         value);
 }
@@ -57,7 +45,7 @@ InitThreadCoreCoefficient(const int64_t value) {
 void
 InitCpuNum(const int value) {
     std::call_once(
-        flag4, [](int value) { milvus::SetCpuNum(value); }, value);
+        flag3, [](int value) { milvus::SetCpuNum(value); }, value);
 }
 
 void
@@ -33,9 +33,6 @@ InitThreadCoreCoefficient(const int64_t);
 void
 InitCpuNum(const int);
 
-void
-InitLocalRootPath(const char*);
-
 void
 InitTrace(CTraceConfig* config);
@@ -69,16 +69,6 @@ typedef struct CProto {
     int64_t proto_size;
 } CProto;
 
-typedef struct CLoadFieldDataInfo {
-    int64_t field_id;
-    const uint8_t* blob;
-    uint64_t blob_size;
-    int64_t row_count;
-    // Set null to disable mmap,
-    // mmap file path will be {mmap_dir_path}/{segment_id}/{field_id}
-    const char* mmap_dir_path;
-} CLoadFieldDataInfo;
-
 typedef struct CLoadDeletedRecordInfo {
     void* timestamps;
     const uint8_t* primary_keys;
@@ -91,7 +81,7 @@ typedef struct CStorageConfig {
     const char* bucket_name;
     const char* access_key_id;
     const char* access_key_value;
-    const char* remote_root_path;
+    const char* root_path;
     const char* storage_type;
     const char* iam_endpoint;
     bool useSSL;
@@ -22,7 +22,6 @@ endif()
 set(CONFIG_SRC
     ConfigKnowhere.cpp
-    ConfigChunkManager.cpp
 )
 
 add_library(milvus_config STATIC ${CONFIG_SRC})
@@ -22,12 +22,10 @@
 namespace milvus::index {
 
 //// TODO: optimize here.
-class BoolIndex : public ScalarIndexSort<bool> {};
-using BoolIndexPtr = std::shared_ptr<BoolIndex>;
+using BoolIndexPtr = std::shared_ptr<ScalarIndexSort<bool>>;
 
 inline BoolIndexPtr
-CreateBoolIndex() {
-    return std::make_unique<BoolIndex>();
+CreateBoolIndex(storage::FileManagerImplPtr file_manager = nullptr) {
+    return std::make_unique<ScalarIndexSort<bool>>(file_manager);
 }
 } // namespace milvus::index
@@ -33,6 +33,9 @@ class IndexBase {
     virtual void
     Load(const BinarySet& binary_set, const Config& config = {}) = 0;
 
+    virtual void
+    Load(const Config& config = {}) = 0;
+
     virtual void
     BuildWithRawData(size_t n,
                      const void* values,
@@ -41,9 +44,15 @@ class IndexBase {
     virtual void
     BuildWithDataset(const DatasetPtr& dataset, const Config& config = {}) = 0;
 
+    virtual void
+    Build(const Config& config = {}) = 0;
+
     virtual int64_t
     Count() = 0;
 
+    virtual BinarySet
+    Upload(const Config& config = {}) = 0;
+
  protected:
     IndexType index_type_ = "";
 };
@@ -23,8 +23,9 @@ namespace milvus::index {
 template <typename T>
 inline ScalarIndexPtr<T>
-IndexFactory::CreateScalarIndex(const IndexType& index_type) {
-    return CreateScalarIndexSort<T>();
+IndexFactory::CreateScalarIndex(const IndexType& index_type,
+                                storage::FileManagerImplPtr file_manager) {
+    return CreateScalarIndexSort<T>(file_manager);
 }
 
 // template <>
@@ -35,9 +36,10 @@ IndexFactory::CreateScalarIndex(const IndexType& index_type) {
 template <>
 inline ScalarIndexPtr<std::string>
-IndexFactory::CreateScalarIndex(const IndexType& index_type) {
+IndexFactory::CreateScalarIndex(const IndexType& index_type,
+                                storage::FileManagerImplPtr file_manager) {
 #if defined(__linux__) || defined(__APPLE__)
-    return CreateStringIndexMarisa();
+    return CreateStringIndexMarisa(file_manager);
 #else
     throw std::runtime_error("unsupported platform");
 #endif
@@ -33,35 +33,36 @@ IndexFactory::CreateIndex(const CreateIndexInfo& create_index_info,
         return CreateVectorIndex(create_index_info, file_manager);
     }
 
-    return CreateScalarIndex(create_index_info);
+    return CreateScalarIndex(create_index_info, file_manager);
 }
 
 IndexBasePtr
-IndexFactory::CreateScalarIndex(const CreateIndexInfo& create_index_info) {
+IndexFactory::CreateScalarIndex(const CreateIndexInfo& create_index_info,
+                                storage::FileManagerImplPtr file_manager) {
     auto data_type = create_index_info.field_type;
     auto index_type = create_index_info.index_type;
 
     switch (data_type) {
         // create scalar index
         case DataType::BOOL:
-            return CreateScalarIndex<bool>(index_type);
+            return CreateScalarIndex<bool>(index_type, file_manager);
         case DataType::INT8:
-            return CreateScalarIndex<int8_t>(index_type);
+            return CreateScalarIndex<int8_t>(index_type, file_manager);
        case DataType::INT16:
-            return CreateScalarIndex<int16_t>(index_type);
+            return CreateScalarIndex<int16_t>(index_type, file_manager);
        case DataType::INT32:
-            return CreateScalarIndex<int32_t>(index_type);
+            return CreateScalarIndex<int32_t>(index_type, file_manager);
        case DataType::INT64:
-            return CreateScalarIndex<int64_t>(index_type);
+            return CreateScalarIndex<int64_t>(index_type, file_manager);
        case DataType::FLOAT:
-            return CreateScalarIndex<float>(index_type);
+            return CreateScalarIndex<float>(index_type, file_manager);
        case DataType::DOUBLE:
-            return CreateScalarIndex<double>(index_type);
+            return CreateScalarIndex<double>(index_type, file_manager);
 
         // create string index
         case DataType::STRING:
         case DataType::VARCHAR:
-            return CreateScalarIndex<std::string>(index_type);
+            return CreateScalarIndex<std::string>(index_type, file_manager);
         default:
             throw std::invalid_argument(
                 std::string("invalid data type to build index: ") +
@@ -93,10 +94,12 @@ IndexFactory::CreateVectorIndex(const CreateIndexInfo& create_index_info,
 #endif
 
     if (is_in_nm_list(index_type)) {
-        return std::make_unique<VectorMemNMIndex>(index_type, metric_type);
+        return std::make_unique<VectorMemNMIndex>(
+            index_type, metric_type, file_manager);
     }
     // create mem index
-    return std::make_unique<VectorMemIndex>(index_type, metric_type);
+    return std::make_unique<VectorMemIndex>(
+        index_type, metric_type, file_manager);
 }
 
 } // namespace milvus::index
@@ -21,7 +21,6 @@
 #include <shared_mutex>
 
 #include "common/type_c.h"
-#include "config/ConfigChunkManager.h"
 #include "index/Index.h"
 #include "index/ScalarIndex.h"
 #include "index/VectorIndex.h"
@@ -29,11 +28,6 @@
 #include "storage/Types.h"
 #include "storage/FileManager.h"
 
-#ifdef BUILD_DISK_ANN
-#include "storage/LocalChunkManager.h"
-#include "storage/MinioChunkManager.h"
-#endif
-
 namespace milvus::index {
 
 class IndexFactory {
@@ -61,14 +55,16 @@ class IndexFactory {
                       storage::FileManagerImplPtr file_manager);
 
     IndexBasePtr
-    CreateScalarIndex(const CreateIndexInfo& create_index_info);
+    CreateScalarIndex(const CreateIndexInfo& create_index_info,
+                      storage::FileManagerImplPtr file_manager = nullptr);
 
     // IndexBasePtr
     // CreateIndex(DataType dtype, const IndexType& index_type);
  private:
     template <typename T>
     ScalarIndexPtr<T>
-    CreateScalarIndex(const IndexType& index_type);
+    CreateScalarIndex(const IndexType& index_type,
+                      storage::FileManagerImplPtr file_manager = nullptr);
 };
 
 } // namespace milvus::index
@@ -24,22 +24,64 @@
 #include "Meta.h"
 #include "common/Utils.h"
 #include "common/Slice.h"
 #include "index/Utils.h"
 
 namespace milvus::index {
 
 template <typename T>
-inline ScalarIndexSort<T>::ScalarIndexSort() : is_built_(false), data_() {
-}
-
-template <typename T>
-inline ScalarIndexSort<T>::ScalarIndexSort(const size_t n, const T* values)
-    : is_built_(false) {
-    ScalarIndexSort<T>::BuildWithDataset(n, values);
+inline ScalarIndexSort<T>::ScalarIndexSort(
+    storage::FileManagerImplPtr file_manager)
+    : is_built_(false), data_() {
+    if (file_manager != nullptr) {
+        file_manager_ = std::dynamic_pointer_cast<storage::MemFileManagerImpl>(
+            file_manager);
+    }
 }
 
 template <typename T>
 inline void
-ScalarIndexSort<T>::Build(const size_t n, const T* values) {
+ScalarIndexSort<T>::Build(const Config& config) {
     if (is_built_)
         return;
+    auto insert_files =
+        GetValueFromConfig<std::vector<std::string>>(config, "insert_files");
+    AssertInfo(insert_files.has_value(),
+               "insert file paths is empty when build index");
+    auto field_datas =
+        file_manager_->CacheRawDataToMemory(insert_files.value());
+
+    int64_t total_num_rows = 0;
+    for (auto data : field_datas) {
+        total_num_rows += data->get_num_rows();
+    }
+    if (total_num_rows == 0) {
+        // todo: throw an exception
+        throw std::invalid_argument(
+            "ScalarIndexSort cannot build null values!");
+    }
+
+    data_.reserve(total_num_rows);
+    int64_t offset = 0;
+    for (auto data : field_datas) {
+        auto slice_num = data->get_num_rows();
+        for (size_t i = 0; i < slice_num; ++i) {
+            auto value = reinterpret_cast<const T*>(data->RawValue(i));
+            data_.emplace_back(IndexStructure(*value, offset));
+            offset++;
+        }
+    }
+
+    std::sort(data_.begin(), data_.end());
+    idx_to_offsets_.resize(total_num_rows);
+    for (size_t i = 0; i < total_num_rows; ++i) {
+        idx_to_offsets_[data_[i].idx_] = i;
+    }
+    is_built_ = true;
+}
+
+template <typename T>
+inline void
+ScalarIndexSort<T>::Build(size_t n, const T* values) {
+    if (is_built_)
+        return;
     if (n == 0) {
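The Build(config) overload above pulls the raw binlogs through the file manager and then builds the same structure as the n/values overload: data_ holds (value, original offset) pairs sorted by value, and idx_to_offsets_ maps an original offset back to its sorted position. A toy, dependency-free version of that layout:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

int main() {
    const std::vector<int> values{30, 10, 20};

    std::vector<std::pair<int, size_t>> data;  // (value, original offset)
    for (size_t i = 0; i < values.size(); ++i) {
        data.emplace_back(values[i], i);
    }
    std::sort(data.begin(), data.end());       // sorted by value: 10, 20, 30

    std::vector<size_t> idx_to_offsets(values.size());
    for (size_t i = 0; i < data.size(); ++i) {
        idx_to_offsets[data[i].second] = i;
    }
    // values[0] == 30 now sits at sorted position 2.
    std::cout << idx_to_offsets[0] << "\n";
    return 0;
}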
@@ -82,11 +124,26 @@ ScalarIndexSort<T>::Serialize(const Config& config) {
     return res_set;
 }
 
+template <typename T>
+inline BinarySet
+ScalarIndexSort<T>::Upload(const Config& config) {
+    auto binary_set = Serialize(config);
+    file_manager_->AddFile(binary_set);
+
+    auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize();
+    BinarySet ret;
+    for (auto& file : remote_paths_to_size) {
+        ret.Append(file.first, nullptr, file.second);
+    }
+
+    return ret;
+}
+
 template <typename T>
 inline void
-ScalarIndexSort<T>::Load(const BinarySet& index_binary, const Config& config) {
+ScalarIndexSort<T>::LoadWithoutAssemble(const BinarySet& index_binary,
+                                        const Config& config) {
     size_t index_size;
-    milvus::Assemble(const_cast<BinarySet&>(index_binary));
     auto index_length = index_binary.GetByName("index_length");
     memcpy(&index_size, index_length->data.get(), (size_t)index_length->size);

@@ -100,6 +157,34 @@ ScalarIndexSort<T>::Load(const BinarySet& index_binary, const Config& config) {
     is_built_ = true;
 }
 
+template <typename T>
+inline void
+ScalarIndexSort<T>::Load(const BinarySet& index_binary, const Config& config) {
+    milvus::Assemble(const_cast<BinarySet&>(index_binary));
+    LoadWithoutAssemble(index_binary, config);
+}
+
+template <typename T>
+inline void
+ScalarIndexSort<T>::Load(const Config& config) {
+    auto index_files =
+        GetValueFromConfig<std::vector<std::string>>(config, "index_files");
+    AssertInfo(index_files.has_value(),
+               "index file paths is empty when load disk ann index");
+    auto index_datas = file_manager_->LoadIndexToMemory(index_files.value());
+    AssembleIndexDatas(index_datas);
+    BinarySet binary_set;
+    for (auto& [key, data] : index_datas) {
+        auto size = data->Size();
+        auto deleter = [&](uint8_t*) {};  // avoid repeated deconstruction
+        auto buf = std::shared_ptr<uint8_t[]>(
+            (uint8_t*)const_cast<void*>(data->Data()), deleter);
+        binary_set.Append(key, buf, size);
+    }
+
+    LoadWithoutAssemble(binary_set, config);
+}
+
 template <typename T>
 inline const TargetBitmap
 ScalarIndexSort<T>::In(const size_t n, const T* values) {
@@ -21,16 +21,19 @@
 #include <utility>
 #include <vector>
 #include <string>
 #include <map>
 
 #include "index/IndexStructure.h"
 #include "index/ScalarIndex.h"
+#include "storage/MemFileManagerImpl.h"
 
 namespace milvus::index {
 
 template <typename T>
 class ScalarIndexSort : public ScalarIndex<T> {
  public:
-    ScalarIndexSort();
-    ScalarIndexSort(size_t n, const T* values);
+    explicit ScalarIndexSort(
+        storage::FileManagerImplPtr file_manager = nullptr);
 
     BinarySet
     Serialize(const Config& config) override;
@@ -38,6 +41,9 @@ class ScalarIndexSort : public ScalarIndex<T> {
     void
     Load(const BinarySet& index_binary, const Config& config = {}) override;
 
+    void
+    Load(const Config& config = {}) override;
+
     int64_t
     Count() override {
         return data_.size();

@@ -46,6 +52,9 @@ class ScalarIndexSort : public ScalarIndex<T> {
     void
     Build(size_t n, const T* values) override;
 
+    void
+    Build(const Config& config = {}) override;
+
     const TargetBitmap
     In(size_t n, const T* values) override;

@@ -69,6 +78,9 @@ class ScalarIndexSort : public ScalarIndex<T> {
         return (int64_t)data_.size();
     }
 
+    BinarySet
+    Upload(const Config& config = {}) override;
+
  public:
     const std::vector<IndexStructure<T>>&
     GetData() {

@@ -80,11 +92,15 @@ class ScalarIndexSort : public ScalarIndex<T> {
         return is_built_;
     }
 
+    void
+    LoadWithoutAssemble(const BinarySet& binary_set, const Config& config);
+
  private:
     bool is_built_;
     Config config_;
     std::vector<int32_t> idx_to_offsets_;  // used to retrieve.
     std::vector<IndexStructure<T>> data_;
+    std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
 };
 
 template <typename T>

@@ -97,7 +113,7 @@ using ScalarIndexSortPtr = std::unique_ptr<ScalarIndexSort<T>>;
 namespace milvus::index {
 template <typename T>
 inline ScalarIndexSortPtr<T>
-CreateScalarIndexSort() {
-    return std::make_unique<ScalarIndexSort<T>>();
+CreateScalarIndexSort(storage::FileManagerImplPtr file_manager = nullptr) {
+    return std::make_unique<ScalarIndexSort<T>>(file_manager);
 }
 } // namespace milvus::index
@@ -31,11 +31,77 @@ namespace milvus::index {
 #if defined(__linux__) || defined(__APPLE__)
 
+class UnistdException : public std::runtime_error {
+ public:
+    explicit UnistdException(const std::string& msg) : std::runtime_error(msg) {
+    }
+
+    virtual ~UnistdException() {
+    }
+};
+
+StringIndexMarisa::StringIndexMarisa(storage::FileManagerImplPtr file_manager) {
+    if (file_manager != nullptr) {
+        file_manager_ = std::dynamic_pointer_cast<storage::MemFileManagerImpl>(
+            file_manager);
+    }
+}
+
 int64_t
 StringIndexMarisa::Size() {
     return trie_.size();
 }
 
+bool
+valid_str_id(size_t str_id) {
+    return str_id >= 0 && str_id != MARISA_INVALID_KEY_ID;
+}
+
+void
+StringIndexMarisa::Build(const Config& config) {
+    if (built_) {
+        throw std::runtime_error("index has been built");
+    }
+
+    auto insert_files =
+        GetValueFromConfig<std::vector<std::string>>(config, "insert_files");
+    AssertInfo(insert_files.has_value(),
+               "insert file paths is empty when build index");
+    auto field_datas =
+        file_manager_->CacheRawDataToMemory(insert_files.value());
+    int64_t total_num_rows = 0;
+
+    // fill key set.
+    marisa::Keyset keyset;
+    for (auto data : field_datas) {
+        auto slice_num = data->get_num_rows();
+        for (size_t i = 0; i < slice_num; ++i) {
+            keyset.push_back(
+                (*static_cast<const std::string*>(data->RawValue(i))).c_str());
+        }
+        total_num_rows += slice_num;
+    }
+    trie_.build(keyset);
+
+    // fill str_ids_
+    str_ids_.resize(total_num_rows);
+    int64_t offset = 0;
+    for (auto data : field_datas) {
+        auto slice_num = data->get_num_rows();
+        for (size_t i = 0; i < slice_num; ++i) {
+            auto str_id =
+                lookup(*static_cast<const std::string*>(data->RawValue(i)));
+            AssertInfo(valid_str_id(str_id), "invalid marisa key");
+            str_ids_[offset++] = str_id;
+        }
+    }
+
+    // fill str_ids_to_offsets_
+    fill_offsets();
+
+    built_ = true;
+}
+
 void
 StringIndexMarisa::Build(size_t n, const std::string* values) {
     if (built_) {

@@ -68,15 +134,17 @@ StringIndexMarisa::Serialize(const Config& config) {
     trie_.write(fd);
 
     auto size = get_file_size(fd);
-    auto buf = new uint8_t[size];
+    auto index_data = std::shared_ptr<uint8_t[]>(new uint8_t[size]);
 
-    while (read(fd, buf, size) != size) {
-        lseek(fd, 0, SEEK_SET);
-    }
-    std::shared_ptr<uint8_t[]> index_data(buf);
+    lseek(fd, 0, SEEK_SET);
+    auto status = read(fd, index_data.get(), size);
 
     close(fd);
     remove(file.c_str());
+    if (status != size) {
+        throw UnistdException("read index from fd error, errorCode is " +
+                              std::to_string(status));
+    }
 
     auto str_ids_len = str_ids_.size() * sizeof(size_t);
     std::shared_ptr<uint8_t[]> str_ids(new uint8_t[str_ids_len]);

@@ -86,15 +154,28 @@ StringIndexMarisa::Serialize(const Config& config) {
     res_set.Append(MARISA_TRIE_INDEX, index_data, size);
     res_set.Append(MARISA_STR_IDS, str_ids, str_ids_len);
 
-    milvus::Disassemble(res_set);
+    Disassemble(res_set);
 
     return res_set;
 }
 
-void
-StringIndexMarisa::Load(const BinarySet& set, const Config& config) {
-    milvus::Assemble(const_cast<BinarySet&>(set));
+BinarySet
+StringIndexMarisa::Upload(const Config& config) {
+    auto binary_set = Serialize(config);
+    file_manager_->AddFile(binary_set);
+
+    auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize();
+    BinarySet ret;
+    for (auto& file : remote_paths_to_size) {
+        ret.Append(file.first, nullptr, file.second);
+    }
+
+    return ret;
+}
+
+void
+StringIndexMarisa::LoadWithoutAssemble(const BinarySet& set,
+                                       const Config& config) {
     auto uuid = boost::uuids::random_generator()();
     auto uuid_string = boost::uuids::to_string(uuid);
     auto file = std::string("/tmp/") + uuid_string;

@@ -105,8 +186,13 @@ StringIndexMarisa::Load(const BinarySet& set, const Config& config) {
     auto fd = open(
         file.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IXUSR);
     lseek(fd, 0, SEEK_SET);
-    while (write(fd, index->data.get(), len) != len) {
-        lseek(fd, 0, SEEK_SET);
+
+    auto status = write(fd, index->data.get(), len);
+    if (status != len) {
+        close(fd);
+        remove(file.c_str());
+        throw UnistdException("write index to fd error, errorCode is " +
+                              std::to_string(status));
     }
 
     lseek(fd, 0, SEEK_SET);

@@ -122,9 +208,30 @@ StringIndexMarisa::Load(const BinarySet& set, const Config& config) {
     fill_offsets();
 }
 
-bool
-valid_str_id(size_t str_id) {
-    return str_id >= 0 && str_id != MARISA_INVALID_KEY_ID;
+void
+StringIndexMarisa::Load(const BinarySet& set, const Config& config) {
+    milvus::Assemble(const_cast<BinarySet&>(set));
+    LoadWithoutAssemble(set, config);
+}
+
+void
+StringIndexMarisa::Load(const Config& config) {
+    auto index_files =
+        GetValueFromConfig<std::vector<std::string>>(config, "index_files");
+    AssertInfo(index_files.has_value(),
+               "index file paths is empty when load index");
+    auto index_datas = file_manager_->LoadIndexToMemory(index_files.value());
+    AssembleIndexDatas(index_datas);
+    BinarySet binary_set;
+    for (auto& [key, data] : index_datas) {
+        auto size = data->Size();
+        auto deleter = [&](uint8_t*) {};  // avoid repeated deconstruction
+        auto buf = std::shared_ptr<uint8_t[]>(
+            (uint8_t*)const_cast<void*>(data->Data()), deleter);
+        binary_set.Append(key, buf, size);
+    }
+
+    LoadWithoutAssemble(binary_set, config);
 }
 
 const TargetBitmap

@@ -248,7 +355,7 @@ StringIndexMarisa::fill_str_ids(size_t n, const std::string* values) {
     for (size_t i = 0; i < n; i++) {
         auto str = values[i];
         auto str_id = lookup(str);
-        assert(valid_str_id(str_id));
+        AssertInfo(valid_str_id(str_id), "invalid marisa key");
         str_ids_[i] = str_id;
     }
 }
@@ -24,12 +24,14 @@
 #include <vector>
 #include <map>
 #include <memory>
+#include "storage/MemFileManagerImpl.h"
 
 namespace milvus::index {
 
 class StringIndexMarisa : public StringIndex {
  public:
-    StringIndexMarisa() = default;
+    explicit StringIndexMarisa(
+        storage::FileManagerImplPtr file_manager = nullptr);
 
     int64_t
     Size() override;

@@ -40,6 +42,9 @@ class StringIndexMarisa : public StringIndex {
     void
     Load(const BinarySet& set, const Config& config = {}) override;
 
+    void
+    Load(const Config& config = {}) override;
+
     int64_t
     Count() override {
         return str_ids_.size();

@@ -48,6 +53,9 @@ class StringIndexMarisa : public StringIndex {
     void
     Build(size_t n, const std::string* values) override;
 
+    void
+    Build(const Config& config = {}) override;
+
     const TargetBitmap
     In(size_t n, const std::string* values) override;

@@ -69,6 +77,9 @@ class StringIndexMarisa : public StringIndex {
     std::string
     Reverse_Lookup(size_t offset) const override;
 
+    BinarySet
+    Upload(const Config& config = {}) override;
+
  private:
     void
     fill_str_ids(size_t n, const std::string* values);

@@ -83,19 +94,23 @@ class StringIndexMarisa : public StringIndex {
     std::vector<size_t>
     prefix_match(const std::string_view prefix);
 
+    void
+    LoadWithoutAssemble(const BinarySet& binary_set, const Config& config);
+
  private:
     Config config_;
     marisa::Trie trie_;
     std::vector<size_t> str_ids_;  // used to retrieve.
     std::map<size_t, std::vector<size_t>> str_ids_to_offsets_;
     bool built_ = false;
+    std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
 };
 
 using StringIndexMarisaPtr = std::unique_ptr<StringIndexMarisa>;
 
 inline StringIndexPtr
-CreateStringIndexMarisa() {
-    return std::make_unique<StringIndexMarisa>();
+CreateStringIndexMarisa(storage::FileManagerImplPtr file_manager = nullptr) {
+    return std::make_unique<StringIndexMarisa>(file_manager);
 }
 
 } // namespace milvus::index
@@ -25,6 +25,9 @@
 #include <google/protobuf/text_format.h>
 #include "exceptions/EasyAssert.h"
 #include "knowhere/comp/index_param.h"
+#include "common/Slice.h"
+#include "storage/Util.h"
 
 namespace milvus::index {
 
 size_t

@@ -51,14 +54,6 @@ BIN_List() {
     return ret;
 }
 
-std::vector<IndexType>
-DISK_LIST() {
-    static std::vector<IndexType> ret{
-        knowhere::IndexEnum::INDEX_DISKANN,
-    };
-    return ret;
-}
-
 std::vector<std::tuple<IndexType, MetricType>>
 unsupported_index_combinations() {
     static std::vector<std::tuple<IndexType, MetricType>> ret{

@@ -78,11 +73,6 @@ is_in_nm_list(const IndexType& index_type) {
     return is_in_list<IndexType>(index_type, NM_List);
 }
 
-bool
-is_in_disk_list(const IndexType& index_type) {
-    return is_in_list<IndexType>(index_type, DISK_LIST);
-}
-
 bool
 is_unsupported(const IndexType& index_type, const MetricType& metric_type) {
     return is_in_list<std::tuple<IndexType, MetricType>>(

@@ -197,4 +187,36 @@ ParseConfigFromIndexParams(
     return config;
 }
 
+void
+AssembleIndexDatas(std::map<std::string, storage::FieldDataPtr>& index_datas) {
+    if (index_datas.find(INDEX_FILE_SLICE_META) != index_datas.end()) {
+        auto slice_meta = index_datas.at(INDEX_FILE_SLICE_META);
+        Config meta_data = Config::parse(std::string(
+            static_cast<const char*>(slice_meta->Data()), slice_meta->Size()));
+
+        for (auto& item : meta_data[META]) {
+            std::string prefix = item[NAME];
+            int slice_num = item[SLICE_NUM];
+            auto total_len = static_cast<size_t>(item[TOTAL_LEN]);
+
+            auto new_field_data =
+                storage::CreateFieldData(DataType::INT8, 1, total_len);
+
+            for (auto i = 0; i < slice_num; ++i) {
+                std::string file_name = GenSlicedFileName(prefix, i);
+                AssertInfo(index_datas.find(file_name) != index_datas.end(),
+                           "lost index slice data");
+                auto data = index_datas.at(file_name);
+                auto len = data->Size();
+                new_field_data->FillFieldData(data->Data(), len);
+                index_datas.erase(file_name);
+            }
+            AssertInfo(
+                new_field_data->IsFull(),
+                "index len is inconsistent after disassemble and assemble");
+            index_datas[prefix] = new_field_data;
+        }
+    }
+}
+
 } // namespace milvus::index
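AssembleIndexDatas is the download-side inverse of Disassemble: it parses the SLICE_META entry and concatenates prefix_0 .. prefix_{slice_num-1} back into one buffer of total_len bytes. Judging from the keys used above (META, NAME, SLICE_NUM, TOTAL_LEN) and the nlohmann-json-style Config API in the diff, the meta payload looks roughly like the hypothetical example below; the "IVF" prefix and sizes are invented:

#include <iostream>
#include <string>
#include <nlohmann/json.hpp>

int main() {
    // Hypothetical SLICE_META content for one 10 MiB binary cut into 3 slices.
    auto meta = nlohmann::json::parse(R"({
        "meta": [
            { "name": "IVF", "slice_num": 3, "total_len": 10485760 }
        ]
    })");
    for (const auto& item : meta["meta"]) {
        const std::string prefix = item["name"];
        const int slice_num = item["slice_num"];
        const size_t total_len = item["total_len"];
        // AssembleIndexDatas would now concatenate IVF_0, IVF_1, IVF_2
        // into a single total_len-byte field data entry keyed by "IVF".
        std::cout << prefix << ": " << slice_num << " slices, "
                  << total_len << " bytes\n";
    }
    return 0;
}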
@@ -29,6 +29,7 @@
 #include "common/Types.h"
 #include "index/IndexInfo.h"
 #include "storage/Types.h"
+#include "storage/FieldData.h"
 
 namespace milvus::index {

@@ -44,22 +45,12 @@ BIN_List();
 std::vector<std::tuple<IndexType, MetricType>>
 unsupported_index_combinations();
 
-template <typename T>
-inline bool
-is_in_list(const T& t, std::function<std::vector<T>()> list_func) {
-    auto l = list_func();
-    return std::find(l.begin(), l.end(), t) != l.end();
-}
-
 bool
 is_in_bin_list(const IndexType& index_type);
 
 bool
 is_in_nm_list(const IndexType& index_type);
 
-bool
-is_in_disk_list(const IndexType& index_type);
-
 bool
 is_unsupported(const IndexType& index_type, const MetricType& metric_type);

@@ -118,4 +109,7 @@ Config
 ParseConfigFromIndexParams(
     const std::map<std::string, std::string>& index_params);
 
+void
+AssembleIndexDatas(std::map<std::string, storage::FieldDataPtr>& index_datas);
+
 } // namespace milvus::index
@@ -20,7 +20,7 @@
 #include "config/ConfigKnowhere.h"
 #include "index/Meta.h"
 #include "index/Utils.h"
-#include "storage/LocalChunkManager.h"
+#include "storage/LocalChunkManagerSingleton.h"
 #include "storage/Util.h"
 #include "common/Consts.h"
 #include "common/RangeSearchHelper.h"

@@ -42,17 +42,18 @@ VectorDiskAnnIndex<T>::VectorDiskAnnIndex(
     : VectorIndex(index_type, metric_type) {
     file_manager_ =
         std::dynamic_pointer_cast<storage::DiskFileManagerImpl>(file_manager);
-    auto& local_chunk_manager = storage::LocalChunkManager::GetInstance();
+    auto local_chunk_manager =
+        storage::LocalChunkManagerSingleton::GetInstance().GetChunkManager();
     auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
 
     // As we have guarded dup-load in QueryNode,
     // this assertion failed only if the Milvus rebooted in the same pod,
     // need to remove these files then re-load the segment
-    if (local_chunk_manager.Exist(local_index_path_prefix)) {
-        local_chunk_manager.RemoveDir(local_index_path_prefix);
+    if (local_chunk_manager->Exist(local_index_path_prefix)) {
+        local_chunk_manager->RemoveDir(local_index_path_prefix);
     }
 
-    local_chunk_manager.CreateDir(local_index_path_prefix);
+    local_chunk_manager->CreateDir(local_index_path_prefix);
     auto diskann_index_pack =
         knowhere::Pack(std::shared_ptr<knowhere::FileManager>(file_manager));
     index_ = knowhere::IndexFactory::Instance().Create(GetIndexType(),

@@ -63,6 +64,12 @@ template <typename T>
 void
 VectorDiskAnnIndex<T>::Load(const BinarySet& binary_set /* not used */,
                             const Config& config) {
+    Load(config);
+}
+
+template <typename T>
+void
+VectorDiskAnnIndex<T>::Load(const Config& config) {
     knowhere::Json load_config = update_load_json(config);
 
     auto index_files =

@@ -80,18 +87,65 @@ VectorDiskAnnIndex<T>::Load(const BinarySet& binary_set /* not used */,
     SetDim(index_.Dim());
 }
 
+template <typename T>
+BinarySet
+VectorDiskAnnIndex<T>::Upload(const Config& config) {
+    auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize();
+    BinarySet ret;
+    for (auto& file : remote_paths_to_size) {
+        ret.Append(file.first, nullptr, file.second);
+    }
+
+    return ret;
+}
+
+template <typename T>
+void
+VectorDiskAnnIndex<T>::Build(const Config& config) {
+    auto local_chunk_manager =
+        storage::LocalChunkManagerSingleton::GetInstance().GetChunkManager();
+    knowhere::Json build_config;
+    build_config.update(config);
+
+    auto segment_id = file_manager_->GetFieldDataMeta().segment_id;
+    auto insert_files =
+        GetValueFromConfig<std::vector<std::string>>(config, "insert_files");
+    AssertInfo(insert_files.has_value(),
+               "insert file paths is empty when build disk ann index");
+    auto local_data_path =
+        file_manager_->CacheRawDataToDisk(insert_files.value());
+    build_config[DISK_ANN_RAW_DATA_PATH] = local_data_path;
+
+    auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
+    build_config[DISK_ANN_PREFIX_PATH] = local_index_path_prefix;
+
+    auto num_threads = GetValueFromConfig<std::string>(
+        build_config, DISK_ANN_BUILD_THREAD_NUM);
+    AssertInfo(num_threads.has_value(),
+               "param " + std::string(DISK_ANN_BUILD_THREAD_NUM) + "is empty");
+    build_config[DISK_ANN_THREADS_NUM] = std::atoi(num_threads.value().c_str());
+    knowhere::DataSet* ds_ptr = nullptr;
+    build_config.erase("insert_files");
+    index_.Build(*ds_ptr, build_config);
+
+    local_chunk_manager->RemoveDir(
+        storage::GetSegmentRawDataPathPrefix(local_chunk_manager, segment_id));
+}
+
 template <typename T>
 void
 VectorDiskAnnIndex<T>::BuildWithDataset(const DatasetPtr& dataset,
                                         const Config& config) {
-    auto& local_chunk_manager = storage::LocalChunkManager::GetInstance();
+    auto local_chunk_manager =
+        storage::LocalChunkManagerSingleton::GetInstance().GetChunkManager();
     knowhere::Json build_config;
     build_config.update(config);
     // set data path
-    auto segment_id = file_manager_->GetFileDataMeta().segment_id;
-    auto field_id = file_manager_->GetFileDataMeta().field_id;
-    auto local_data_path =
-        storage::GenFieldRawDataPathPrefix(segment_id, field_id) + "raw_data";
+    auto segment_id = file_manager_->GetFieldDataMeta().segment_id;
+    auto field_id = file_manager_->GetFieldDataMeta().field_id;
+    auto local_data_path = storage::GenFieldRawDataPathPrefix(
+                               local_chunk_manager, segment_id, field_id) +
+                           "raw_data";
     build_config[DISK_ANN_RAW_DATA_PATH] = local_data_path;
 
     auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();

@@ -103,30 +157,31 @@ VectorDiskAnnIndex<T>::BuildWithDataset(const DatasetPtr& dataset,
                "param " + std::string(DISK_ANN_BUILD_THREAD_NUM) + "is empty");
     build_config[DISK_ANN_THREADS_NUM] = std::atoi(num_threads.value().c_str());
 
-    if (!local_chunk_manager.Exist(local_data_path)) {
-        local_chunk_manager.CreateFile(local_data_path);
+    if (!local_chunk_manager->Exist(local_data_path)) {
+        local_chunk_manager->CreateFile(local_data_path);
     }
 
     int64_t offset = 0;
     auto num = uint32_t(milvus::GetDatasetRows(dataset));
-    local_chunk_manager.Write(local_data_path, offset, &num, sizeof(num));
+    local_chunk_manager->Write(local_data_path, offset, &num, sizeof(num));
     offset += sizeof(num);
 
     auto dim = uint32_t(milvus::GetDatasetDim(dataset));
-    local_chunk_manager.Write(local_data_path, offset, &dim, sizeof(dim));
+    local_chunk_manager->Write(local_data_path, offset, &dim, sizeof(dim));
     offset += sizeof(dim);
 
     auto data_size = num * dim * sizeof(float);
     auto raw_data = const_cast<void*>(milvus::GetDatasetTensor(dataset));
-    local_chunk_manager.Write(local_data_path, offset, raw_data, data_size);
+    local_chunk_manager->Write(local_data_path, offset, raw_data, data_size);
 
     knowhere::DataSet* ds_ptr = nullptr;
     auto stat = index_.Build(*ds_ptr, build_config);
     if (stat != knowhere::Status::success)
         PanicCodeInfo(ErrorCodeEnum::BuildIndexError,
                       "failed to build index, " + MatchKnowhereError(stat));
-    local_chunk_manager.RemoveDir(
-        storage::GetSegmentRawDataPathPrefix(segment_id));
+    local_chunk_manager->RemoveDir(
+        storage::GetSegmentRawDataPathPrefix(local_chunk_manager, segment_id));
 
     // TODO ::
     // SetDim(index_->Dim());
 }

@@ -263,9 +318,11 @@ VectorDiskAnnIndex<T>::GetVector(const DatasetPtr dataset) const {
 template <typename T>
 void
 VectorDiskAnnIndex<T>::CleanLocalData() {
-    auto& local_chunk_manager = storage::LocalChunkManager::GetInstance();
-    local_chunk_manager.RemoveDir(file_manager_->GetLocalIndexObjectPrefix());
-    local_chunk_manager.RemoveDir(file_manager_->GetLocalRawDataObjectPrefix());
+    auto local_chunk_manager =
+        storage::LocalChunkManagerSingleton::GetInstance().GetChunkManager();
+    local_chunk_manager->RemoveDir(file_manager_->GetLocalIndexObjectPrefix());
+    local_chunk_manager->RemoveDir(
+        file_manager_->GetLocalRawDataObjectPrefix());
 }
 
 template <typename T>
@@ -33,7 +33,7 @@ class VectorDiskAnnIndex : public VectorIndex {
                        const MetricType& metric_type,
                        storage::FileManagerImplPtr file_manager);
     BinarySet
-    Serialize(const Config& config) override {
+    Serialize(const Config& config) override {  // deprecated
         auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize();
         BinarySet binary_set;
         for (auto& file : remote_paths_to_size) {

@@ -43,6 +43,9 @@ class VectorDiskAnnIndex : public VectorIndex {
         return binary_set;
     }
 
+    BinarySet
+    Upload(const Config& config = {}) override;
+
     int64_t
     Count() override {
         return index_.Count();

@@ -52,10 +55,16 @@ class VectorDiskAnnIndex : public VectorIndex {
     Load(const BinarySet& binary_set /* not used */,
          const Config& config = {}) override;
 
+    void
+    Load(const Config& config = {}) override;
+
     void
     BuildWithDataset(const DatasetPtr& dataset,
                      const Config& config = {}) override;
 
+    void
+    Build(const Config& config = {}) override;
+
     std::unique_ptr<SearchResult>
     Query(const DatasetPtr dataset,
           const SearchInfo& search_info,
@ -33,14 +33,32 @@
|
||||
namespace milvus::index {
|
||||
|
||||
VectorMemIndex::VectorMemIndex(const IndexType& index_type,
|
||||
const MetricType& metric_type)
|
||||
const MetricType& metric_type,
|
||||
storage::FileManagerImplPtr file_manager)
|
||||
: VectorIndex(index_type, metric_type) {
|
||||
AssertInfo(!is_unsupported(index_type, metric_type),
|
||||
index_type + " doesn't support metric: " + metric_type);
|
||||
|
||||
if (file_manager != nullptr) {
|
||||
file_manager_ = std::dynamic_pointer_cast<storage::MemFileManagerImpl>(
|
||||
file_manager);
|
||||
}
|
||||
index_ = knowhere::IndexFactory::Instance().Create(GetIndexType());
|
||||
}
|
||||
|
||||
BinarySet
|
||||
VectorMemIndex::Upload(const Config& config) {
|
||||
auto binary_set = Serialize(config);
|
||||
file_manager_->AddFile(binary_set);
|
||||
|
||||
auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize();
|
||||
BinarySet ret;
|
||||
for (auto& file : remote_paths_to_size) {
|
||||
ret.Append(file.first, nullptr, file.second);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}

BinarySet
VectorMemIndex::Serialize(const Config& config) {
    knowhere::BinarySet ret;
@ -48,14 +66,14 @@ VectorMemIndex::Serialize(const Config& config) {
    if (stat != knowhere::Status::success)
        PanicCodeInfo(ErrorCodeEnum::UnexpectedError,
                      "failed to serialize index, " + MatchKnowhereError(stat));
    milvus::Disassemble(ret);
    Disassemble(ret);

    return ret;
}

void
VectorMemIndex::Load(const BinarySet& binary_set, const Config& config) {
    milvus::Assemble(const_cast<BinarySet&>(binary_set));
VectorMemIndex::LoadWithoutAssemble(const BinarySet& binary_set,
                                    const Config& config) {
    auto stat = index_.Deserialize(binary_set);
    if (stat != knowhere::Status::success)
        PanicCodeInfo(
@ -64,6 +82,31 @@ VectorMemIndex::Load(const BinarySet& binary_set, const Config& config) {
    SetDim(index_.Dim());
}

void
VectorMemIndex::Load(const BinarySet& binary_set, const Config& config) {
    milvus::Assemble(const_cast<BinarySet&>(binary_set));
    LoadWithoutAssemble(binary_set, config);
}

void
VectorMemIndex::Load(const Config& config) {
    auto index_files =
        GetValueFromConfig<std::vector<std::string>>(config, "index_files");
    AssertInfo(index_files.has_value(),
               "index file paths is empty when load index");
    auto index_datas = file_manager_->LoadIndexToMemory(index_files.value());
    AssembleIndexDatas(index_datas);
    BinarySet binary_set;
    for (auto& [key, data] : index_datas) {
        auto size = data->Size();
        auto deleter = [&](uint8_t*) {};  // avoid repeated destruction
        auto buf = std::shared_ptr<uint8_t[]>(
            (uint8_t*)const_cast<void*>(data->Data()), deleter);
        binary_set.Append(key, buf, size);
    }
    LoadWithoutAssemble(binary_set, config);
}

void
VectorMemIndex::BuildWithDataset(const DatasetPtr& dataset,
                                 const Config& config) {
@ -81,6 +124,43 @@ VectorMemIndex::BuildWithDataset(const DatasetPtr& dataset,
    SetDim(index_.Dim());
}

void
VectorMemIndex::Build(const Config& config) {
    auto insert_files =
        GetValueFromConfig<std::vector<std::string>>(config, "insert_files");
    AssertInfo(insert_files.has_value(),
               "insert file paths is empty when build disk ann index");
    auto field_datas =
        file_manager_->CacheRawDataToMemory(insert_files.value());

    int64_t total_size = 0;
    int64_t total_num_rows = 0;
    int64_t dim = 0;
    for (auto data : field_datas) {
        total_size += data->Size();
        total_num_rows += data->get_num_rows();
        AssertInfo(dim == 0 || dim == data->get_dim(),
                   "inconsistent dim value between field datas!");
        dim = data->get_dim();
    }

    auto buf = std::shared_ptr<uint8_t[]>(new uint8_t[total_size]);
    int64_t offset = 0;
    for (auto data : field_datas) {
        std::memcpy(buf.get() + offset, data->Data(), data->Size());
        offset += data->Size();
        data.reset();
    }
    field_datas.clear();

    Config build_config;
    build_config.update(config);
    build_config.erase("insert_files");

    auto dataset = GenDataset(total_num_rows, dim, buf.get());
    BuildWithDataset(dataset, build_config);
}
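
Note: Build() establishes the "insert_files" contract: the caller lists raw binlog paths in the config, the file manager downloads them, and the chunks are concatenated into one contiguous buffer before the in-memory build. A hedged caller-side sketch, assuming an already-constructed VectorMemIndex named index; the config values other than the "insert_files" key and the paths themselves are illustrative:

// Hedged sketch: driving the new Build(config) entry point directly.
milvus::Config build_config;
build_config["index_type"] = "IVF_FLAT";  // illustrative value
build_config["metric_type"] = "L2";       // illustrative value
build_config["insert_files"] = std::vector<std::string>{
    "files/insert_log/1/100/0/101/1",     // hypothetical binlog paths
    "files/insert_log/1/100/0/101/2",
};
index->Build(build_config);  // downloads the binlogs, then builds in memory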

void
VectorMemIndex::AddWithDataset(const DatasetPtr& dataset,
                               const Config& config) {

@ -23,13 +23,15 @@
#include <boost/dynamic_bitset.hpp>
#include "knowhere/factory.h"
#include "index/VectorIndex.h"
#include "storage/MemFileManagerImpl.h"

namespace milvus::index {

class VectorMemIndex : public VectorIndex {
 public:
    explicit VectorMemIndex(const IndexType& index_type,
                            const MetricType& metric_type);
                            const MetricType& metric_type,
                            storage::FileManagerImplPtr file_manager = nullptr);

    BinarySet
    Serialize(const Config& config) override;
@ -37,10 +39,16 @@ class VectorMemIndex : public VectorIndex {
    void
    Load(const BinarySet& binary_set, const Config& config = {}) override;

    void
    Load(const Config& config = {}) override;

    void
    BuildWithDataset(const DatasetPtr& dataset,
                     const Config& config = {}) override;

    void
    Build(const Config& config = {}) override;

    void
    AddWithDataset(const DatasetPtr& dataset, const Config& config) override;

@ -60,9 +68,17 @@ class VectorMemIndex : public VectorIndex {
    const std::vector<uint8_t>
    GetVector(const DatasetPtr dataset) const override;

    BinarySet
    Upload(const Config& config = {}) override;

 protected:
    virtual void
    LoadWithoutAssemble(const BinarySet& binary_set, const Config& config);

 protected:
    Config config_;
    knowhere::Index<knowhere::IndexNode> index_;
    std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
};

using VectorMemIndexPtr = std::unique_ptr<VectorMemIndex>;

@ -38,7 +38,7 @@ VectorMemNMIndex::Serialize(const Config& config) {
    auto raw_data = std::shared_ptr<uint8_t[]>(
        static_cast<uint8_t*>(raw_data_.data()), deleter);
    ret.Append(RAW_DATA, raw_data, raw_data_.size());
    milvus::Disassemble(ret);
    Disassemble(ret);

    return ret;
}
@ -52,6 +52,17 @@ VectorMemNMIndex::BuildWithDataset(const DatasetPtr& dataset,
    rc.ElapseFromBegin("Done");
}

void
VectorMemNMIndex::LoadWithoutAssemble(const BinarySet& binary_set,
                                      const Config& config) {
    VectorMemIndex::LoadWithoutAssemble(binary_set, config);
    if (binary_set.Contains(RAW_DATA)) {
        std::call_once(raw_data_loaded_, [&]() {
            LOG_SEGCORE_INFO_ << "NM index load raw data done!";
        });
    }
}

void
VectorMemNMIndex::AddWithDataset(const DatasetPtr& /*dataset*/,
                                 const Config& /*config*/) {

@ -28,9 +28,11 @@ namespace milvus::index {

class VectorMemNMIndex : public VectorMemIndex {
 public:
    explicit VectorMemNMIndex(const IndexType& index_type,
                              const MetricType& metric_type)
        : VectorMemIndex(index_type, metric_type) {
    explicit VectorMemNMIndex(
        const IndexType& index_type,
        const MetricType& metric_type,
        storage::FileManagerImplPtr file_manager = nullptr)
        : VectorMemIndex(index_type, metric_type, file_manager) {
        AssertInfo(is_in_nm_list(index_type), "not valid nm index type");
    }

@ -52,6 +54,10 @@ class VectorMemNMIndex : public VectorMemIndex {
          const SearchInfo& search_info,
          const BitsetView& bitset) override;

    void
    LoadWithoutAssemble(const BinarySet& binary_set,
                        const Config& config) override;

 private:
    void
    store_raw_data(const DatasetPtr& dataset);

@ -13,6 +13,7 @@

#include <memory>
#include "common/Types.h"
#include "storage/FileManager.h"

namespace milvus::indexbuilder {
class IndexCreatorBase {
@ -22,12 +23,18 @@ class IndexCreatorBase {
    virtual void
    Build(const milvus::DatasetPtr& dataset) = 0;

    virtual void
    Build() = 0;

    virtual milvus::BinarySet
    Serialize() = 0;

    // used for test.
    virtual void
    Load(const milvus::BinarySet&) = 0;

    virtual BinarySet
    Upload() = 0;
};

using IndexCreatorBasePtr = std::unique_ptr<IndexCreatorBase>;

@ -13,13 +13,15 @@

#include <pb/schema.pb.h>
#include <cmath>
#include <memory>
#include <string>

#include "indexbuilder/IndexCreatorBase.h"
#include "indexbuilder/ScalarIndexCreator.h"
#include "indexbuilder/VecIndexCreator.h"
#include "indexbuilder/type_c.h"
#include "storage/Types.h"
#include <memory>
#include <string>
#include "storage/FileManager.h"

namespace milvus::indexbuilder {

@ -40,15 +42,13 @@ class IndexFactory {
    }

    IndexCreatorBasePtr
    CreateIndex(CDataType dtype,
                const char* type_params,
                const char* index_params,
                const storage::StorageConfig& storage_config) {
        auto real_dtype = DataType(dtype);
        auto invalid_dtype_msg = std::string("invalid data type: ") +
                                 std::to_string(int(real_dtype));
    CreateIndex(DataType type,
                Config& config,
                storage::FileManagerImplPtr file_manager) {
        auto invalid_dtype_msg =
            std::string("invalid data type: ") + std::to_string(int(type));

        switch (real_dtype) {
        switch (type) {
            case DataType::BOOL:
            case DataType::INT8:
            case DataType::INT16:
@ -58,12 +58,12 @@ class IndexFactory {
            case DataType::DOUBLE:
            case DataType::VARCHAR:
            case DataType::STRING:
                return CreateScalarIndex(real_dtype, type_params, index_params);
                return CreateScalarIndex(type, config, file_manager);

            case DataType::VECTOR_FLOAT:
            case DataType::VECTOR_BINARY:
                return std::make_unique<VecIndexCreator>(
                    real_dtype, type_params, index_params, storage_config);
                    type, config, file_manager);
            default:
                throw std::invalid_argument(invalid_dtype_msg);
        }

@ -21,30 +21,14 @@
namespace milvus::indexbuilder {

ScalarIndexCreator::ScalarIndexCreator(DataType dtype,
                                       const char* type_params,
                                       const char* index_params)
    : dtype_(dtype) {
    // TODO: move parse-related logic to a common interface.
    proto::indexcgo::TypeParams type_params_;
    proto::indexcgo::IndexParams index_params_;
    milvus::index::ParseFromString(type_params_, std::string(type_params));
    milvus::index::ParseFromString(index_params_, std::string(index_params));

    for (auto i = 0; i < type_params_.params_size(); ++i) {
        const auto& param = type_params_.params(i);
        config_[param.key()] = param.value();
    }

    for (auto i = 0; i < index_params_.params_size(); ++i) {
        const auto& param = index_params_.params(i);
        config_[param.key()] = param.value();
    }

                                       Config& config,
                                       storage::FileManagerImplPtr file_manager)
    : dtype_(dtype), config_(config) {
    milvus::index::CreateIndexInfo index_info;
    index_info.field_type = dtype_;
    index_info.index_type = index_type();
    index_ =
        index::IndexFactory::GetInstance().CreateIndex(index_info, nullptr);
    index_ = index::IndexFactory::GetInstance().CreateIndex(index_info,
                                                            file_manager);
}

void
@ -54,6 +38,11 @@ ScalarIndexCreator::Build(const milvus::DatasetPtr& dataset) {
    index_->BuildWithRawData(size, data);
}

void
ScalarIndexCreator::Build() {
    index_->Build(config_);
}

milvus::BinarySet
ScalarIndexCreator::Serialize() {
    return index_->Serialize(config_);
@ -70,4 +59,9 @@ ScalarIndexCreator::index_type() {
    return "sort";
}

BinarySet
ScalarIndexCreator::Upload() {
    return index_->Upload();
}

}  // namespace milvus::indexbuilder

@ -23,18 +23,24 @@ namespace milvus::indexbuilder {
class ScalarIndexCreator : public IndexCreatorBase {
 public:
    ScalarIndexCreator(DataType data_type,
                       const char* type_params,
                       const char* index_params);
                       Config& config,
                       storage::FileManagerImplPtr file_manager);

    void
    Build(const milvus::DatasetPtr& dataset) override;

    void
    Build() override;

    milvus::BinarySet
    Serialize() override;

    void
    Load(const milvus::BinarySet&) override;

    BinarySet
    Upload() override;

 private:
    std::string
    index_type();
@ -49,10 +55,9 @@ using ScalarIndexCreatorPtr = std::unique_ptr<ScalarIndexCreator>;

inline ScalarIndexCreatorPtr
CreateScalarIndex(DataType dtype,
                  const char* type_params,
                  const char* index_params) {
    return std::make_unique<ScalarIndexCreator>(
        dtype, type_params, index_params);
                  Config& config,
                  storage::FileManagerImplPtr file_manager) {
    return std::make_unique<ScalarIndexCreator>(dtype, config, file_manager);
}

}  // namespace milvus::indexbuilder

@ -17,50 +17,17 @@
#include "index/IndexFactory.h"
#include "pb/index_cgo_msg.pb.h"

#ifdef BUILD_DISK_ANN
#include "storage/DiskFileManagerImpl.h"
#endif

namespace milvus::indexbuilder {

VecIndexCreator::VecIndexCreator(DataType data_type,
                                 const char* serialized_type_params,
                                 const char* serialized_index_params,
                                 const storage::StorageConfig& storage_config)
    : data_type_(data_type) {
    proto::indexcgo::TypeParams type_params_;
    proto::indexcgo::IndexParams index_params_;
    milvus::index::ParseFromString(type_params_,
                                   std::string(serialized_type_params));
    milvus::index::ParseFromString(index_params_,
                                   std::string(serialized_index_params));

    for (auto i = 0; i < type_params_.params_size(); ++i) {
        const auto& param = type_params_.params(i);
        config_[param.key()] = param.value();
    }

    for (auto i = 0; i < index_params_.params_size(); ++i) {
        const auto& param = index_params_.params(i);
        config_[param.key()] = param.value();
    }

                                 Config& config,
                                 storage::FileManagerImplPtr file_manager)
    : data_type_(data_type), config_(config) {
    index::CreateIndexInfo index_info;
    index_info.field_type = data_type_;
    index_info.index_type = index::GetIndexTypeFromConfig(config_);
    index_info.metric_type = index::GetMetricTypeFromConfig(config_);

    std::shared_ptr<storage::FileManagerImpl> file_manager = nullptr;
#ifdef BUILD_DISK_ANN
    if (index::is_in_disk_list(index_info.index_type)) {
        // For now, only support diskann index
        file_manager = std::make_shared<storage::DiskFileManagerImpl>(
            index::GetFieldDataMetaFromConfig(config_),
            index::GetIndexMetaFromConfig(config_),
            storage_config);
    }
#endif

    index_ = index::IndexFactory::GetInstance().CreateIndex(index_info,
                                                            file_manager);
    AssertInfo(index_ != nullptr,
@ -77,6 +44,11 @@ VecIndexCreator::Build(const milvus::DatasetPtr& dataset) {
    index_->BuildWithDataset(dataset, config_);
}

void
VecIndexCreator::Build() {
    index_->Build(config_);
}

milvus::BinarySet
VecIndexCreator::Serialize() {
    return index_->Serialize(config_);
@ -95,6 +67,11 @@ VecIndexCreator::Query(const milvus::DatasetPtr& dataset,
    return vector_index->Query(dataset, search_info, bitset);
}

BinarySet
VecIndexCreator::Upload() {
    return index_->Upload();
}

void
VecIndexCreator::CleanLocalData() {
    auto vector_index = dynamic_cast<index::VectorIndex*>(index_.get());

@ -27,13 +27,15 @@ namespace milvus::indexbuilder {
class VecIndexCreator : public IndexCreatorBase {
 public:
    explicit VecIndexCreator(DataType data_type,
                             const char* serialized_type_params,
                             const char* serialized_index_params,
                             const storage::StorageConfig& storage_config);
                             Config& config,
                             storage::FileManagerImplPtr file_manager);

    void
    Build(const milvus::DatasetPtr& dataset) override;

    void
    Build() override;

    milvus::BinarySet
    Serialize() override;

@ -48,6 +50,9 @@ class VecIndexCreator : public IndexCreatorBase {
          const SearchInfo& search_info,
          const BitsetView& bitset);

    BinarySet
    Upload() override;

 public:
    void
    CleanLocalData();

@ -21,41 +21,40 @@
#include "indexbuilder/IndexFactory.h"
#include "common/type_c.h"
#include "storage/Types.h"
#include "indexbuilder/types.h"
#include "index/Utils.h"
#include "pb/index_cgo_msg.pb.h"
#include "storage/Util.h"

CStatus
CreateIndex(enum CDataType dtype,
            const char* serialized_type_params,
            const char* serialized_index_params,
            CIndex* res_index,
            CStorageConfig c_storage_config) {
            CIndex* res_index) {
    auto status = CStatus();
    try {
        AssertInfo(res_index, "failed to create index, passed index was null");

        std::string address(c_storage_config.address);
        std::string bucket_name(c_storage_config.bucket_name);
        std::string access_key(c_storage_config.access_key_id);
        std::string access_value(c_storage_config.access_key_value);
        std::string remote_root_path(c_storage_config.remote_root_path);
        std::string storage_type(c_storage_config.storage_type);
        std::string iam_endpoint(c_storage_config.iam_endpoint);
        auto storage_config =
            milvus::storage::StorageConfig{address,
                                           bucket_name,
                                           access_key,
                                           access_value,
                                           remote_root_path,
                                           storage_type,
                                           iam_endpoint,
                                           c_storage_config.useSSL,
                                           c_storage_config.useIAM};
        milvus::proto::indexcgo::TypeParams type_params;
        milvus::proto::indexcgo::IndexParams index_params;
        milvus::index::ParseFromString(type_params, serialized_type_params);
        milvus::index::ParseFromString(index_params, serialized_index_params);

        milvus::Config config;
        for (auto i = 0; i < type_params.params_size(); ++i) {
            const auto& param = type_params.params(i);
            config[param.key()] = param.value();
        }

        for (auto i = 0; i < index_params.params_size(); ++i) {
            const auto& param = index_params.params(i);
            config[param.key()] = param.value();
        }

        auto& index_factory = milvus::indexbuilder::IndexFactory::GetInstance();
        auto index =
            milvus::indexbuilder::IndexFactory::GetInstance().CreateIndex(
                dtype,
                serialized_type_params,
                serialized_index_params,
                storage_config);
            index_factory.CreateIndex(milvus::DataType(dtype), config, nullptr);

        *res_index = index.release();
        status.error_code = Success;
        status.error_msg = "";
@ -66,6 +65,65 @@ CreateIndex(enum CDataType dtype,
    return status;
}

CStatus
CreateIndexV2(CIndex* res_index, CBuildIndexInfo c_build_index_info) {
    try {
        auto build_index_info = (BuildIndexInfo*)c_build_index_info;
        auto field_type = build_index_info->field_type;

        milvus::index::CreateIndexInfo index_info;
        index_info.field_type = build_index_info->field_type;

        auto& config = build_index_info->config;
        config["insert_files"] = build_index_info->insert_files;

        // get index type
        auto index_type = milvus::index::GetValueFromConfig<std::string>(
            config, "index_type");
        AssertInfo(index_type.has_value(), "index type is empty");
        index_info.index_type = index_type.value();

        // get metric type
        if (milvus::datatype_is_vector(field_type)) {
            auto metric_type = milvus::index::GetValueFromConfig<std::string>(
                config, "metric_type");
            AssertInfo(metric_type.has_value(), "metric type is empty");
            index_info.metric_type = metric_type.value();
        }

        // init file manager
        milvus::storage::FieldDataMeta field_meta{
            build_index_info->collection_id,
            build_index_info->partition_id,
            build_index_info->segment_id,
            build_index_info->field_id};
        milvus::storage::IndexMeta index_meta{build_index_info->segment_id,
                                              build_index_info->field_id,
                                              build_index_info->index_build_id,
                                              build_index_info->index_version};
        auto chunk_manager = milvus::storage::CreateChunkManager(
            build_index_info->storage_config);
        auto file_manager = milvus::storage::CreateFileManager(
            index_info.index_type, field_meta, index_meta, chunk_manager);
        AssertInfo(file_manager != nullptr, "create file manager failed!");

        auto index =
            milvus::indexbuilder::IndexFactory::GetInstance().CreateIndex(
                build_index_info->field_type, config, file_manager);
        index->Build();
        *res_index = index.release();
        auto status = CStatus();
        status.error_code = Success;
        status.error_msg = "";
        return status;
    } catch (std::exception& e) {
        auto status = CStatus();
        status.error_code = UnexpectedError;
        status.error_msg = strdup(e.what());
        return status;
    }
}
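
Note: CreateIndexV2 is the new single entry point: the Go indexnode only describes the build (field/index meta, params, binlog paths, storage config) and the C++ side downloads, builds, and later uploads. A hedged sketch of the intended call order; error checks are elided, the ids are made up, and c_storage_config, field_type, and the serialized param blobs are assumed to come from the caller:

// Hedged usage sketch of the new C API; concrete values are illustrative.
int64_t collection_id = 1, partition_id = 2, segment_id = 3, field_id = 101;
int64_t index_id = 500, build_id = 1000, index_version = 1;
CBuildIndexInfo build_info = nullptr;
NewBuildIndexInfo(&build_info, c_storage_config);  // storage endpoint etc.
AppendFieldMetaInfo(build_info, collection_id, partition_id, segment_id,
                    field_id, field_type);  // field_type: a vector CDataType
AppendIndexMetaInfo(build_info, index_id, build_id, index_version);
AppendBuildTypeParam(build_info, type_params_blob, type_params_len);
AppendBuildIndexParam(build_info, index_params_blob, index_params_len);
AppendInsertFilePath(build_info, "files/insert_log/1/100/0/101/1");
CIndex index = nullptr;
CreateIndexV2(&index, build_info);            // downloads binlogs and builds
CBinarySet binary_set = nullptr;
SerializeIndexAndUpLoad(index, &binary_set);  // uploads, returns path->size
DeleteBuildIndexInfo(build_info);
DeleteIndex(index);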

CStatus
DeleteIndex(CIndex index) {
    auto status = CStatus();
@ -219,3 +277,187 @@ CleanLocalData(CIndex index) {
    }
    return status;
}

CStatus
NewBuildIndexInfo(CBuildIndexInfo* c_build_index_info,
                  CStorageConfig c_storage_config) {
    try {
        auto build_index_info = std::make_unique<BuildIndexInfo>();
        auto& storage_config = build_index_info->storage_config;
        storage_config.address = std::string(c_storage_config.address);
        storage_config.bucket_name = std::string(c_storage_config.bucket_name);
        storage_config.access_key_id =
            std::string(c_storage_config.access_key_id);
        storage_config.access_key_value =
            std::string(c_storage_config.access_key_value);
        storage_config.root_path = std::string(c_storage_config.root_path);
        storage_config.storage_type =
            std::string(c_storage_config.storage_type);
        storage_config.iam_endpoint =
            std::string(c_storage_config.iam_endpoint);
        storage_config.useSSL = c_storage_config.useSSL;
        storage_config.useIAM = c_storage_config.useIAM;

        *c_build_index_info = build_index_info.release();
        auto status = CStatus();
        status.error_code = Success;
        status.error_msg = "";
        return status;
    } catch (std::exception& e) {
        auto status = CStatus();
        status.error_code = UnexpectedError;
        status.error_msg = strdup(e.what());
        return status;
    }
}

void
DeleteBuildIndexInfo(CBuildIndexInfo c_build_index_info) {
    auto info = (BuildIndexInfo*)c_build_index_info;
    delete info;
}

CStatus
AppendBuildIndexParam(CBuildIndexInfo c_build_index_info,
                      const uint8_t* serialized_index_params,
                      const uint64_t len) {
    try {
        auto build_index_info = (BuildIndexInfo*)c_build_index_info;
        auto index_params =
            std::make_unique<milvus::proto::indexcgo::IndexParams>();
        auto res = index_params->ParseFromArray(serialized_index_params, len);
        AssertInfo(res, "Unmarshall index params failed");
        for (auto i = 0; i < index_params->params_size(); ++i) {
            const auto& param = index_params->params(i);
            build_index_info->config[param.key()] = param.value();
        }

        auto status = CStatus();
        status.error_code = Success;
        status.error_msg = "";
        return status;
    } catch (std::exception& e) {
        auto status = CStatus();
        status.error_code = UnexpectedError;
        status.error_msg = strdup(e.what());
        return status;
    }
}

CStatus
AppendBuildTypeParam(CBuildIndexInfo c_build_index_info,
                     const uint8_t* serialized_type_params,
                     const uint64_t len) {
    try {
        auto build_index_info = (BuildIndexInfo*)c_build_index_info;
        auto type_params =
            std::make_unique<milvus::proto::indexcgo::TypeParams>();
        auto res = type_params->ParseFromArray(serialized_type_params, len);
        AssertInfo(res, "Unmarshall index build type params failed");
        for (auto i = 0; i < type_params->params_size(); ++i) {
            const auto& param = type_params->params(i);
            build_index_info->config[param.key()] = param.value();
        }

        auto status = CStatus();
        status.error_code = Success;
        status.error_msg = "";
        return status;
    } catch (std::exception& e) {
        auto status = CStatus();
        status.error_code = UnexpectedError;
        status.error_msg = strdup(e.what());
        return status;
    }
}

CStatus
AppendFieldMetaInfo(CBuildIndexInfo c_build_index_info,
                    int64_t collection_id,
                    int64_t partition_id,
                    int64_t segment_id,
                    int64_t field_id,
                    enum CDataType field_type) {
    try {
        auto build_index_info = (BuildIndexInfo*)c_build_index_info;
        build_index_info->collection_id = collection_id;
        build_index_info->partition_id = partition_id;
        build_index_info->segment_id = segment_id;
        build_index_info->field_id = field_id;
        build_index_info->field_type = milvus::DataType(field_type);

        auto status = CStatus();
        status.error_code = Success;
        status.error_msg = "";
        return status;
    } catch (std::exception& e) {
        auto status = CStatus();
        status.error_code = UnexpectedError;
        status.error_msg = strdup(e.what());
        return status;
    }
}

CStatus
AppendIndexMetaInfo(CBuildIndexInfo c_build_index_info,
                    int64_t index_id,
                    int64_t build_id,
                    int64_t version) {
    try {
        auto build_index_info = (BuildIndexInfo*)c_build_index_info;
        build_index_info->index_id = index_id;
        build_index_info->index_build_id = build_id;
        build_index_info->index_version = version;

        auto status = CStatus();
        status.error_code = Success;
        status.error_msg = "";
        return status;
    } catch (std::exception& e) {
        auto status = CStatus();
        status.error_code = UnexpectedError;
        status.error_msg = strdup(e.what());
        return status;
    }
}

CStatus
AppendInsertFilePath(CBuildIndexInfo c_build_index_info,
                     const char* c_file_path) {
    try {
        auto build_index_info = (BuildIndexInfo*)c_build_index_info;
        std::string insert_file_path(c_file_path);
        build_index_info->insert_files.emplace_back(insert_file_path);

        auto status = CStatus();
        status.error_code = Success;
        status.error_msg = "";
        return status;
    } catch (std::exception& e) {
        auto status = CStatus();
        status.error_code = UnexpectedError;
        status.error_msg = strdup(e.what());
        return status;
    }
}

CStatus
SerializeIndexAndUpLoad(CIndex index, CBinarySet* c_binary_set) {
    auto status = CStatus();
    try {
        AssertInfo(
            index,
            "failed to serialize index to binary set, passed index was null");
        auto real_index =
            reinterpret_cast<milvus::indexbuilder::IndexCreatorBase*>(index);
        auto binary =
            std::make_unique<knowhere::BinarySet>(real_index->Upload());
        *c_binary_set = binary.release();
        status.error_code = Success;
        status.error_msg = "";
    } catch (std::exception& e) {
        status.error_code = UnexpectedError;
        status.error_msg = strdup(e.what());
    }
    return status;
}

@ -24,8 +24,7 @@ CStatus
CreateIndex(enum CDataType dtype,
            const char* serialized_type_params,
            const char* serialized_index_params,
            CIndex* res_index,
            CStorageConfig storage_config);
            CIndex* res_index);

CStatus
DeleteIndex(CIndex index);
@ -53,6 +52,46 @@ LoadIndexFromBinarySet(CIndex index, CBinarySet c_binary_set);
CStatus
CleanLocalData(CIndex index);

CStatus
NewBuildIndexInfo(CBuildIndexInfo* c_build_index_info,
                  CStorageConfig c_storage_config);

void
DeleteBuildIndexInfo(CBuildIndexInfo c_build_index_info);

CStatus
AppendBuildIndexParam(CBuildIndexInfo c_build_index_info,
                      const uint8_t* serialized_type_params,
                      const uint64_t len);

CStatus
AppendBuildTypeParam(CBuildIndexInfo c_build_index_info,
                     const uint8_t* serialized_type_params,
                     const uint64_t len);

CStatus
AppendFieldMetaInfo(CBuildIndexInfo c_build_index_info,
                    int64_t collection_id,
                    int64_t partition_id,
                    int64_t segment_id,
                    int64_t field_id,
                    enum CDataType field_type);

CStatus
AppendIndexMetaInfo(CBuildIndexInfo c_build_index_info,
                    int64_t index_id,
                    int64_t build_id,
                    int64_t version);

CStatus
AppendInsertFilePath(CBuildIndexInfo c_build_index_info, const char* file_path);

CStatus
CreateIndexV2(CIndex* res_index, CBuildIndexInfo c_build_index_info);

CStatus
SerializeIndexAndUpLoad(CIndex index, CBinarySet* c_binary_set);

#ifdef __cplusplus
};
#endif

@ -15,3 +15,4 @@

typedef void* CIndex;
typedef void* CIndexQueryResult;
typedef void* CBuildIndexInfo;

@ -14,35 +14,23 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <stdint.h>
#include <string>
#include <vector>
#include "common/Types.h"
#include "index/Index.h"
#include "storage/Types.h"

#include "storage/FieldData.h"

namespace milvus::storage {

class FieldDataFactory {
 private:
    FieldDataFactory() = default;
    FieldDataFactory(const FieldDataFactory&) = delete;
    FieldDataFactory
    operator=(const FieldDataFactory&) = delete;

 public:
    static FieldDataFactory&
    GetInstance() {
        static FieldDataFactory inst;
        return inst;
    }

    std::string
    GetName() const {
        return "FieldDataFactory";
    }

    FieldDataPtr
    CreateFieldData(const DataType& type, const int64_t dim = 1);
};

}  // namespace milvus::storage
struct BuildIndexInfo {
    int64_t collection_id;
    int64_t partition_id;
    int64_t segment_id;
    int64_t field_id;
    milvus::DataType field_type;
    int64_t index_id;
    int64_t index_build_id;
    int64_t index_version;
    std::vector<std::string> insert_files;
    milvus::storage::StorageConfig storage_config;
    milvus::Config config;
};
@ -1,14 +1,18 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <sys/mman.h>
@ -21,17 +25,9 @@
#include <string>
#include <utility>

#include "common/FieldMeta.h"
#include "common/LoadInfo.h"
#include "common/Span.h"
#include "common/Types.h"
#include "common/Utils.h"
#include "exceptions/EasyAssert.h"
#include "fmt/core.h"
#include "log/Log.h"
#include "nlohmann/json.hpp"
#include "mmap/Utils.h"

namespace milvus::segcore {
namespace milvus {

struct Entry {
    char* data;
@ -79,7 +75,7 @@ class Column : public ColumnBase {
 public:
    Column(int64_t segment_id,
           const FieldMeta& field_meta,
           const LoadFieldDataInfo& info) {
           const FieldDataInfo& info) {
        data_ = static_cast<char*>(CreateMap(segment_id, field_meta, info));
        size_ = field_meta.get_sizeof() * info.row_count;
        row_count_ = info.row_count;
@ -109,20 +105,13 @@ class VariableColumn : public ColumnBase {

    VariableColumn(int64_t segment_id,
                   const FieldMeta& field_meta,
                   const LoadFieldDataInfo& info) {
        auto begin = FIELD_DATA(info.field_data, string).begin();
        auto end = FIELD_DATA(info.field_data, string).end();
        if constexpr (std::is_same_v<T, Json>) {
            begin = FIELD_DATA(info.field_data, json).begin();
            end = FIELD_DATA(info.field_data, json).end();
        }

        size_ = 0;
                   const FieldDataInfo& info) {
        indices_.reserve(info.row_count);
        while (begin != end) {
            indices_.push_back(size_);
            size_ += begin->length();
            begin++;
        for (auto data : info.datas) {
            for (ssize_t idx = 0; idx < data->get_num_rows(); ++idx) {
                indices_.emplace_back(size_);
                size_ += data->Size(idx);
            }
        }

        data_ = static_cast<char*>(CreateMap(segment_id, field_meta, info));
@ -177,4 +166,4 @@ class VariableColumn : public ColumnBase {
    // Compatible with current Span type
    std::vector<ViewType> views_{};
};
}  // namespace milvus::segcore
}  // namespace milvus
@ -13,17 +13,19 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <unistd.h>
#include <string>
#include <vector>
#include "storage/FieldData.h"

namespace milvus::ChunkMangerConfig {
namespace milvus {

void
SetLocalRootPath(const std::string_view path_prefix);

std::string
GetLocalRootPath();

}  // namespace milvus::ChunkMangerConfig
struct FieldDataInfo {
    int64_t field_id;
    int64_t row_count;
    std::vector<storage::FieldDataPtr> datas;
    std::string mmap_dir_path;
};
}  // namespace milvus
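
Note: FieldDataInfo is the plain C++ replacement for the proto-backed per-field load description: one field id, the total row count, the downloaded FieldData chunks, and an optional mmap directory. A hedged construction sketch; the field id is hypothetical, and in the real flow the chunks come from LoadFieldDatasFromRemote (shown later in this diff) rather than being left empty:

#include <vector>
#include "storage/FieldData.h"  // FieldDataPtr, from this diff's tree

// Hedged sketch: describing one field's downloaded binlogs for loading.
std::vector<milvus::storage::FieldDataPtr> datas;  // filled after download
int64_t row_count = 0;
for (const auto& data : datas) {
    row_count += data->get_num_rows();  // sum rows across binlog chunks
}
milvus::FieldDataInfo info{/*field_id=*/101,       // hypothetical field id
                           row_count,
                           datas,
                           /*mmap_dir_path=*/""};  // empty => anonymous map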
internal/core/src/mmap/Utils.h (new file, 232 lines)
@ -0,0 +1,232 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <cstring>
#include <filesystem>
#include <memory>
#include <string>
#include <vector>

#include "common/FieldMeta.h"
#include "mmap/Types.h"
#include "storage/Util.h"

namespace milvus {

inline size_t
GetDataSize(const std::vector<storage::FieldDataPtr>& datas) {
    size_t total_size{0};
    for (auto data : datas) {
        total_size += data->Size();
    }

    return total_size;
}

inline void*
FillField(DataType data_type, const storage::FieldDataPtr data, void* dst) {
    char* dest = reinterpret_cast<char*>(dst);
    if (datatype_is_variable(data_type)) {
        switch (data_type) {
            case DataType::STRING:
            case DataType::VARCHAR: {
                for (ssize_t i = 0; i < data->get_num_rows(); ++i) {
                    auto str =
                        static_cast<const std::string*>(data->RawValue(i));
                    memcpy(dest, str->data(), str->size());
                    dest += str->size();
                }
                break;
            }
            case DataType::JSON: {
                for (ssize_t i = 0; i < data->get_num_rows(); ++i) {
                    auto padded_string =
                        static_cast<const Json*>(data->RawValue(i))->data();
                    memcpy(dest, padded_string.data(), padded_string.size());
                    dest += padded_string.size();
                }
                break;
            }
            default:
                PanicInfo(fmt::format("not supported data type {}",
                                      datatype_name(data_type)));
        }
    } else {
        memcpy(dst, data->Data(), data->Size());
        dest += data->Size();
    }

    return dest;
}

inline ssize_t
WriteFieldData(int fd, DataType data_type, const storage::FieldDataPtr data) {
    ssize_t total_written{0};
    if (datatype_is_variable(data_type)) {
        switch (data_type) {
            case DataType::VARCHAR:
            case DataType::STRING: {
                for (ssize_t i = 0; i < data->get_num_rows(); ++i) {
                    auto str =
                        static_cast<const std::string*>(data->RawValue(i));
                    ssize_t written = write(fd, str->data(), str->size());
                    if (written < str->size()) {
                        break;
                    }
                    total_written += written;
                }
                break;
            }
            case DataType::JSON: {
                for (ssize_t i = 0; i < data->get_num_rows(); ++i) {
                    auto padded_string =
                        static_cast<const Json*>(data->RawValue(i))->data();
                    ssize_t written =
                        write(fd, padded_string.data(), padded_string.size());
                    if (written < padded_string.size()) {
                        break;
                    }
                    total_written += written;
                }
                break;
            }
            default:
                PanicInfo(fmt::format("not supported data type {}",
                                      datatype_name(data_type)));
        }
    } else {
        total_written += write(fd, data->Data(), data->Size());
    }

    return total_written;
}

// CreateMap creates a memory mapping:
// if mmap is enabled, it writes the field data to disk and maps the file;
// otherwise it just allocates anonymous memory.
inline void*
CreateMap(int64_t segment_id,
          const FieldMeta& field_meta,
          const FieldDataInfo& info) {
    static int mmap_flags = MAP_PRIVATE;
#ifdef MAP_POPULATE
    // macOS doesn't support MAP_POPULATE
    mmap_flags |= MAP_POPULATE;
#endif

    // simdjson requires a padding following the json data
    size_t padding = field_meta.get_data_type() == DataType::JSON
                         ? simdjson::SIMDJSON_PADDING
                         : 0;
    auto data_size = GetDataSize(info.datas);
    // Allocate memory
    if (info.mmap_dir_path.empty()) {
        auto data_type = field_meta.get_data_type();
        if (data_size == 0)
            return nullptr;

        // Use an anonymous mapping so this memory can be freed with munmap alone
        void* map = mmap(nullptr,
                         data_size + padding,
                         PROT_READ | PROT_WRITE,
                         mmap_flags | MAP_ANON,
                         -1,
                         0);
        AssertInfo(
            map != MAP_FAILED,
            fmt::format("failed to create anon map, err: {}", strerror(errno)));
        auto dst = map;
        for (auto data : info.datas) {
            dst = FillField(data_type, data, dst);
        }
        return map;
    }

    auto filepath = std::filesystem::path(info.mmap_dir_path) /
                    std::to_string(segment_id) / std::to_string(info.field_id);
    auto dir = filepath.parent_path();
    std::filesystem::create_directories(dir);

    int fd =
        open(filepath.c_str(), O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR);
    AssertInfo(fd != -1,
               fmt::format("failed to create mmap file {}", filepath.c_str()));

    auto data_type = field_meta.get_data_type();
    ssize_t total_written{0};
    for (auto data : info.datas) {
        auto written = WriteFieldData(fd, data_type, data);
        if (written != data->Size()) {
            break;
        }
        total_written += written;
    }
    AssertInfo(
        total_written == data_size ||
            total_written != -1 &&
                datatype_is_variable(field_meta.get_data_type()),
        fmt::format(
            "failed to write data file {}, written {} but total {}, err: {}",
            filepath.c_str(),
            total_written,
            data_size,
            strerror(errno)));
    int ok = fsync(fd);
    AssertInfo(ok == 0,
               fmt::format("failed to fsync mmap data file {}, err: {}",
                           filepath.c_str(),
                           strerror(errno)));

    // Empty field
    if (total_written == 0) {
        return nullptr;
    }

    auto map =
        mmap(nullptr, total_written + padding, PROT_READ, mmap_flags, fd, 0);
    AssertInfo(map != MAP_FAILED,
               fmt::format("failed to create map for data file {}, err: {}",
                           filepath.c_str(),
                           strerror(errno)));

#ifndef MAP_POPULATE
    // Manually touch each page to populate the mapping
    const size_t page_size = getpagesize();
    char* begin = (char*)map;
    char* end = begin + total_written;
    for (char* page = begin; page < end; page += page_size) {
        char value = page[0];
    }
#endif
    // Unlink the data file so it is removed automatically once it is no
    // longer needed
    ok = unlink(filepath.c_str());
    AssertInfo(ok == 0,
               fmt::format("failed to unlink mmap data file {}, err: {}",
                           filepath.c_str(),
                           strerror(errno)));
    ok = close(fd);
    AssertInfo(ok == 0,
               fmt::format("failed to close data file {}, err: {}",
                           filepath.c_str(),
                           strerror(errno)));
    return map;
}
}  // namespace milvus
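
Note: CreateMap leans on a POSIX guarantee worth calling out: once mmap succeeds, the mapping pins the underlying pages, so the descriptor can be closed and the path unlinked immediately, and the on-disk file disappears by itself after munmap. A minimal standalone demonstration of that idiom, assuming Linux or macOS; the file name is arbitrary:

// Hedged sketch: the unlink-after-mmap lifetime idiom used by CreateMap.
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <cassert>
#include <cstring>

int
main() {
    int fd = open("scratch.bin", O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR);
    assert(fd != -1);
    const char payload[] = "hello";
    assert(write(fd, payload, sizeof(payload)) == (ssize_t)sizeof(payload));
    void* map = mmap(nullptr, sizeof(payload), PROT_READ, MAP_PRIVATE, fd, 0);
    assert(map != MAP_FAILED);
    // The mapping keeps the pages alive: both the descriptor and the
    // directory entry can go away right now.
    close(fd);
    unlink("scratch.bin");
    assert(std::strcmp(static_cast<const char*>(map), "hello") == 0);
    munmap(map, sizeof(payload));
    return 0;
}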
|
||||
@ -30,6 +30,7 @@ set(SEGCORE_FILES
|
||||
plan_c.cpp
|
||||
reduce_c.cpp
|
||||
load_index_c.cpp
|
||||
load_field_data_c.cpp
|
||||
SegmentInterface.cpp
|
||||
SegcoreConfig.cpp
|
||||
IndexConfigGenerator.cpp
|
||||
|
||||
@ -91,80 +91,4 @@ VectorBase::set_data_raw(ssize_t element_offset,
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
VectorBase::fill_chunk_data(ssize_t element_count,
|
||||
const DataArray* data,
|
||||
const FieldMeta& field_meta) {
|
||||
if (field_meta.is_vector()) {
|
||||
if (field_meta.get_data_type() == DataType::VECTOR_FLOAT) {
|
||||
return fill_chunk_data(VEC_FIELD_DATA(data, float).data(),
|
||||
element_count);
|
||||
} else if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
|
||||
return fill_chunk_data(VEC_FIELD_DATA(data, binary), element_count);
|
||||
} else {
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
}
|
||||
|
||||
switch (field_meta.get_data_type()) {
|
||||
case DataType::BOOL: {
|
||||
return fill_chunk_data(FIELD_DATA(data, bool).data(),
|
||||
element_count);
|
||||
}
|
||||
case DataType::INT8: {
|
||||
auto& src_data = FIELD_DATA(data, int);
|
||||
std::vector<int8_t> data_raw(src_data.size());
|
||||
std::copy_n(src_data.data(), src_data.size(), data_raw.data());
|
||||
return fill_chunk_data(data_raw.data(), element_count);
|
||||
}
|
||||
case DataType::INT16: {
|
||||
auto& src_data = FIELD_DATA(data, int);
|
||||
std::vector<int16_t> data_raw(src_data.size());
|
||||
std::copy_n(src_data.data(), src_data.size(), data_raw.data());
|
||||
return fill_chunk_data(data_raw.data(), element_count);
|
||||
}
|
||||
case DataType::INT32: {
|
||||
return fill_chunk_data(FIELD_DATA(data, int).data(), element_count);
|
||||
}
|
||||
case DataType::INT64: {
|
||||
return fill_chunk_data(FIELD_DATA(data, long).data(),
|
||||
element_count);
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
return fill_chunk_data(FIELD_DATA(data, float).data(),
|
||||
element_count);
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
return fill_chunk_data(FIELD_DATA(data, double).data(),
|
||||
element_count);
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
auto vec = static_cast<ConcurrentVector<std::string>*>(this);
|
||||
auto count = FIELD_DATA(data, string).size();
|
||||
vec->grow_on_demand(count);
|
||||
auto& chunk = vec->get_chunk(0);
|
||||
|
||||
size_t index = 0;
|
||||
for (auto& str : FIELD_DATA(data, string)) {
|
||||
chunk[index++] = str;
|
||||
}
|
||||
return;
|
||||
}
|
||||
case DataType::JSON: {
|
||||
auto vec = static_cast<ConcurrentVector<Json>*>(this);
|
||||
auto count = FIELD_DATA(data, json).size();
|
||||
vec->grow_on_demand(count);
|
||||
auto& chunk = vec->get_chunk(0);
|
||||
|
||||
size_t index = 0;
|
||||
for (auto& str : FIELD_DATA(data, json)) {
|
||||
chunk[index++] = Json(simdjson::padded_string(str));
|
||||
}
|
||||
return;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported");
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace milvus::segcore
|
||||
|
||||
@ -31,6 +31,7 @@
|
||||
#include "common/Types.h"
|
||||
#include "common/Utils.h"
|
||||
#include "exceptions/EasyAssert.h"
|
||||
#include "storage/FieldData.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
@ -100,6 +101,10 @@ class VectorBase {
|
||||
const void* source,
|
||||
ssize_t element_count) = 0;
|
||||
|
||||
virtual void
|
||||
set_data_raw(ssize_t element_offset,
|
||||
const std::vector<storage::FieldDataPtr>& data) = 0;
|
||||
|
||||
void
|
||||
set_data_raw(ssize_t element_offset,
|
||||
ssize_t element_count,
|
||||
@ -107,12 +112,7 @@ class VectorBase {
|
||||
const FieldMeta& field_meta);
|
||||
|
||||
virtual void
|
||||
fill_chunk_data(const void* source, ssize_t element_count) = 0;
|
||||
|
||||
void
|
||||
fill_chunk_data(ssize_t element_count,
|
||||
const DataArray* data,
|
||||
const FieldMeta& field_meta);
|
||||
fill_chunk_data(const std::vector<storage::FieldDataPtr>& data) = 0;
|
||||
|
||||
virtual SpanBase
|
||||
get_span_base(int64_t chunk_id) const = 0;
|
||||
@ -196,13 +196,32 @@ class ConcurrentVectorImpl : public VectorBase {
|
||||
}
|
||||
|
||||
void
|
||||
fill_chunk_data(const void* source, ssize_t element_count) override {
|
||||
if (element_count == 0) {
|
||||
return;
|
||||
}
|
||||
fill_chunk_data(const std::vector<storage::FieldDataPtr>& datas)
|
||||
override { // used only for sealed segment
|
||||
AssertInfo(chunks_.size() == 0, "no empty concurrent vector");
|
||||
|
||||
int64_t element_count = 0;
|
||||
for (auto& field_data : datas) {
|
||||
element_count += field_data->get_num_rows();
|
||||
}
|
||||
chunks_.emplace_to_at_least(1, Dim * element_count);
|
||||
set_data(0, static_cast<const Type*>(source), element_count);
|
||||
int64_t offset = 0;
|
||||
for (auto& field_data : datas) {
|
||||
auto num_rows = field_data->get_num_rows();
|
||||
set_data(
|
||||
offset, static_cast<const Type*>(field_data->Data()), num_rows);
|
||||
offset += num_rows;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
set_data_raw(ssize_t element_offset,
|
||||
const std::vector<storage::FieldDataPtr>& datas) override {
|
||||
for (auto& field_data : datas) {
|
||||
auto num_rows = field_data->get_num_rows();
|
||||
set_data_raw(element_offset, field_data->Data(), num_rows);
|
||||
element_offset += num_rows;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@ -276,6 +276,27 @@ class IndexingRecord {
|
||||
}
|
||||
}
|
||||
|
||||
// concurrent, reentrant
|
||||
template <bool is_sealed>
|
||||
void
|
||||
AppendingIndex(int64_t reserved_offset,
|
||||
int64_t size,
|
||||
FieldId fieldId,
|
||||
const storage::FieldDataPtr data,
|
||||
const InsertRecord<is_sealed>& record) {
|
||||
if (is_in(fieldId)) {
|
||||
auto& indexing = field_indexings_.at(fieldId);
|
||||
if (indexing->get_field_meta().is_vector() &&
|
||||
indexing->get_field_meta().get_data_type() ==
|
||||
DataType::VECTOR_FLOAT &&
|
||||
reserved_offset + size >= indexing->get_build_threshold()) {
|
||||
auto vec_base = record.get_field_data_base(fieldId);
|
||||
indexing->AppendSegmentIndex(
|
||||
reserved_offset, size, vec_base, data->Data());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
GetDataFromIndex(FieldId fieldId,
|
||||
const int64_t* seg_offsets,
|
||||
|
||||
@ -247,6 +247,37 @@ struct InsertRecord {
|
||||
return res_offsets;
|
||||
}
|
||||
|
||||
void
|
||||
insert_pks(const std::vector<storage::FieldDataPtr>& field_datas) {
|
||||
std::lock_guard lck(shared_mutex_);
|
||||
int64_t offset = 0;
|
||||
for (auto& data : field_datas) {
|
||||
int64_t row_count = data->get_num_rows();
|
||||
auto data_type = data->get_data_type();
|
||||
switch (data_type) {
|
||||
case DataType::INT64: {
|
||||
for (int i = 0; i < row_count; ++i) {
|
||||
pk2offset_->insert(
|
||||
*static_cast<const int64_t*>(data->RawValue(i)),
|
||||
offset++);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
for (int i = 0; i < row_count; ++i) {
|
||||
pk2offset_->insert(
|
||||
*static_cast<const std::string*>(data->RawValue(i)),
|
||||
offset++);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported primary key data type");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<SegOffset>
|
||||
search_pk(const PkType& pk, int64_t insert_barrier) const {
|
||||
std::shared_lock lck(shared_mutex_);
|
||||
|
||||
@ -23,6 +23,8 @@
|
||||
#include "query/SearchOnSealed.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
#include "segcore/Utils.h"
|
||||
#include "storage/RemoteChunkManagerSingleton.h"
|
||||
#include "storage/Util.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
@ -112,6 +114,77 @@ SegmentGrowingImpl::Insert(int64_t reserved_offset,
|
||||
reserved_offset + size);
|
||||
}
|
||||
|
||||
void
|
||||
SegmentGrowingImpl::LoadFieldData(const LoadFieldDataInfo& infos) {
|
||||
// schema don't include system field
|
||||
AssertInfo(infos.field_infos.size() == schema_->size() + 2,
|
||||
"lost some field data when load for growing segment");
|
||||
AssertInfo(infos.field_infos.find(TimestampFieldID.get()) !=
|
||||
infos.field_infos.end(),
|
||||
"timestamps field data should be included");
|
||||
AssertInfo(
|
||||
infos.field_infos.find(RowFieldID.get()) != infos.field_infos.end(),
|
||||
"rowID field data should be included");
|
||||
auto primary_field_id =
|
||||
schema_->get_primary_field_id().value_or(FieldId(-1));
|
||||
AssertInfo(primary_field_id.get() != INVALID_FIELD_ID, "Primary key is -1");
|
||||
AssertInfo(infos.field_infos.find(primary_field_id.get()) !=
|
||||
infos.field_infos.end(),
|
||||
"primary field data should be included");
|
||||
|
||||
int64_t num_rows = 0;
|
||||
for (auto& field : infos.field_infos) {
|
||||
num_rows = field.second.row_count;
|
||||
break;
|
||||
}
|
||||
auto reserved_offset = PreInsert(num_rows);
|
||||
for (auto& [id, info] : infos.field_infos) {
|
||||
auto field_id = FieldId(id);
|
||||
auto insert_files = info.insert_files;
|
||||
auto field_datas = LoadFieldDatasFromRemote(insert_files);
|
||||
AssertInfo(
|
||||
num_rows == storage::GetTotalNumRowsForFieldDatas(field_datas),
|
||||
"inconsistent num row between multi fields");
|
||||
|
||||
if (field_id == TimestampFieldID) {
|
||||
// step 2: sort timestamp
|
||||
// query node already guarantees that the timestamp is ordered, avoid field data copy in c++
|
||||
|
||||
// step 3: fill into Segment.ConcurrentVector
|
||||
insert_record_.timestamps_.set_data_raw(reserved_offset,
|
||||
field_datas);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (field_id == RowFieldID) {
|
||||
insert_record_.row_ids_.set_data_raw(reserved_offset, field_datas);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!indexing_record_.SyncDataWithIndex(field_id)) {
|
||||
insert_record_.get_field_data_base(field_id)->set_data_raw(
|
||||
reserved_offset, field_datas);
|
||||
}
|
||||
if (segcore_config_.get_enable_growing_segment_index()) {
|
||||
auto offset = reserved_offset;
|
||||
for (auto data : field_datas) {
|
||||
auto row_count = data->get_num_rows();
|
||||
indexing_record_.AppendingIndex(
|
||||
offset, row_count, field_id, data, insert_record_);
|
||||
offset += row_count;
|
||||
}
|
||||
}
|
||||
|
||||
if (field_id == primary_field_id) {
|
||||
insert_record_.insert_pks(field_datas);
|
||||
}
|
||||
}
|
||||
|
||||
// step 5: update small indexes
|
||||
insert_record_.ack_responder_.AddSegment(reserved_offset,
|
||||
reserved_offset + num_rows);
|
||||
}
|
||||
|
||||
Status
|
||||
SegmentGrowingImpl::Delete(int64_t reserved_begin,
|
||||
int64_t size,
|
||||
|
||||
@ -62,6 +62,9 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
||||
void
|
||||
LoadDeletedRecord(const LoadDeletedRecordInfo& info) override;
|
||||
|
||||
void
|
||||
LoadFieldData(const LoadFieldDataInfo& info) override;
|
||||
|
||||
std::string
|
||||
debug() const override;
|
||||
|
||||
|
||||
@ -83,6 +83,9 @@ class SegmentInterface {
|
||||
virtual void
|
||||
LoadDeletedRecord(const LoadDeletedRecordInfo& info) = 0;
|
||||
|
||||
virtual void
|
||||
LoadFieldData(const LoadFieldDataInfo& info) = 0;
|
||||
|
||||
virtual int64_t
|
||||
get_segment_id() const = 0;
|
||||
|
||||
|
||||
@ -18,6 +18,7 @@
|
||||
#include "pb/segcore.pb.h"
|
||||
#include "segcore/SegmentInterface.h"
|
||||
#include "segcore/Types.h"
|
||||
#include "mmap/Column.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
@ -28,11 +29,11 @@ class SegmentSealed : public SegmentInternalInterface {
|
||||
virtual void
|
||||
LoadSegmentMeta(const milvus::proto::segcore::LoadSegmentMeta& meta) = 0;
|
||||
virtual void
|
||||
LoadFieldData(const LoadFieldDataInfo& info) = 0;
|
||||
virtual void
|
||||
DropIndex(const FieldId field_id) = 0;
|
||||
virtual void
|
||||
DropFieldData(const FieldId field_id) = 0;
|
||||
virtual void
|
||||
LoadFieldData(FieldId field_id, const FieldDataInfo& data_info) = 0;
|
||||
|
||||
SegmentType
|
||||
type() const override {
|
||||
|
||||
@ -21,7 +21,7 @@
|
||||
|
||||
#include "Utils.h"
|
||||
#include "Types.h"
|
||||
#include "common/Column.h"
|
||||
#include "mmap/Column.h"
|
||||
#include "common/Consts.h"
|
||||
#include "common/FieldMeta.h"
|
||||
#include "common/Types.h"
|
||||
@ -29,7 +29,7 @@
|
||||
#include "query/ScalarIndex.h"
|
||||
#include "query/SearchBruteForce.h"
|
||||
#include "query/SearchOnSealed.h"
|
||||
#include "index/Utils.h"
|
||||
#include "storage/Util.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
@ -166,52 +166,73 @@ SegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
|
||||
}
|
||||
|
||||
void
|
||||
SegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& info) {
|
||||
SegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& load_info) {
|
||||
// print(info);
|
||||
// NOTE: lock only when data is ready to avoid starvation
|
||||
AssertInfo(info.row_count > 0, "The row count of field data is 0");
|
||||
auto field_id = FieldId(info.field_id);
|
||||
AssertInfo(info.field_data != nullptr, "Field info blob is null");
|
||||
auto size = info.row_count;
|
||||
// only one field for now, parallel load field data in golang
|
||||
for (auto& [id, info] : load_info.field_infos) {
|
||||
AssertInfo(info.row_count > 0, "The row count of field data is 0");
|
||||
auto field_id = FieldId(id);
|
||||
auto insert_files = info.insert_files;
|
||||
auto field_datas = LoadFieldDatasFromRemote(insert_files);
|
||||
int64_t num_rows = storage::GetTotalNumRowsForFieldDatas(field_datas);
|
||||
AssertInfo(num_rows == info.row_count,
|
||||
"inconsistent field data row count with meta");
|
||||
auto field_data_info = FieldDataInfo{
|
||||
field_id.get(), num_rows, field_datas, load_info.mmap_dir_path};
|
||||
LoadFieldData(field_id, field_data_info);
|
||||
}
|
||||
}

void
SegmentSealedImpl::LoadFieldData(FieldId field_id,
const FieldDataInfo& data_info) {
auto num_rows = data_info.row_count;
if (row_count_opt_.has_value()) {
AssertInfo(
row_count_opt_.value() == size,
fmt::format(
"field {} has different row count {} to other column's {}",
field_id.get(),
size,
row_count_opt_.value()));
AssertInfo(row_count_opt_.value() == num_rows,
"field (" + std::to_string(field_id.get()) +
") data has different row count (" +
std::to_string(num_rows) +
") than other column's row count (" +
std::to_string(row_count_opt_.value()) + ")");
}

if (SystemProperty::Instance().IsSystem(field_id)) {
auto system_field_type =
SystemProperty::Instance().GetSystemFieldType(field_id);
if (system_field_type == SystemFieldType::Timestamp) {
auto timestamps = reinterpret_cast<const Timestamp*>(
FIELD_DATA(info.field_data, long).data());
std::vector<Timestamp> timestamps(num_rows);
int64_t offset = 0;
for (auto& data : data_info.datas) {
int64_t row_count = data->get_num_rows();
std::copy_n(static_cast<const Timestamp*>(data->Data()),
row_count,
timestamps.data() + offset);
offset += row_count;
}

TimestampIndex index;
auto min_slice_length = size < 4096 ? 1 : 4096;
auto meta = GenerateFakeSlices(timestamps, size, min_slice_length);
auto min_slice_length = num_rows < 4096 ? 1 : 4096;
auto meta = GenerateFakeSlices(
timestamps.data(), num_rows, min_slice_length);
index.set_length_meta(std::move(meta));
index.build_with(timestamps, size);
// TODO: optimize to avoid copying timestamps out of the field data
index.build_with(timestamps.data(), num_rows);

// use special index
std::unique_lock lck(mutex_);
AssertInfo(insert_record_.timestamps_.empty(), "already exists");
insert_record_.timestamps_.fill_chunk_data(timestamps, size);
insert_record_.timestamps_.fill_chunk_data(data_info.datas);
insert_record_.timestamp_index_ = std::move(index);
AssertInfo(insert_record_.timestamps_.num_chunk() == 1,
"num chunk not equal to 1 for sealed segment");
} else {
AssertInfo(system_field_type == SystemFieldType::RowId,
"System field type of id column is not RowId");
auto row_ids = reinterpret_cast<const idx_t*>(
FIELD_DATA(info.field_data, long).data());
// write data under lock
std::unique_lock lck(mutex_);
AssertInfo(insert_record_.row_ids_.empty(), "already exists");
insert_record_.row_ids_.fill_chunk_data(row_ids, size);
insert_record_.row_ids_.fill_chunk_data(data_info.datas);
AssertInfo(insert_record_.row_ids_.num_chunk() == 1,
"num chunk not equal to 1 for sealed segment");
}
@ -220,36 +241,33 @@ SegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& info) {
// prepare data
auto& field_meta = (*schema_)[field_id];
auto data_type = field_meta.get_data_type();
AssertInfo(data_type == DataType(info.field_data->type()),
"field type of load data is inconsistent with the schema");

// Don't allow raw data and index exist at the same time
AssertInfo(!get_bit(index_ready_bitset_, field_id),
"field data can't be loaded when indexing exists");

size_t size = 0;
if (datatype_is_variable(data_type)) {
std::unique_ptr<ColumnBase> column{};
switch (data_type) {
case milvus::DataType::STRING:
case milvus::DataType::VARCHAR: {
column = std::make_unique<VariableColumn<std::string>>(
get_segment_id(), field_meta, info);
get_segment_id(), field_meta, data_info);
break;
}
case milvus::DataType::JSON: {
column = std::make_unique<VariableColumn<Json>>(
get_segment_id(), field_meta, info);
get_segment_id(), field_meta, data_info);
}
default: {
}
}
size = column->size();

std::unique_lock lck(mutex_);
variable_fields_.emplace(field_id, std::move(column));
} else {
auto column = Column(get_segment_id(), field_meta, info);
size = column.size();
auto column = Column(get_segment_id(), field_meta, data_info);

std::unique_lock lck(mutex_);
fixed_fields_.emplace(field_id, std::move(column));
}
@ -258,19 +276,15 @@ SegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& info) {
if (schema_->get_primary_field_id() == field_id) {
AssertInfo(field_id.get() != -1, "Primary key is -1");
AssertInfo(insert_record_.empty_pks(), "already exists");
std::vector<PkType> pks(info.row_count);
ParsePksFromFieldData(pks, *info.field_data);

for (int i = 0; i < info.row_count; ++i) {
insert_record_.insert_pk(pks[i], i);
}
insert_record_.insert_pks(data_info.datas);
insert_record_.seal_pks();
}

std::unique_lock lck(mutex_);
set_bit(field_data_ready_bitset_, field_id, true);
}
std::unique_lock lck(mutex_);
update_row_count(info.row_count);
update_row_count(num_rows);
}

void

@ -28,7 +28,7 @@
#include "SealedIndexingRecord.h"
#include "SegmentSealed.h"
#include "TimestampIndex.h"
#include "common/Column.h"
#include "mmap/Column.h"
#include "index/ScalarIndex.h"
#include "sys/mman.h"

@ -55,6 +55,8 @@ class SegmentSealedImpl : public SegmentSealed {
HasIndex(FieldId field_id) const override;
bool
HasFieldData(FieldId field_id) const override;
void
LoadFieldData(FieldId field_id, const FieldDataInfo& data_info) override;

int64_t
get_segment_id() const override {

@ -41,7 +41,6 @@ struct LoadIndexInfo {
std::map<std::string, std::string> index_params;
std::vector<std::string> index_files;
index::IndexBasePtr index;
storage::StorageConfig storage_config;
};

} // namespace milvus::segcore

@ -12,8 +12,11 @@
#include "segcore/Utils.h"
#include <string>

#include "common/Utils.h"
#include "index/ScalarIndex.h"
#include "storage/RemoteChunkManagerSingleton.h"
#include "common/Common.h"
#include "storage/Util.h"
#include "mmap/Utils.h"

namespace milvus::segcore {

@ -37,6 +40,37 @@ ParsePksFromFieldData(std::vector<PkType>& pks, const DataArray& data) {
}
}

void
ParsePksFromFieldData(DataType data_type,
std::vector<PkType>& pks,
const std::vector<storage::FieldDataPtr>& datas) {
int64_t offset = 0;

for (auto& field_data : datas) {
AssertInfo(data_type == field_data->get_data_type(),
"inconsistent data type when parse pk from field data");
int64_t row_count = field_data->get_num_rows();
switch (data_type) {
case DataType::INT64: {
std::copy_n(static_cast<const int64_t*>(field_data->Data()),
row_count,
pks.data() + offset);
break;
}
case DataType::VARCHAR: {
std::copy_n(static_cast<const std::string*>(field_data->Data()),
row_count,
pks.data() + offset);
break;
}
default: {
PanicInfo("unsupported");
}
}
offset += row_count;
}
}
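
Note that this overload writes through pks.data() + offset without growing the vector, so the caller is expected to size pks to the total row count first. A short usage sketch under that assumption:

// datas is assumed to already hold the primary-key field data slices.
int64_t total_rows = 0;
for (auto& field_data : datas) {
    total_rows += field_data->get_num_rows();
}
std::vector<PkType> pks(total_rows);
ParsePksFromFieldData(DataType::INT64, pks, datas);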

void
ParsePksFromIDs(std::vector<PkType>& pks,
DataType data_type,
@ -509,5 +543,47 @@ ReverseDataFromIndex(const index::IndexBase* index,

return data_array;
}

// init segcore storage config first, and create default remote chunk manager
// segcore use default remote chunk manager to load data from minio/s3
std::vector<storage::FieldDataPtr>
LoadFieldDatasFromRemote(std::vector<std::string>& remote_files) {
auto rcm = storage::RemoteChunkManagerSingleton::GetInstance()
.GetRemoteChunkManager();
std::sort(remote_files.begin(),
remote_files.end(),
[](const std::string& a, const std::string& b) {
return std::stol(a.substr(a.find_last_of("/") + 1)) <
std::stol(b.substr(b.find_last_of("/") + 1));
});

auto parallel_degree =
uint64_t(DEFAULT_FIELD_MAX_MEMORY_LIMIT / FILE_SLICE_SIZE);
std::vector<std::string> batch_files;
std::vector<storage::FieldDataPtr> field_datas;

auto FetchRawData = [&]() {
auto raw_datas = GetObjectData(rcm.get(), batch_files);
for (auto& data : raw_datas) {
field_datas.emplace_back(data);
}
};

for (auto& file : remote_files) {
if (batch_files.size() >= parallel_degree) {
FetchRawData();
batch_files.clear();
}

batch_files.emplace_back(file);
}

if (batch_files.size() > 0) {
FetchRawData();
}

AssertInfo(field_datas.size() == remote_files.size(),
"inconsistent file num and raw data num!");
return field_datas;
}
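
A hedged usage sketch: the sort above orders files by std::stol of the last path segment, so every path passed in must end in a numeric log id (the paths below are made up):

std::vector<std::string> remote_files = {
    "raw_datas/446312993/101/446313011",  // hypothetical binlog paths
    "raw_datas/446312993/101/446313002",
};
auto field_datas = LoadFieldDatasFromRemote(remote_files);
// remote_files is now sorted by log id; field_datas holds one
// storage::FieldDataPtr per file, in the same order.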

} // namespace milvus::segcore

@ -28,6 +28,11 @@ namespace milvus::segcore {
void
ParsePksFromFieldData(std::vector<PkType>& pks, const DataArray& data);

void
ParsePksFromFieldData(DataType data_type,
std::vector<PkType>& pks,
const std::vector<storage::FieldDataPtr>& datas);

void
ParsePksFromIDs(std::vector<PkType>& pks,
DataType data_type,
@ -141,4 +146,7 @@ ReverseDataFromIndex(const index::IndexBase* index,
int64_t count,
const FieldMeta& field_meta);

std::vector<storage::FieldDataPtr>
LoadFieldDatasFromRemote(std::vector<std::string>& remote_files);

} // namespace milvus::segcore

83
internal/core/src/segcore/load_field_data_c.cpp
Normal file
@ -0,0 +1,83 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "common/CGoHelper.h"
#include "common/LoadInfo.h"
#include "segcore/load_field_data_c.h"

CStatus
NewLoadFieldDataInfo(CLoadFieldDataInfo* c_load_field_data_info) {
try {
auto load_field_data_info = std::make_unique<LoadFieldDataInfo>();
*c_load_field_data_info = load_field_data_info.release();
return milvus::SuccessCStatus();
} catch (std::exception& e) {
return milvus::FailureCStatus(UnexpectedError, e.what());
}
}

void
DeleteLoadFieldDataInfo(CLoadFieldDataInfo c_load_field_data_info) {
auto info = (LoadFieldDataInfo*)c_load_field_data_info;
delete info;
}

CStatus
AppendLoadFieldInfo(CLoadFieldDataInfo c_load_field_data_info,
int64_t field_id,
int64_t row_count) {
try {
auto load_field_data_info = (LoadFieldDataInfo*)c_load_field_data_info;
auto iter = load_field_data_info->field_infos.find(field_id);
if (iter != load_field_data_info->field_infos.end()) {
throw std::runtime_error("append same field info multi times");
}
FieldBinlogInfo binlog_info;
binlog_info.field_id = field_id;
binlog_info.row_count = row_count;
load_field_data_info->field_infos[field_id] = binlog_info;
return milvus::SuccessCStatus();
} catch (std::exception& e) {
return milvus::FailureCStatus(UnexpectedError, e.what());
}
}

CStatus
AppendLoadFieldDataPath(CLoadFieldDataInfo c_load_field_data_info,
int64_t field_id,
const char* c_file_path) {
try {
auto load_field_data_info = (LoadFieldDataInfo*)c_load_field_data_info;
auto iter = load_field_data_info->field_infos.find(field_id);
std::string file_path(c_file_path);
if (iter == load_field_data_info->field_infos.end()) {
throw std::runtime_error("please append field info first");
}

load_field_data_info->field_infos[field_id].insert_files.emplace_back(
file_path);
return milvus::SuccessCStatus();
} catch (std::exception& e) {
return milvus::FailureCStatus(UnexpectedError, e.what());
}
}

void
AppendMMapDirPath(CLoadFieldDataInfo c_load_field_data_info,
const char* c_dir_path) {
auto load_field_data_info = (LoadFieldDataInfo*)c_load_field_data_info;
load_field_data_info->mmap_dir_path = std::string(c_dir_path);
}
50
internal/core/src/segcore/load_field_data_c.h
Normal file
@ -0,0 +1,50 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifdef __cplusplus
extern "C" {
#endif

#include <stdlib.h>

#include "common/type_c.h"

typedef void* CLoadFieldDataInfo;

CStatus
NewLoadFieldDataInfo(CLoadFieldDataInfo* c_load_field_data_info);

void
DeleteLoadFieldDataInfo(CLoadFieldDataInfo c_load_field_data_info);

CStatus
AppendLoadFieldInfo(CLoadFieldDataInfo c_load_field_data_info,
int64_t field_id,
int64_t row_count);

CStatus
AppendLoadFieldDataPath(CLoadFieldDataInfo c_load_field_data_info,
int64_t field_id,
const char* file_path);

void
AppendMMapDirPath(CLoadFieldDataInfo c_load_field_data_info,
const char* dir_path);

#ifdef __cplusplus
}
#endif
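
Putting the C API together, the intended call sequence from the Go side looks roughly like this (field id, row count, and path are illustrative; error handling elided):

CLoadFieldDataInfo c_load_info;
CStatus status = NewLoadFieldDataInfo(&c_load_info);
// register the field before appending any of its binlog paths
status = AppendLoadFieldInfo(c_load_info, /*field_id=*/100, /*row_count=*/1000);
status = AppendLoadFieldDataPath(c_load_info, 100,
                                 "raw_datas/1/100/435978159261483008");
AppendMMapDirPath(c_load_info, "/var/lib/milvus/mmap");  // optional
status = LoadFieldData(segment, c_load_info);  // segment: a CSegmentInterface
DeleteLoadFieldDataInfo(c_load_info);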

@ -11,36 +11,20 @@

#include "segcore/load_index_c.h"

#include "common/CDataType.h"
#include "common/FieldMeta.h"
#include "common/Utils.h"
#include "index/IndexFactory.h"
#include "index/Meta.h"
#include "index/Utils.h"
#include "segcore/Types.h"
#include "storage/Util.h"
#include "storage/RemoteChunkManagerSingleton.h"
#include "storage/LocalChunkManagerSingleton.h"

CStatus
NewLoadIndexInfo(CLoadIndexInfo* c_load_index_info,
CStorageConfig c_storage_config) {
NewLoadIndexInfo(CLoadIndexInfo* c_load_index_info) {
try {
auto load_index_info =
std::make_unique<milvus::segcore::LoadIndexInfo>();
auto& storage_config = load_index_info->storage_config;
storage_config.address = std::string(c_storage_config.address);
storage_config.bucket_name = std::string(c_storage_config.bucket_name);
storage_config.access_key_id =
std::string(c_storage_config.access_key_id);
storage_config.access_key_value =
std::string(c_storage_config.access_key_value);
storage_config.remote_root_path =
std::string(c_storage_config.remote_root_path);
storage_config.storage_type =
std::string(c_storage_config.storage_type);
storage_config.iam_endpoint =
std::string(c_storage_config.iam_endpoint);
storage_config.useSSL = c_storage_config.useSSL;
storage_config.useIAM = c_storage_config.useIAM;

*c_load_index_info = load_index_info.release();
auto status = CStatus();
@ -143,11 +127,15 @@ appendVecIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {
load_index_info->field_id,
load_index_info->index_build_id,
load_index_info->index_version};
auto remote_chunk_manager =
milvus::storage::RemoteChunkManagerSingleton::GetInstance()
.GetRemoteChunkManager();
auto file_manager =
milvus::storage::CreateFileManager(index_info.index_type,
field_meta,
index_meta,
load_index_info->storage_config);
remote_chunk_manager);
AssertInfo(file_manager != nullptr, "create file manager failed!");

auto config = milvus::index::ParseConfigFromIndexParams(
load_index_info->index_params);
@ -212,6 +200,69 @@ AppendIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set) {
return appendScalarIndex(c_load_index_info, c_binary_set);
}

CStatus
AppendIndexV2(CLoadIndexInfo c_load_index_info) {
try {
auto load_index_info =
(milvus::segcore::LoadIndexInfo*)c_load_index_info;
auto& index_params = load_index_info->index_params;
auto field_type = load_index_info->field_type;

milvus::index::CreateIndexInfo index_info;
index_info.field_type = load_index_info->field_type;

// get index type
AssertInfo(index_params.find("index_type") != index_params.end(),
"index type is empty");
index_info.index_type = index_params.at("index_type");

// get metric type
if (milvus::datatype_is_vector(field_type)) {
AssertInfo(index_params.find("metric_type") != index_params.end(),
"metric type is empty for vector index");
index_info.metric_type = index_params.at("metric_type");
}

// init file manager
milvus::storage::FieldDataMeta field_meta{
load_index_info->collection_id,
load_index_info->partition_id,
load_index_info->segment_id,
load_index_info->field_id};
milvus::storage::IndexMeta index_meta{load_index_info->segment_id,
load_index_info->field_id,
load_index_info->index_build_id,
load_index_info->index_version};
auto remote_chunk_manager =
milvus::storage::RemoteChunkManagerSingleton::GetInstance()
.GetRemoteChunkManager();
auto file_manager =
milvus::storage::CreateFileManager(index_info.index_type,
field_meta,
index_meta,
remote_chunk_manager);
AssertInfo(file_manager != nullptr, "create file manager failed!");

auto config = milvus::index::ParseConfigFromIndexParams(
load_index_info->index_params);
config["index_files"] = load_index_info->index_files;

load_index_info->index =
milvus::index::IndexFactory::GetInstance().CreateIndex(
index_info, file_manager);
load_index_info->index->Load(config);
auto status = CStatus();
status.error_code = Success;
status.error_msg = "";
return status;
} catch (std::exception& e) {
auto status = CStatus();
status.error_code = UnexpectedError;
status.error_msg = strdup(e.what());
return status;
}
}
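
AppendIndexV2 derives everything from index_params, so the map must carry at least the keys asserted above. A minimal illustration (values are examples, not a recommendation):

std::map<std::string, std::string> index_params = {
    {"index_type", "HNSW"},  // required: checked for every index
    {"metric_type", "L2"},   // required only when the field is a vector type
};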

CStatus
AppendIndexFilePath(CLoadIndexInfo c_load_index_info, const char* c_file_path) {
try {
@ -261,12 +312,14 @@ CleanLoadedIndex(CLoadIndexInfo c_load_index_info) {
try {
auto load_index_info =
(milvus::segcore::LoadIndexInfo*)c_load_index_info;
auto index_file_path_prefix = milvus::storage::GenLocalIndexPathPrefix(
load_index_info->index_build_id, load_index_info->index_version);
#ifdef BUILD_DISK_ANN
milvus::storage::LocalChunkManager::GetInstance().RemoveDir(
index_file_path_prefix);
#endif
auto local_chunk_manager =
milvus::storage::LocalChunkManagerSingleton::GetInstance()
.GetChunkManager();
auto index_file_path_prefix =
milvus::storage::GenIndexPathPrefix(local_chunk_manager,
load_index_info->index_build_id,
load_index_info->index_version);
local_chunk_manager->RemoveDir(index_file_path_prefix);
auto status = CStatus();
status.error_code = Success;
status.error_msg = "";

@ -24,8 +24,7 @@ extern "C" {
typedef void* CLoadIndexInfo;

CStatus
NewLoadIndexInfo(CLoadIndexInfo* c_load_index_info,
CStorageConfig c_storage_config);
NewLoadIndexInfo(CLoadIndexInfo* c_load_index_info);

void
DeleteLoadIndexInfo(CLoadIndexInfo c_load_index_info);
@ -55,6 +54,9 @@ AppendIndex(CLoadIndexInfo c_load_index_info, CBinarySet c_binary_set);
CStatus
AppendIndexFilePath(CLoadIndexInfo c_load_index_info, const char* file_path);

CStatus
AppendIndexV2(CLoadIndexInfo c_load_index_info);

CStatus
CleanLoadedIndex(CLoadIndexInfo c_load_index_info);

@ -17,12 +17,12 @@
#include "common/Tracer.h"
#include "common/type_c.h"
#include "google/protobuf/text_format.h"
#include "index/IndexInfo.h"
#include "log/Log.h"
#include "segcore/Collection.h"
#include "segcore/SegmentGrowingImpl.h"
#include "segcore/SegmentSealedImpl.h"
#include "segcore/SegcoreConfig.h"
#include "storage/Util.h"
#include "mmap/Types.h"

////////////////////////////// common interfaces //////////////////////////////
CSegmentInterface
@ -228,22 +228,51 @@ Delete(CSegmentInterface c_segment,
////////////////////////////// interfaces for sealed segment //////////////////////////////
CStatus
LoadFieldData(CSegmentInterface c_segment,
CLoadFieldDataInfo load_field_data_info) {
CLoadFieldDataInfo c_load_field_data_info) {
try {
auto segment =
reinterpret_cast<milvus::segcore::SegmentInterface*>(c_segment);
AssertInfo(segment != nullptr, "segment conversion failed");
auto load_info = (LoadFieldDataInfo*)c_load_field_data_info;
segment->LoadFieldData(*load_info);
return milvus::SuccessCStatus();
} catch (std::exception& e) {
return milvus::FailureCStatus(UnexpectedError, e.what());
}
}

// just for test
CStatus
LoadFieldRawData(CSegmentInterface c_segment,
int64_t field_id,
const void* data,
int64_t row_count) {
try {
auto segment_interface =
reinterpret_cast<milvus::segcore::SegmentInterface*>(c_segment);
auto segment =
dynamic_cast<milvus::segcore::SegmentSealed*>(segment_interface);
AssertInfo(segment != nullptr, "segment conversion failed");
auto field_data = std::make_unique<milvus::DataArray>();
auto suc = field_data->ParseFromArray(load_field_data_info.blob,
load_field_data_info.blob_size);
AssertInfo(suc, "unmarshal field data string failed");
auto load_info = LoadFieldDataInfo{load_field_data_info.field_id,
field_data.get(),
load_field_data_info.row_count,
load_field_data_info.mmap_dir_path};
segment->LoadFieldData(load_info);
milvus::DataType data_type;
int64_t dim = 1;
if (milvus::SystemProperty::Instance().IsSystem(
milvus::FieldId(field_id))) {
data_type = milvus::DataType::INT64;
} else {
auto field_meta = segment->get_schema()[milvus::FieldId(field_id)];
data_type = field_meta.get_data_type();

if (milvus::datatype_is_vector(data_type)) {
dim = field_meta.get_dim();
}
}
auto field_data = milvus::storage::CreateFieldData(data_type, dim);
field_data->FillFieldData(data, row_count);
auto field_data_info = milvus::FieldDataInfo{
field_id,
row_count,
std::vector<milvus::storage::FieldDataPtr>{field_data}};
segment->LoadFieldData(milvus::FieldId(field_id), field_data_info);
return milvus::SuccessCStatus();
} catch (std::exception& e) {
return milvus::FailureCStatus(UnexpectedError, e.what());

@ -22,6 +22,7 @@ extern "C" {
#include "common/type_c.h"
#include "segcore/plan_c.h"
#include "segcore/load_index_c.h"
#include "segcore/load_field_data_c.h"

typedef void* CSegmentInterface;
typedef void* CSearchResult;
@ -88,6 +89,12 @@ CStatus
LoadFieldData(CSegmentInterface c_segment,
CLoadFieldDataInfo load_field_data_info);

CStatus
LoadFieldRawData(CSegmentInterface c_segment,
int64_t field_id,
const void* data,
int64_t row_count);

CStatus
LoadDeletedRecord(CSegmentInterface c_segment,
CLoadDeletedRecordInfo deleted_record_info);

@ -1,3 +1,14 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License

/**
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.

@ -1,3 +1,14 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License

/**
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.

@ -1,3 +1,14 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License

/**
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.

@ -1,3 +1,14 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License

/**
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.

@ -37,8 +37,8 @@ BinlogReader::Read(int64_t nbytes) {
Status(SERVER_UNEXPECTED_ERROR, "out range of binlog data"),
nullptr);
}
auto res = std::shared_ptr<uint8_t[]>(new uint8_t[nbytes]);
std::memcpy(res.get(), data_.get() + tell_, nbytes);
auto deleter = [&](uint8_t*) {};  // no-op deleter avoids repeated destruction
auto res = std::shared_ptr<uint8_t[]>(data_.get() + tell_, deleter);
tell_ += nbytes;
return std::make_pair(Status(SERVER_SUCCESS, ""), res);
}
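
The rewritten Read no longer copies: the no-op deleter makes the returned shared_ptr a non-owning view into data_, so the result must not outlive the reader's buffer. A self-contained sketch of the idiom:

#include <cstdint>
#include <memory>

int main() {
    // owner allocates the buffer and will eventually free it
    auto owner = std::shared_ptr<uint8_t[]>(new uint8_t[16]);
    // view points into the middle of the buffer but never frees it
    auto noop = [](uint8_t*) {};
    std::shared_ptr<uint8_t[]> view(owner.get() + 4, noop);
    // using view after owner is destroyed would be a dangling access
    return 0;
}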

@ -31,12 +31,6 @@ class BinlogReader {
: data_(binlog_data), size_(length), tell_(0) {
}

explicit BinlogReader(const uint8_t* binlog_data, int64_t length)
: size_(length), tell_(0) {
data_ = std::shared_ptr<uint8_t[]>(new uint8_t[length]);
std::memcpy(data_.get(), binlog_data, length);
}

Status
Read(int64_t nbytes, void* out);

@ -31,22 +31,17 @@ set(STORAGE_FILES
PayloadReader.cpp
PayloadWriter.cpp
BinlogReader.cpp
FieldDataFactory.cpp
IndexData.cpp
IndexData.cpp
InsertData.cpp
Event.cpp
ThreadPool.cpp
storage_c.cpp)

if(BUILD_DISK_ANN STREQUAL "ON")
set(STORAGE_FILES
${STORAGE_FILES}
LocalChunkManager.cpp
MinioChunkManager.cpp
AliyunSTSClient.cpp
AliyunCredentialsProvider.cpp
DiskFileManagerImpl.cpp)
endif()
storage_c.cpp
MinioChunkManager.cpp
AliyunSTSClient.cpp
AliyunCredentialsProvider.cpp
MemFileManagerImpl.cpp
LocalChunkManager.cpp
DiskFileManagerImpl.cpp)

add_library(milvus_storage SHARED ${STORAGE_FILES})

@ -20,6 +20,7 @@
#include <memory>
#include <string>
#include <vector>
#include <map>

namespace milvus::storage {

@ -112,23 +113,23 @@ class ChunkManager {
*/
virtual std::string
GetName() const = 0;
};

/**
* @brief RemoteChunkManager is responsible for read and write Remote file
* that inherited from ChunkManager.
*/

class RemoteChunkManager : public ChunkManager {
public:
virtual ~RemoteChunkManager() {
}
/**
* @brief Get the Root Path
* @return std::string
*/
virtual std::string
GetName() const {
return "RemoteChunkManager";
}
GetRootPath() const = 0;
};

using RemoteChunkManagerPtr = std::unique_ptr<RemoteChunkManager>;
using ChunkManagerPtr = std::shared_ptr<ChunkManager>;

enum ChunkManagerType : int8_t {
None_CM = 0,
Local = 1,
Minio = 2,
};

extern std::map<std::string, ChunkManagerType> ChunkManagerType_Map;

} // namespace milvus::storage

@ -40,7 +40,7 @@ DeserializeRemoteFileData(BinlogReaderPtr reader) {
switch (header.event_type_) {
case EventType::InsertEvent: {
auto event_data_length =
header.event_length_ - header.next_position_;
header.event_length_ - GetEventHeaderSize(header);
auto insert_event_data =
InsertEventData(reader, event_data_length, data_type);
auto insert_data =
@ -52,11 +52,26 @@ DeserializeRemoteFileData(BinlogReaderPtr reader) {
}
case EventType::IndexFileEvent: {
auto event_data_length =
header.event_length_ - header.next_position_;
header.event_length_ - GetEventHeaderSize(header);
auto index_event_data =
IndexEventData(reader, event_data_length, data_type);
auto index_data =
std::make_unique<IndexData>(index_event_data.field_data);
auto field_data = index_event_data.field_data;
// for compatibility with golang indexcode.Serialize, which sets dataType to String
if (data_type == DataType::STRING) {
AssertInfo(field_data->get_data_type() == DataType::STRING,
"wrong index type in index binlog file");
AssertInfo(
field_data->get_num_rows() == 1,
"wrong length of string num in old index binlog file");
auto new_field_data = CreateFieldData(DataType::INT8);
new_field_data->FillFieldData(
(*static_cast<const std::string*>(field_data->RawValue(0)))
.c_str(),
field_data->Size());
field_data = new_field_data;
}

auto index_data = std::make_unique<IndexData>(field_data);
index_data->SetFieldDataMeta(data_meta);
IndexMeta index_meta;
index_meta.segment_id = data_meta.segment_id;

@ -22,59 +22,28 @@
#include "common/Common.h"
#include "common/Slice.h"
#include "log/Log.h"
#include "config/ConfigKnowhere.h"

#include "storage/DiskFileManagerImpl.h"
#include "storage/LocalChunkManager.h"
#include "storage/MinioChunkManager.h"
#include "storage/LocalChunkManagerSingleton.h"
#include "storage/Exception.h"
#include "storage/FieldData.h"
#include "storage/IndexData.h"
#include "storage/ThreadPool.h"
#include "storage/Util.h"
#include "storage/FieldDataFactory.h"

#define FILEMANAGER_TRY try {
#define FILEMANAGER_CATCH \
} \
catch (LocalChunkManagerException & e) { \
LOG_SEGCORE_ERROR_ << "LocalChunkManagerException:" << e.what(); \
return false; \
} \
catch (MinioException & e) { \
LOG_SEGCORE_ERROR_ << "milvus::storage::MinioException:" << e.what(); \
return false; \
} \
catch (DiskANNFileManagerException & e) { \
LOG_SEGCORE_ERROR_ << "milvus::storage::DiskANNFileManagerException:" \
<< e.what(); \
return false; \
} \
catch (ArrowException & e) { \
LOG_SEGCORE_ERROR_ << "milvus::storage::ArrowException:" << e.what(); \
return false; \
} \
catch (std::exception & e) { \
LOG_SEGCORE_ERROR_ << "Exception:" << e.what(); \
return false;
#define FILEMANAGER_END }

using ReadLock = std::shared_lock<std::shared_mutex>;
using WriteLock = std::lock_guard<std::shared_mutex>;
#include "storage/ThreadPool.h"

namespace milvus::storage {

DiskFileManagerImpl::DiskFileManagerImpl(const FieldDataMeta& field_meta,
DiskFileManagerImpl::DiskFileManagerImpl(const FieldDataMeta& field_meta,
IndexMeta index_meta,
const StorageConfig& storage_config)
: field_meta_(field_meta), index_meta_(std::move(index_meta)) {
remote_root_path_ = storage_config.remote_root_path;
rcm_ = std::make_unique<MinioChunkManager>(storage_config);
ChunkManagerPtr remote_chunk_manager)
: FileManagerImpl(field_meta, index_meta) {
rcm_ = remote_chunk_manager;
}

DiskFileManagerImpl::~DiskFileManagerImpl() {
auto& local_chunk_manager = LocalChunkManager::GetInstance();
local_chunk_manager.RemoveDir(
GetLocalIndexPathPrefixWithBuildID(index_meta_.build_id));
auto local_chunk_manager =
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
local_chunk_manager->RemoveDir(GetIndexPathPrefixWithBuildID(
local_chunk_manager, index_meta_.build_id));
}

bool
@ -82,38 +51,19 @@ DiskFileManagerImpl::LoadFile(const std::string& file) noexcept {
return true;
}

std::pair<std::string, size_t>
EncodeAndUploadIndexSlice(RemoteChunkManager* remote_chunk_manager,
const std::string& file,
int64_t offset,
int64_t batch_size,
const IndexMeta& index_meta,
FieldDataMeta field_meta,
std::string object_key) {
auto& local_chunk_manager = LocalChunkManager::GetInstance();
auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[batch_size]);
local_chunk_manager.Read(file, offset, buf.get(), batch_size);

auto field_data =
milvus::storage::FieldDataFactory::GetInstance().CreateFieldData(
DataType::INT8);
field_data->FillFieldData(buf.get(), batch_size);
auto indexData = std::make_shared<IndexData>(field_data);
indexData->set_index_meta(index_meta);
indexData->SetFieldDataMeta(field_meta);
auto serialized_index_data = indexData->serialize_to_remote_file();
auto serialized_index_size = serialized_index_data.size();
remote_chunk_manager->Write(
object_key, serialized_index_data.data(), serialized_index_size);
return std::make_pair(std::move(object_key), serialized_index_size);
std::string
DiskFileManagerImpl::GetRemoteIndexPath(const std::string& file_name,
int64_t slice_num) const {
auto remote_prefix = GetRemoteIndexObjectPrefix();
return remote_prefix + "/" + file_name + "_" + std::to_string(slice_num);
}

bool
DiskFileManagerImpl::AddFile(const std::string& file) noexcept {
auto& local_chunk_manager = LocalChunkManager::GetInstance();
auto& pool = ThreadPool::GetInstance();
auto local_chunk_manager =
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
FILEMANAGER_TRY
if (!local_chunk_manager.Exist(file)) {
if (!local_chunk_manager->Exist(file)) {
LOG_SEGCORE_ERROR_ << "local file: " << file << " does not exist ";
return false;
}
@ -122,15 +72,15 @@ DiskFileManagerImpl::AddFile(const std::string& file) noexcept {
local_paths_.emplace_back(file);

auto fileName = GetFileName(file);
auto fileSize = local_chunk_manager.Size(file);
auto fileSize = local_chunk_manager->Size(file);

std::vector<std::string> batch_remote_files;
std::vector<int64_t> remote_file_sizes;
std::vector<int64_t> local_file_offsets;

int slice_num = 0;
auto parallel_degree = uint64_t(DEFAULT_DISK_INDEX_MAX_MEMORY_LIMIT /
(index_file_slice_size << 20));
auto parallel_degree =
uint64_t(DEFAULT_FIELD_MAX_MEMORY_LIMIT / FILE_SLICE_SIZE);
for (int64_t offset = 0; offset < fileSize; slice_num++) {
if (batch_remote_files.size() >= parallel_degree) {
AddBatchIndexFiles(file,
@ -142,10 +92,9 @@ DiskFileManagerImpl::AddFile(const std::string& file) noexcept {
local_file_offsets.clear();
}

auto batch_size =
std::min(index_file_slice_size << 20, int64_t(fileSize) - offset);
auto batch_size = std::min(FILE_SLICE_SIZE, int64_t(fileSize) - offset);
batch_remote_files.emplace_back(
GenerateRemoteIndexFile(fileName, slice_num));
GetRemoteIndexPath(fileName, slice_num));
remote_file_sizes.emplace_back(batch_size);
local_file_offsets.emplace_back(offset);
offset += batch_size;
@ -166,35 +115,57 @@ DiskFileManagerImpl::AddBatchIndexFiles(
const std::vector<int64_t>& local_file_offsets,
const std::vector<std::string>& remote_files,
const std::vector<int64_t>& remote_file_sizes) {
auto local_chunk_manager =
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
auto& pool = ThreadPool::GetInstance();

std::vector<std::future<std::pair<std::string, size_t>>> futures;
auto LoadIndexFromDisk = [&](
const std::string& file,
const int64_t offset,
const int64_t data_size) -> std::shared_ptr<uint8_t[]> {
auto buf = std::shared_ptr<uint8_t[]>(new uint8_t[data_size]);
local_chunk_manager->Read(file, offset, buf.get(), data_size);
return buf;
};

std::vector<std::future<std::shared_ptr<uint8_t[]>>> futures;
AssertInfo(local_file_offsets.size() == remote_files.size(),
"inconsistent size of offset slices with file slices");
AssertInfo(remote_files.size() == remote_file_sizes.size(),
"inconsistent size of file slices with size slices");

for (int64_t i = 0; i < remote_files.size(); ++i) {
futures.push_back(pool.Submit(EncodeAndUploadIndexSlice,
rcm_.get(),
futures.push_back(pool.Submit(LoadIndexFromDisk,
local_file_name,
local_file_offsets[i],
remote_file_sizes[i],
index_meta_,
field_meta_,
remote_files[i]));
remote_file_sizes[i]));
}

// hold index data until uploading the index files is done
std::vector<std::shared_ptr<uint8_t[]>> index_datas;
std::vector<const uint8_t*> data_slices;
for (auto& future : futures) {
auto res = future.get();
remote_paths_to_size_[res.first] = res.second;
index_datas.emplace_back(res);
data_slices.emplace_back(res.get());
}

auto res = PutIndexData(rcm_.get(),
data_slices,
remote_file_sizes,
remote_files,
field_meta_,
index_meta_);
for (auto iter = res.begin(); iter != res.end(); ++iter) {
remote_paths_to_size_[iter->first] = iter->second;
}
}

void
DiskFileManagerImpl::CacheIndexToDisk(
const std::vector<std::string>& remote_files) {
auto& local_chunk_manager = LocalChunkManager::GetInstance();
auto local_chunk_manager =
LocalChunkManagerSingleton::GetInstance().GetChunkManager();

std::map<std::string, std::vector<int>> index_slices;
for (auto& file_path : remote_files) {
@ -209,7 +180,7 @@ DiskFileManagerImpl::CacheIndexToDisk(

auto EstimateParallelDegree = [&](const std::string& file) -> uint64_t {
auto fileSize = rcm_->Size(file);
return uint64_t(DEFAULT_DISK_INDEX_MAX_MEMORY_LIMIT / fileSize);
return uint64_t(DEFAULT_FIELD_MAX_MEMORY_LIMIT / fileSize);
};

for (auto& slices : index_slices) {
@ -217,7 +188,7 @@ DiskFileManagerImpl::CacheIndexToDisk(
auto local_index_file_name =
GetLocalIndexObjectPrefix() +
prefix.substr(prefix.find_last_of('/') + 1);
local_chunk_manager.CreateFile(local_index_file_name);
local_chunk_manager->CreateFile(local_index_file_name);
int64_t offset = 0;
std::vector<std::string> batch_remote_files;
uint64_t max_parallel_degree = INT_MAX;
@ -245,72 +216,125 @@ DiskFileManagerImpl::CacheIndexToDisk(
}
}

std::unique_ptr<DataCodec>
DownloadAndDecodeRemoteIndexfile(RemoteChunkManager* remote_chunk_manager,
const std::string& file) {
auto fileSize = remote_chunk_manager->Size(file);
auto buf = std::shared_ptr<uint8_t[]>(new uint8_t[fileSize]);
remote_chunk_manager->Read(file, buf.get(), fileSize);

return DeserializeFileData(buf, fileSize);
}

uint64_t
DiskFileManagerImpl::CacheBatchIndexFilesToDisk(
const std::vector<std::string>& remote_files,
const std::string& local_file_name,
uint64_t local_file_init_offset) {
auto& local_chunk_manager = LocalChunkManager::GetInstance();
auto& pool = ThreadPool::GetInstance();
auto local_chunk_manager =
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
auto index_datas = GetObjectData(rcm_.get(), remote_files);
int batch_size = remote_files.size();

std::vector<std::future<std::unique_ptr<DataCodec>>> futures;
for (int i = 0; i < batch_size; ++i) {
futures.push_back(pool.Submit(
DownloadAndDecodeRemoteIndexfile, rcm_.get(), remote_files[i]));
}
AssertInfo(index_datas.size() == batch_size,
"inconsistent file num and index data num!");

uint64_t offset = local_file_init_offset;
for (int i = 0; i < batch_size; ++i) {
auto res = futures[i].get();
auto index_data = res->GetFieldData();
auto index_data = index_datas[i];
auto index_size = index_data->Size();
local_chunk_manager.Write(
local_file_name,
offset,
reinterpret_cast<uint8_t*>(const_cast<void*>(index_data->Data())),
index_size);
auto uint8_data =
reinterpret_cast<uint8_t*>(const_cast<void*>(index_data->Data()));
local_chunk_manager->Write(
local_file_name, offset, uint8_data, index_size);
offset += index_size;
}

return offset;
}

std::string
DiskFileManagerImpl::CacheRawDataToDisk(std::vector<std::string> remote_files) {
std::sort(remote_files.begin(),
remote_files.end(),
[](const std::string& a, const std::string& b) {
return std::stol(a.substr(a.find_last_of("/") + 1)) <
std::stol(b.substr(b.find_last_of("/") + 1));
});

auto segment_id = GetFieldDataMeta().segment_id;
auto field_id = GetFieldDataMeta().field_id;

auto local_chunk_manager =
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
auto local_data_path = storage::GenFieldRawDataPathPrefix(
local_chunk_manager, segment_id, field_id) +
"raw_data";
local_chunk_manager->CreateFile(local_data_path);

// get batch raw data from s3 and write batch data to disk file
// TODO: load and write of different batches at the same time
std::vector<std::string> batch_files;

// file format
// num_rows(uint32) | dim(uint32) | index_data ([]uint8_t)
uint32_t num_rows = 0;
uint32_t dim = 0;
int64_t write_offset = sizeof(num_rows) + sizeof(dim);

auto FetchRawData = [&]() {
auto field_datas = GetObjectData(rcm_.get(), batch_files);
int batch_size = batch_files.size();
for (int i = 0; i < batch_size; ++i) {
auto field_data = field_datas[i];
num_rows += uint32_t(field_data->get_num_rows());
AssertInfo(dim == 0 || dim == field_data->get_dim(),
"inconsistent dim value in multi binlogs!");
dim = field_data->get_dim();

auto data_size = field_data->get_num_rows() * dim * sizeof(float);
local_chunk_manager->Write(local_data_path,
write_offset,
const_cast<void*>(field_data->Data()),
data_size);
write_offset += data_size;
}
};

auto parallel_degree =
uint64_t(DEFAULT_FIELD_MAX_MEMORY_LIMIT / FILE_SLICE_SIZE);
for (auto& file : remote_files) {
if (batch_files.size() >= parallel_degree) {
FetchRawData();
batch_files.clear();
}

batch_files.emplace_back(file);
}

if (batch_files.size() > 0) {
FetchRawData();
}

// write num_rows and dim value to file header
write_offset = 0;
local_chunk_manager->Write(
local_data_path, write_offset, &num_rows, sizeof(num_rows));
write_offset += sizeof(num_rows);
local_chunk_manager->Write(
local_data_path, write_offset, &dim, sizeof(dim));

return local_data_path;
}
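
To make the layout concrete, a sketch of reading the header back, reusing the chunk-manager API from the function above (manager and path assumed to be set up the same way):

uint32_t num_rows = 0;
uint32_t dim = 0;
int64_t read_offset = 0;
local_chunk_manager->Read(
    local_data_path, read_offset, &num_rows, sizeof(num_rows));
read_offset += sizeof(num_rows);
local_chunk_manager->Read(local_data_path, read_offset, &dim, sizeof(dim));
// the float vector data follows, num_rows * dim * sizeof(float) bytes long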

std::string
DiskFileManagerImpl::GetFileName(const std::string& localfile) {
boost::filesystem::path localPath(localfile);
return localPath.filename().string();
}

std::string
DiskFileManagerImpl::GetRemoteIndexObjectPrefix() const {
return remote_root_path_ + "/" + std::string(INDEX_ROOT_PATH) + "/" +
std::to_string(index_meta_.build_id) + "/" +
std::to_string(index_meta_.index_version) + "/" +
std::to_string(field_meta_.partition_id) + "/" +
std::to_string(field_meta_.segment_id);
}

std::string
DiskFileManagerImpl::GetLocalIndexObjectPrefix() {
return GenLocalIndexPathPrefix(index_meta_.build_id,
index_meta_.index_version);
auto local_chunk_manager =
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
return GenIndexPathPrefix(
local_chunk_manager, index_meta_.build_id, index_meta_.index_version);
}

std::string
DiskFileManagerImpl::GetLocalRawDataObjectPrefix() {
return GenFieldRawDataPathPrefix(field_meta_.segment_id,
field_meta_.field_id);
auto local_chunk_manager =
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
return GenFieldRawDataPathPrefix(
local_chunk_manager, field_meta_.segment_id, field_meta_.field_id);
}

bool
@ -322,9 +346,10 @@ DiskFileManagerImpl::RemoveFile(const std::string& file) noexcept {
std::optional<bool>
DiskFileManagerImpl::IsExisted(const std::string& file) noexcept {
bool isExist = false;
auto& local_chunk_manager = LocalChunkManager::GetInstance();
auto local_chunk_manager =
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
try {
isExist = local_chunk_manager.Exist(file);
isExist = local_chunk_manager->Exist(file);
} catch (LocalChunkManagerException& e) {
// LOG_SEGCORE_DEBUG_ << "LocalChunkManagerException:"
// << e.what();

@ -24,8 +24,7 @@

#include "storage/IndexData.h"
#include "storage/FileManager.h"
#include "storage/LocalChunkManager.h"
#include "storage/MinioChunkManager.h"
#include "storage/ChunkManager.h"

#include "common/Consts.h"

@ -33,9 +32,9 @@ namespace milvus::storage {

class DiskFileManagerImpl : public FileManagerImpl {
public:
explicit DiskFileManagerImpl(const FieldDataMeta& field_meta,
explicit DiskFileManagerImpl(const FieldDataMeta& field_meta,
IndexMeta index_meta,
|
||||
const StorageConfig& storage_config);
|
||||
ChunkManagerPtr remote_chunk_manager);
|
||||
|
||||
virtual ~DiskFileManagerImpl();
|
||||
|
||||
@ -57,9 +56,6 @@ class DiskFileManagerImpl : public FileManagerImpl {
|
||||
return "DiskFileManagerImpl";
|
||||
}
|
||||
|
||||
std::string
|
||||
GetRemoteIndexObjectPrefix() const;
|
||||
|
||||
std::string
|
||||
GetLocalIndexObjectPrefix();
|
||||
|
||||
@ -76,13 +72,6 @@ class DiskFileManagerImpl : public FileManagerImpl {
|
||||
return local_paths_;
|
||||
}
|
||||
|
||||
std::string
|
||||
GenerateRemoteIndexFile(const std::string& file_name,
|
||||
int64_t slice_num) const {
|
||||
return GetRemoteIndexObjectPrefix() + "/" + file_name + "_" +
|
||||
std::to_string(slice_num);
|
||||
}
|
||||
|
||||
void
|
||||
CacheIndexToDisk(const std::vector<std::string>& remote_files);
|
||||
|
||||
@ -97,15 +86,8 @@ class DiskFileManagerImpl : public FileManagerImpl {
|
||||
const std::vector<std::string>& remote_files,
|
||||
const std::vector<int64_t>& remote_file_sizes);
|
||||
|
||||
FieldDataMeta
|
||||
GetFileDataMeta() const {
|
||||
return field_meta_;
|
||||
}
|
||||
|
||||
IndexMeta
|
||||
GetIndexMeta() const {
|
||||
return index_meta_;
|
||||
}
|
||||
std::string
|
||||
CacheRawDataToDisk(std::vector<std::string> remote_files);
|
||||
|
||||
private:
|
||||
int64_t
|
||||
@ -116,21 +98,15 @@ class DiskFileManagerImpl : public FileManagerImpl {
|
||||
std::string
|
||||
GetFileName(const std::string& localfile);
|
||||
|
||||
std::string
|
||||
GetRemoteIndexPath(const std::string& file_name, int64_t slice_num) const;
|
||||
|
||||
private:
|
||||
// collection meta
|
||||
FieldDataMeta field_meta_;
|
||||
|
||||
// index meta
|
||||
IndexMeta index_meta_;
|
||||
|
||||
// local file path (abs path)
|
||||
std::vector<std::string> local_paths_;
|
||||
|
||||
// remote file path
|
||||
std::map<std::string, int64_t> remote_paths_to_size_;
|
||||
|
||||
RemoteChunkManagerPtr rcm_;
|
||||
std::string remote_root_path_;
|
||||
};
|
||||
|
||||
using DiskANNFileManagerImplPtr = std::shared_ptr<DiskFileManagerImpl>;
|
||||
|
||||
@ -15,10 +15,8 @@
|
||||
// limitations under the License.
|
||||
|
||||
#include "storage/Event.h"
|
||||
#include "storage/Util.h"
|
||||
#include "storage/PayloadReader.h"
|
||||
#include "storage/PayloadWriter.h"
|
||||
#include "storage/FieldDataFactory.h"
|
||||
#include "exceptions/EasyAssert.h"
|
||||
#include "utils/Json.h"
|
||||
#include "common/Consts.h"
|
||||
@ -219,19 +217,42 @@ BaseEventData::Serialize() {
|
||||
} else {
|
||||
payload_writer = std::make_unique<PayloadWriter>(data_type);
|
||||
}
|
||||
if (datatype_is_string(data_type)) {
|
||||
for (size_t offset = 0; offset < field_data->get_num_rows(); ++offset) {
|
||||
payload_writer->add_one_string_payload(
|
||||
reinterpret_cast<const char*>(field_data->RawValue(offset)),
|
||||
field_data->get_element_size(offset));
|
||||
switch (data_type) {
|
||||
case DataType::VARCHAR:
|
||||
case DataType::STRING: {
|
||||
for (size_t offset = 0; offset < field_data->get_num_rows();
|
||||
++offset) {
|
||||
auto str = static_cast<const std::string*>(
|
||||
field_data->RawValue(offset));
|
||||
payload_writer->add_one_string_payload(str->c_str(),
|
||||
str->size());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DataType::ARRAY:
|
||||
case DataType::JSON: {
|
||||
for (size_t offset = 0; offset < field_data->get_num_rows();
|
||||
++offset) {
|
||||
auto string_view =
|
||||
static_cast<const Json*>(field_data->RawValue(offset))
|
||||
->data();
|
||||
payload_writer->add_one_binary_payload(
|
||||
reinterpret_cast<const uint8_t*>(
|
||||
std::string(string_view).c_str()),
|
||||
string_view.size());
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
auto payload =
|
||||
Payload{data_type,
|
||||
static_cast<const uint8_t*>(field_data->Data()),
|
||||
field_data->get_num_rows(),
|
||||
field_data->get_dim()};
|
||||
payload_writer->add_payload(payload);
|
||||
}
|
||||
} else {
|
||||
auto payload = Payload{data_type,
|
||||
static_cast<const uint8_t*>(field_data->Data()),
|
||||
field_data->get_num_rows(),
|
||||
field_data->get_dim()};
|
||||
payload_writer->add_payload(payload);
|
||||
}
|
||||
|
||||
payload_writer->finish();
|
||||
auto payload_buffer = payload_writer->get_payload_buffer();
|
||||
auto len =
|
||||
@ -250,7 +271,7 @@ BaseEventData::Serialize() {
|
||||
BaseEvent::BaseEvent(BinlogReaderPtr reader, DataType data_type) {
|
||||
event_header = EventHeader(reader);
|
||||
auto event_data_length =
|
||||
event_header.event_length_ - event_header.next_position_;
|
||||
event_header.event_length_ - GetEventHeaderSize(event_header);
|
||||
event_data = BaseEventData(reader, event_data_length, data_type);
|
||||
}
|
||||
|
||||
@ -259,8 +280,8 @@ BaseEvent::Serialize() {
|
||||
auto data = event_data.Serialize();
|
||||
int data_size = data.size();
|
||||
|
||||
event_header.next_position_ = GetEventHeaderSize(event_header);
|
||||
event_header.event_length_ = event_header.next_position_ + data_size;
|
||||
event_header.event_length_ = GetEventHeaderSize(event_header) + data_size;
|
||||
event_header.next_position_ = event_header.event_length_ + event_offset;
|
||||
auto header = event_header.Serialize();
|
||||
int header_size = header.size();
|
||||
|
||||
@ -281,12 +302,11 @@ DescriptorEvent::DescriptorEvent(BinlogReaderPtr reader) {
|
||||
|
||||
std::vector<uint8_t>
|
||||
DescriptorEvent::Serialize() {
|
||||
event_header.event_type_ = EventType::DescriptorEvent;
|
||||
auto data = event_data.Serialize();
|
||||
int data_size = data.size();
|
||||
|
||||
event_header.event_type_ = EventType::DescriptorEvent;
|
||||
event_header.next_position_ = GetEventHeaderSize(event_header);
|
||||
event_header.event_length_ = event_header.next_position_ + data_size;
|
||||
event_header.event_length_ = GetEventHeaderSize(event_header) + data_size;
|
||||
auto header = event_header.Serialize();
|
||||
int header_size = header.size();
|
||||
|
||||
@ -298,6 +318,8 @@ DescriptorEvent::Serialize() {
|
||||
memcpy(res.data() + offset, header.data(), header_size);
|
||||
offset += header_size;
|
||||
memcpy(res.data() + offset, data.data(), data_size);
|
||||
offset += data_size;
|
||||
event_header.next_position_ = offset;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -99,6 +99,7 @@ struct DescriptorEvent {
struct BaseEvent {
    EventHeader event_header;
    BaseEventData event_data;
    int64_t event_offset;

    BaseEvent() = default;
    explicit BaseEvent(BinlogReaderPtr reader, DataType data_type);

@ -15,6 +15,7 @@
// limitations under the License.

#include "storage/FieldData.h"
#include "common/Json.h"

namespace milvus::storage {
@ -22,14 +23,28 @@ template <typename Type, bool is_scalar>
void
FieldDataImpl<Type, is_scalar>::FillFieldData(const void* source,
                                              ssize_t element_count) {
    AssertInfo(element_count % dim_ == 0, "invalid element count");
    if (element_count == 0) {
        return;
    }
    AssertInfo(field_data_.size() == 0, "no empty field vector");
    field_data_.resize(element_count);
    std::copy_n(
        static_cast<const Type*>(source), element_count, field_data_.data());

    std::lock_guard lck(tell_mutex_);
    if (tell_ + element_count > get_num_rows()) {
        resize_field_data(tell_ + element_count);
    }
    std::copy_n(static_cast<const Type*>(source),
                element_count * dim_,
                field_data_.data() + tell_ * dim_);
    tell_ += element_count;
}

template <typename ArrayType, arrow::Type::type ArrayDataType>
std::pair<const void*, int64_t>
GetDataInfoFromArray(const std::shared_ptr<arrow::Array> array) {
    AssertInfo(array->type()->id() == ArrayDataType, "inconsistent data type");
    auto typed_array = std::dynamic_pointer_cast<ArrayType>(array);
    auto element_count = array->length();

    return std::make_pair(typed_array->raw_values(), element_count);
}
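The GetDataInfoFromArray helper replaces the per-type cast-and-extract boilerplate in the switch below. Note that the returned pointer aliases the arrow::Array's internal buffer, so it is only valid while the array stays alive; a sketch of the intended use for one concrete type (assuming `array` holds INT64 data):

    auto [data, count] = GetDataInfoFromArray<arrow::Int64Array,
                                              arrow::Type::type::INT64>(array);
    FillFieldData(data, count);  // copies the values out while `array` lives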
template <typename Type, bool is_scalar>
@ -37,7 +52,7 @@ void
FieldDataImpl<Type, is_scalar>::FillFieldData(
    const std::shared_ptr<arrow::Array> array) {
    AssertInfo(array != nullptr, "null arrow array");
    auto element_count = array->length() * dim_;
    auto element_count = array->length();
    if (element_count == 0) {
        return;
    }
@ -54,46 +69,40 @@ FieldDataImpl<Type, is_scalar>::FillFieldData(
            return FillFieldData(values.data(), element_count);
        }
        case DataType::INT8: {
            AssertInfo(array->type()->id() == arrow::Type::type::INT8,
                       "inconsistent data type");
            auto int8_array =
                std::dynamic_pointer_cast<arrow::Int8Array>(array);
            return FillFieldData(int8_array->raw_values(), element_count);
            auto array_info =
                GetDataInfoFromArray<arrow::Int8Array, arrow::Type::type::INT8>(
                    array);
            return FillFieldData(array_info.first, array_info.second);
        }
        case DataType::INT16: {
            AssertInfo(array->type()->id() == arrow::Type::type::INT16,
                       "inconsistent data type");
            auto int16_array =
                std::dynamic_pointer_cast<arrow::Int16Array>(array);
            return FillFieldData(int16_array->raw_values(), element_count);
            auto array_info =
                GetDataInfoFromArray<arrow::Int16Array,
                                     arrow::Type::type::INT16>(array);
            return FillFieldData(array_info.first, array_info.second);
        }
        case DataType::INT32: {
            AssertInfo(array->type()->id() == arrow::Type::type::INT32,
                       "inconsistent data type");
            auto int32_array =
                std::dynamic_pointer_cast<arrow::Int32Array>(array);
            return FillFieldData(int32_array->raw_values(), element_count);
            auto array_info =
                GetDataInfoFromArray<arrow::Int32Array,
                                     arrow::Type::type::INT32>(array);
            return FillFieldData(array_info.first, array_info.second);
        }
        case DataType::INT64: {
            AssertInfo(array->type()->id() == arrow::Type::type::INT64,
                       "inconsistent data type");
            auto int64_array =
                std::dynamic_pointer_cast<arrow::Int64Array>(array);
            return FillFieldData(int64_array->raw_values(), element_count);
            auto array_info =
                GetDataInfoFromArray<arrow::Int64Array,
                                     arrow::Type::type::INT64>(array);
            return FillFieldData(array_info.first, array_info.second);
        }
        case DataType::FLOAT: {
            AssertInfo(array->type()->id() == arrow::Type::type::FLOAT,
                       "inconsistent data type");
            auto float_array =
                std::dynamic_pointer_cast<arrow::FloatArray>(array);
            return FillFieldData(float_array->raw_values(), element_count);
            auto array_info =
                GetDataInfoFromArray<arrow::FloatArray,
                                     arrow::Type::type::FLOAT>(array);
            return FillFieldData(array_info.first, array_info.second);
        }
        case DataType::DOUBLE: {
            AssertInfo(array->type()->id() == arrow::Type::type::DOUBLE,
                       "inconsistent data type");
            auto double_array =
                std::dynamic_pointer_cast<arrow::DoubleArray>(array);
            return FillFieldData(double_array->raw_values(), element_count);
            auto array_info =
                GetDataInfoFromArray<arrow::DoubleArray,
                                     arrow::Type::type::DOUBLE>(array);
            return FillFieldData(array_info.first, array_info.second);
        }
        case DataType::STRING:
        case DataType::VARCHAR: {
@ -107,21 +116,25 @@ FieldDataImpl<Type, is_scalar>::FillFieldData(
            }
            return FillFieldData(values.data(), element_count);
        }
        case DataType::VECTOR_FLOAT: {
            AssertInfo(
                array->type()->id() == arrow::Type::type::FIXED_SIZE_BINARY,
                "inconsistent data type");
            auto vector_array =
                std::dynamic_pointer_cast<arrow::FixedSizeBinaryArray>(array);
            return FillFieldData(vector_array->raw_values(), element_count);
        case DataType::JSON: {
            AssertInfo(array->type()->id() == arrow::Type::type::BINARY,
                       "inconsistent data type");
            auto json_array =
                std::dynamic_pointer_cast<arrow::BinaryArray>(array);
            std::vector<Json> values(element_count);
            for (size_t index = 0; index < element_count; ++index) {
                values[index] =
                    Json(simdjson::padded_string(json_array->GetString(index)));
            }
            return FillFieldData(values.data(), element_count);
        }
        case DataType::VECTOR_FLOAT:
        case DataType::VECTOR_BINARY: {
            AssertInfo(
                array->type()->id() == arrow::Type::type::FIXED_SIZE_BINARY,
                "inconsistent data type");
            auto vector_array =
                std::dynamic_pointer_cast<arrow::FixedSizeBinaryArray>(array);
            return FillFieldData(vector_array->raw_values(), element_count);
            auto array_info =
                GetDataInfoFromArray<arrow::FixedSizeBinaryArray,
                                     arrow::Type::type::FIXED_SIZE_BINARY>(
                    array);
            return FillFieldData(array_info.first, array_info.second);
        }
        default: {
            throw NotSupportedDataTypeException(GetName() + "::FillFieldData" +
@ -141,9 +154,10 @@ template class FieldDataImpl<int64_t, true>;
template class FieldDataImpl<float, true>;
template class FieldDataImpl<double, true>;
template class FieldDataImpl<std::string, true>;
template class FieldDataImpl<Json, true>;

// vector data
template class FieldDataImpl<int8_t, false>;
template class FieldDataImpl<float, false>;

}  // namespace milvus::storage
@ -27,8 +27,9 @@ template <typename Type>
class FieldData : public FieldDataImpl<Type, true> {
 public:
    static_assert(IsScalar<Type> || std::is_same_v<Type, PkType>);
    explicit FieldData(DataType data_type)
        : FieldDataImpl<Type, true>::FieldDataImpl(1, data_type) {
    explicit FieldData(DataType data_type, int64_t buffered_num_rows = 0)
        : FieldDataImpl<Type, true>::FieldDataImpl(
              1, data_type, buffered_num_rows) {
    }
};

@ -36,23 +37,39 @@ template <>
class FieldData<std::string> : public FieldDataStringImpl {
 public:
    static_assert(IsScalar<std::string> || std::is_same_v<std::string, PkType>);
    explicit FieldData(DataType data_type) : FieldDataStringImpl(data_type) {
    explicit FieldData(DataType data_type, int64_t buffered_num_rows = 0)
        : FieldDataStringImpl(data_type, buffered_num_rows) {
    }
};

template <>
class FieldData<Json> : public FieldDataJsonImpl {
 public:
    static_assert(IsScalar<std::string> || std::is_same_v<std::string, PkType>);
    explicit FieldData(DataType data_type, int64_t buffered_num_rows = 0)
        : FieldDataJsonImpl(data_type, buffered_num_rows) {
    }
};

template <>
class FieldData<FloatVector> : public FieldDataImpl<float, false> {
 public:
    explicit FieldData(int64_t dim, DataType data_type)
        : FieldDataImpl<float, false>::FieldDataImpl(dim, data_type) {
    explicit FieldData(int64_t dim,
                       DataType data_type,
                       int64_t buffered_num_rows = 0)
        : FieldDataImpl<float, false>::FieldDataImpl(
              dim, data_type, buffered_num_rows) {
    }
};

template <>
class FieldData<BinaryVector> : public FieldDataImpl<uint8_t, false> {
 public:
    explicit FieldData(int64_t dim, DataType data_type)
        : binary_dim_(dim), FieldDataImpl(dim / 8, data_type) {
    explicit FieldData(int64_t dim,
                       DataType data_type,
                       int64_t buffered_num_rows = 0)
        : binary_dim_(dim),
          FieldDataImpl(dim / 8, data_type, buffered_num_rows) {
        Assert(dim % 8 == 0);
    }

@ -66,4 +83,5 @@ class FieldData<BinaryVector> : public FieldDataImpl<uint8_t, false> {
};

using FieldDataPtr = std::shared_ptr<FieldDataBase>;

}  // namespace milvus::storage
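The buffered_num_rows parameter threaded through the specializations above lets a caller pre-size a FieldData to a row count known ahead of time (e.g. from file metadata), so batched fills append in place instead of reallocating. A usage sketch (the type, dim, and row count below are made-up values):

    // Pre-size for 10000 rows of 128-dim float vectors, then fill in batches.
    auto data = std::make_shared<FieldData<FloatVector>>(
        /*dim=*/128, DataType::VECTOR_FLOAT, /*buffered_num_rows=*/10000);
    // Each FillFieldData(...) call advances the internal write cursor;
    // data->IsFull() turns true once all 10000 rows have been written.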
@ -1,53 +0,0 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "storage/FieldDataFactory.h"
#include "storage/Exception.h"

namespace milvus::storage {

FieldDataPtr
FieldDataFactory::CreateFieldData(const DataType& type, const int64_t dim) {
    switch (type) {
        case DataType::BOOL:
            return std::make_shared<FieldData<bool>>(type);
        case DataType::INT8:
            return std::make_shared<FieldData<int8_t>>(type);
        case DataType::INT16:
            return std::make_shared<FieldData<int16_t>>(type);
        case DataType::INT32:
            return std::make_shared<FieldData<int32_t>>(type);
        case DataType::INT64:
            return std::make_shared<FieldData<int64_t>>(type);
        case DataType::FLOAT:
            return std::make_shared<FieldData<float>>(type);
        case DataType::DOUBLE:
            return std::make_shared<FieldData<double>>(type);
        case DataType::STRING:
        case DataType::VARCHAR:
            return std::make_shared<FieldData<std::string>>(type);
        case DataType::VECTOR_FLOAT:
            return std::make_shared<FieldData<FloatVector>>(dim, type);
        case DataType::VECTOR_BINARY:
            return std::make_shared<FieldData<BinaryVector>>(dim, type);
        default:
            throw NotSupportedDataTypeException(
                GetName() + "::CreateFieldData" + " not support data type " +
                datatype_name(type));
    }
}

}  // namespace milvus::storage
@ -20,6 +20,8 @@
#include <memory>
#include <vector>
#include <string>
#include <mutex>
#include <shared_mutex>

#include "arrow/api.h"
#include "common/FieldMeta.h"
@ -53,16 +55,19 @@ class FieldDataBase {
    virtual int64_t
    Size() const = 0;

    virtual int64_t
    Size(ssize_t index) const = 0;

    virtual bool
    IsFull() const = 0;

 public:
    virtual int
    virtual int64_t
    get_num_rows() const = 0;

    virtual int64_t
    get_dim() const = 0;

    virtual int64_t
    get_element_size(ssize_t offset) const = 0;

    DataType
    get_data_type() const {
        return data_type_;
@ -86,8 +91,14 @@ class FieldDataImpl : public FieldDataBase {
    operator=(const FieldDataImpl&) = delete;

 public:
    explicit FieldDataImpl(ssize_t dim, DataType data_type)
        : FieldDataBase(data_type), dim_(is_scalar ? 1 : dim) {
    explicit FieldDataImpl(ssize_t dim,
                           DataType data_type,
                           int64_t buffered_num_rows = 0)
        : FieldDataBase(data_type),
          dim_(is_scalar ? 1 : dim),
          num_rows_(buffered_num_rows),
          tell_(0) {
        field_data_.resize(num_rows_ * dim_);
    }

    void
@ -108,20 +119,54 @@ class FieldDataImpl : public FieldDataBase {

    const void*
    RawValue(ssize_t offset) const override {
        AssertInfo(offset < get_num_rows(),
                   "field data subscript out of range");
        AssertInfo(offset < get_tell(),
                   "subscript position doesn't have a valid value");
        return &field_data_[offset];
    }

    int64_t
    Size() const override {
        return sizeof(Type) * field_data_.size();
        return sizeof(Type) * get_tell() * dim_;
    }

    int64_t
    Size(ssize_t offset) const override {
        AssertInfo(offset < get_num_rows(),
                   "field data subscript out of range");
        AssertInfo(offset < get_tell(),
                   "subscript position doesn't have a valid value");
        return sizeof(Type) * dim_;
    }

    bool
    IsFull() const override {
        auto buffered_num_rows = get_num_rows();
        auto filled_num_rows = get_tell();
        return buffered_num_rows == filled_num_rows;
    }

 public:
    int
    int64_t
    get_num_rows() const override {
        auto len = field_data_.size();
        AssertInfo(len % dim_ == 0, "field data size not aligned");
        return len / dim_;
        std::shared_lock lck(num_rows_mutex_);
        return num_rows_;
    }

    void
    resize_field_data(int64_t num_rows) {
        std::lock_guard lck(num_rows_mutex_);
        if (num_rows > num_rows_) {
            num_rows_ = num_rows;
            field_data_.resize(num_rows_ * dim_);
        }
    }

    int64_t
    get_tell() const {
        std::shared_lock lck(tell_mutex_);
        return tell_;
    }

    int64_t
@ -129,13 +174,12 @@ class FieldDataImpl : public FieldDataBase {
        return dim_;
    }

    int64_t
    get_element_size(ssize_t offset) const override {
        return sizeof(Type) * dim_;
    }

 protected:
    Chunk field_data_;
    int64_t num_rows_;
    mutable std::shared_mutex num_rows_mutex_;
    int64_t tell_;
    mutable std::shared_mutex tell_mutex_;

 private:
    const ssize_t dim_;
@ -143,30 +187,54 @@ class FieldDataImpl : public FieldDataBase {

class FieldDataStringImpl : public FieldDataImpl<std::string, true> {
 public:
    explicit FieldDataStringImpl(DataType data_type)
        : FieldDataImpl<std::string, true>(1, data_type) {
    }

    const void*
    RawValue(ssize_t offset) const {
        return field_data_[offset].c_str();
    explicit FieldDataStringImpl(DataType data_type, int64_t total_num_rows = 0)
        : FieldDataImpl<std::string, true>(1, data_type, total_num_rows) {
    }

    int64_t
    Size() const {
        int64_t data_size = 0;
        for (size_t offset = 0; offset < field_data_.size(); ++offset) {
            data_size += get_element_size(offset);
        for (size_t offset = 0; offset < get_tell(); ++offset) {
            data_size += field_data_[offset].size();
        }

        return data_size;
    }

 public:
    int64_t
    get_element_size(ssize_t offset) const {
    Size(ssize_t offset) const {
        AssertInfo(offset < get_num_rows(),
                   "field data subscript out of range");
        AssertInfo(offset < get_tell(),
                   "subscript position doesn't have a valid value");
        return field_data_[offset].size();
    }
};

class FieldDataJsonImpl : public FieldDataImpl<Json, true> {
 public:
    explicit FieldDataJsonImpl(DataType data_type, int64_t total_num_rows = 0)
        : FieldDataImpl<Json, true>(1, data_type, total_num_rows) {
    }

    int64_t
    Size() const {
        int64_t data_size = 0;
        for (size_t offset = 0; offset < get_tell(); ++offset) {
            data_size += field_data_[offset].data().size();
        }

        return data_size;
    }

    int64_t
    Size(ssize_t offset) const {
        AssertInfo(offset < get_num_rows(),
                   "field data subscript out of range");
        AssertInfo(offset < get_tell(),
                   "subscript position doesn't have a valid value");
        return field_data_[offset].data().size();
    }
};

}  // namespace milvus::storage
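The capacity/cursor split above (num_rows_ guarded by num_rows_mutex_, tell_ guarded by tell_mutex_) is the whole concurrency story of FieldDataImpl. A reduced, self-contained sketch of the same design for a single fixed-width element type (a simplification, not the real class):

    #include <algorithm>
    #include <cstdint>
    #include <mutex>
    #include <shared_mutex>
    #include <vector>

    class FixedWidthColumn {
     public:
        explicit FixedWidthColumn(int64_t dim, int64_t buffered_rows = 0)
            : dim_(dim), num_rows_(buffered_rows), data_(buffered_rows * dim) {
        }

        // Append `rows` rows, growing capacity on demand (cf. FillFieldData).
        void
        Fill(const float* src, int64_t rows) {
            std::lock_guard lck(tell_mutex_);
            if (tell_ + rows > num_rows_) {
                std::lock_guard rows_lck(num_rows_mutex_);
                num_rows_ = tell_ + rows;
                data_.resize(num_rows_ * dim_);
            }
            std::copy_n(src, rows * dim_, data_.data() + tell_ * dim_);
            tell_ += rows;
        }

        // Full once the filled cursor reaches the buffered capacity.
        bool
        IsFull() const {
            std::shared_lock rows_lck(num_rows_mutex_);
            std::shared_lock tell_lck(tell_mutex_);
            return tell_ == num_rows_;
        }

     private:
        const int64_t dim_;
        int64_t num_rows_;
        mutable std::shared_mutex num_rows_mutex_;
        int64_t tell_ = 0;
        mutable std::shared_mutex tell_mutex_;
        std::vector<float> data_;
    };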
@ -21,10 +21,45 @@
#include <memory>

#include "knowhere/file_manager.h"
#include "common/Consts.h"
#include "storage/ChunkManager.h"
#include "storage/Types.h"
#include "log/Log.h"

namespace milvus::storage {

#define FILEMANAGER_TRY try {
#define FILEMANAGER_CATCH                                                     \
    }                                                                         \
    catch (LocalChunkManagerException & e) {                                  \
        LOG_SEGCORE_ERROR_ << "LocalChunkManagerException:" << e.what();      \
        return false;                                                         \
    }                                                                         \
    catch (MinioException & e) {                                              \
        LOG_SEGCORE_ERROR_ << "milvus::storage::MinioException:" << e.what(); \
        return false;                                                         \
    }                                                                         \
    catch (DiskANNFileManagerException & e) {                                 \
        LOG_SEGCORE_ERROR_ << "milvus::storage::DiskANNFileManagerException:" \
                           << e.what();                                       \
        return false;                                                         \
    }                                                                         \
    catch (ArrowException & e) {                                              \
        LOG_SEGCORE_ERROR_ << "milvus::storage::ArrowException:" << e.what(); \
        return false;                                                         \
    }                                                                         \
    catch (std::exception & e) {                                              \
        LOG_SEGCORE_ERROR_ << "Exception:" << e.what();                       \
        return false;
#define FILEMANAGER_END }
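The three macros expand to a single try block plus a catch ladder that logs and returns false, so every FileManager operation can share one error contour. A sketch of the intended call pattern (the function and its body are placeholders):

    bool
    ExampleAddFile(const std::string& filename) noexcept {
        FILEMANAGER_TRY
        // ... code that may throw any of the exception types listed above ...
        FILEMANAGER_CATCH
        FILEMANAGER_END
        return true;
    }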
class FileManagerImpl : public knowhere::FileManager {
 public:
    explicit FileManagerImpl(const FieldDataMeta& field_meta,
                             IndexMeta index_meta)
        : field_meta_(field_meta), index_meta_(std::move(index_meta)) {
    }

 public:
    /**
     * @brief Load a file to the local disk, so we can use stl lib to operate it.
@ -61,6 +96,37 @@ class FileManagerImpl : public knowhere::FileManager {
     */
    virtual bool
    RemoveFile(const std::string& filename) noexcept = 0;

 public:
    virtual std::string
    GetName() const = 0;

    virtual FieldDataMeta
    GetFieldDataMeta() const {
        return field_meta_;
    }

    virtual IndexMeta
    GetIndexMeta() const {
        return index_meta_;
    }

    virtual std::string
    GetRemoteIndexObjectPrefix() const {
        return rcm_->GetRootPath() + "/" + std::string(INDEX_ROOT_PATH) + "/" +
               std::to_string(index_meta_.build_id) + "/" +
               std::to_string(index_meta_.index_version) + "/" +
               std::to_string(field_meta_.partition_id) + "/" +
               std::to_string(field_meta_.segment_id);
    }

 protected:
    // collection meta
    FieldDataMeta field_meta_;

    // index meta
    IndexMeta index_meta_;
    ChunkManagerPtr rcm_;
};

using FileManagerImplPtr = std::shared_ptr<FileManagerImpl>;
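For reference, GetRemoteIndexObjectPrefix() composes the remote layout root/index_files/build_id/index_version/partition_id/segment_id. With a root path of "files" and made-up IDs build_id=3, index_version=1, partition_id=42, segment_id=7 it yields:

    files/index_files/3/1/42/7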
@ -51,20 +51,6 @@ IndexData::serialize_to_remote_file() {
    AssertInfo(index_meta_.has_value(), "index meta not exist");
    AssertInfo(field_data_ != nullptr, "empty field data");

    // create index event
    IndexEvent index_event;
    auto& index_event_data = index_event.event_data;
    index_event_data.start_timestamp = time_range_.first;
    index_event_data.end_timestamp = time_range_.second;
    index_event_data.field_data = field_data_;

    auto& index_event_header = index_event.event_header;
    index_event_header.event_type_ = EventType::IndexFileEvent;
    // TODO :: set timestamps
    index_event_header.timestamp_ = 0;

    // serialize index event
    auto index_event_bytes = index_event.Serialize();
    DataType data_type = field_data_->get_data_type();

    // create descriptor event
@ -96,6 +82,22 @@ IndexData::serialize_to_remote_file() {
    // serialize descriptor event data
    auto des_event_bytes = descriptor_event.Serialize();

    // create index event
    IndexEvent index_event;
    index_event.event_offset = des_event_bytes.size();
    auto& index_event_data = index_event.event_data;
    index_event_data.start_timestamp = time_range_.first;
    index_event_data.end_timestamp = time_range_.second;
    index_event_data.field_data = field_data_;

    auto& index_event_header = index_event.event_header;
    index_event_header.event_type_ = EventType::IndexFileEvent;
    // TODO :: set timestamps
    index_event_header.timestamp_ = 0;

    // serialize index event
    auto index_event_bytes = index_event.Serialize();

    des_event_bytes.insert(des_event_bytes.end(),
                           index_event_bytes.begin(),
                           index_event_bytes.end());
@ -47,20 +47,6 @@ InsertData::serialize_to_remote_file() {
    AssertInfo(field_data_meta_.has_value(), "field data not exist");
    AssertInfo(field_data_ != nullptr, "empty field data");

    // create insert event
    InsertEvent insert_event;
    auto& insert_event_data = insert_event.event_data;
    insert_event_data.start_timestamp = time_range_.first;
    insert_event_data.end_timestamp = time_range_.second;
    insert_event_data.field_data = field_data_;

    auto& insert_event_header = insert_event.event_header;
    // TODO :: set timestamps
    insert_event_header.timestamp_ = 0;
    insert_event_header.event_type_ = EventType::InsertEvent;

    // serialize insert event
    auto insert_event_bytes = insert_event.Serialize();
    DataType data_type = field_data_->get_data_type();

    // create descriptor event
@ -90,6 +76,22 @@ InsertData::serialize_to_remote_file() {
    // serialize descriptor event data
    auto des_event_bytes = descriptor_event.Serialize();

    // create insert event
    InsertEvent insert_event;
    insert_event.event_offset = des_event_bytes.size();
    auto& insert_event_data = insert_event.event_data;
    insert_event_data.start_timestamp = time_range_.first;
    insert_event_data.end_timestamp = time_range_.second;
    insert_event_data.field_data = field_data_;

    auto& insert_event_header = insert_event.event_header;
    // TODO :: set timestamps
    insert_event_header.timestamp_ = 0;
    insert_event_header.event_type_ = EventType::InsertEvent;

    // serialize insert event
    auto insert_event_bytes = insert_event.Serialize();

    des_event_bytes.insert(des_event_bytes.end(),
                           insert_event_bytes.begin(),
                           insert_event_bytes.end());
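Both reorders exist for the same reason: the descriptor event is now serialized first so its size can seed event_offset of the payload event, which makes every next_position_ an absolute file offset. The resulting binlog layout, conceptually:

    [DescriptorEvent bytes][InsertEvent or IndexEvent bytes]
     ^ offset 0             ^ event_offset = des_event_bytes.size()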
@ -103,6 +103,11 @@ void
LocalChunkManager::Write(const std::string& absPathStr,
                         void* buf,
                         uint64_t size) {
    boost::filesystem::path absPath(absPathStr);
    // if the file path does not exist, the file is created automatically;
    // make sure the parent directory exists first
    boost::filesystem::create_directories(absPath.parent_path());

    std::ofstream outfile;
    outfile.open(absPathStr.data(), std::ios_base::binary);
    if (outfile.fail()) {
@ -124,6 +129,11 @@ LocalChunkManager::Write(const std::string& absPathStr,
                         uint64_t offset,
                         void* buf,
                         uint64_t size) {
    boost::filesystem::path absPath(absPathStr);
    // if the file path does not exist, the file is created automatically;
    // make sure the parent directory exists first
    boost::filesystem::create_directories(absPath.parent_path());

    std::ofstream outfile;
    outfile.open(
        absPathStr.data(),
@ -21,7 +21,6 @@
#include <vector>

#include "storage/ChunkManager.h"
#include "config/ConfigChunkManager.h"

namespace milvus::storage {

@ -30,7 +29,7 @@ namespace milvus::storage {
 * that inherited from ChunkManager
 */
class LocalChunkManager : public ChunkManager {
 private:
 public:
    explicit LocalChunkManager(const std::string& path) : path_prefix_(path) {
    }

@ -39,14 +38,6 @@ class LocalChunkManager : public ChunkManager {
    operator=(const LocalChunkManager&);

 public:
    static LocalChunkManager&
    GetInstance() {
        // thread-safe since C++11 (magic statics)
        static LocalChunkManager instance(
            ChunkMangerConfig::GetLocalRootPath());
        return instance;
    }

    virtual ~LocalChunkManager() {
    }

@ -110,16 +101,11 @@ class LocalChunkManager : public ChunkManager {
        return "LocalChunkManager";
    }

    inline std::string
    GetPathPrefix() {
    virtual std::string
    GetRootPath() const {
        return path_prefix_;
    }

    inline void
    SetPathPrefix(const std::string& path) {
        path_prefix_ = path;
    }

    bool
    CreateFile(const std::string& filepath);
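With GetInstance() and the mutable path prefix removed here (singleton duties move to the new class below), the local root path is injected once at construction. A usage sketch (the path is a made-up value):

    milvus::storage::LocalChunkManager lcm("/var/lib/milvus");
    // All relative file operations now resolve under GetRootPath():
    assert(lcm.GetRootPath() == "/var/lib/milvus");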
67
internal/core/src/storage/LocalChunkManagerSingleton.h
Normal file
@ -0,0 +1,67 @@
// (Apache License 2.0 header, identical to the one shown above)

#pragma once

#include <memory>
#include <shared_mutex>

#include "storage/ChunkManager.h"
#include "storage/LocalChunkManager.h"

namespace milvus::storage {

class LocalChunkManagerSingleton {
 private:
    LocalChunkManagerSingleton() {
    }

 public:
    LocalChunkManagerSingleton(const LocalChunkManagerSingleton&) = delete;
    LocalChunkManagerSingleton&
    operator=(const LocalChunkManagerSingleton&) = delete;

    static LocalChunkManagerSingleton&
    GetInstance() {
        static LocalChunkManagerSingleton instance;
        return instance;
    }

    void
    Init(std::string root_path) {
        std::unique_lock lck(mutex_);
        if (lcm_ == nullptr) {
            lcm_ = std::make_shared<LocalChunkManager>(root_path);
        }
    }

    void
    Release() {
        std::unique_lock lck(mutex_);
        lcm_ = nullptr;
    }

    LocalChunkManagerSPtr
    GetChunkManager() {
        return lcm_;
    }

 private:
    mutable std::shared_mutex mutex_;
    LocalChunkManagerSPtr lcm_ = nullptr;
};

}  // namespace milvus::storage

169
internal/core/src/storage/MemFileManagerImpl.cpp
Normal file
@ -0,0 +1,169 @@
// (Apache License 2.0 header, identical to the one shown above)

#include "storage/MemFileManagerImpl.h"

#include "storage/Util.h"
#include "common/Common.h"

namespace milvus::storage {

MemFileManagerImpl::MemFileManagerImpl(const FieldDataMeta& field_meta,
                                       IndexMeta index_meta,
                                       ChunkManagerPtr remote_chunk_manager)
    : FileManagerImpl(field_meta, index_meta) {
    rcm_ = remote_chunk_manager;
}

bool
MemFileManagerImpl::AddFile(const std::string& filename /* unused */) noexcept {
    return false;
}

bool
MemFileManagerImpl::AddFile(const BinarySet& binary_set) noexcept {
    std::vector<const uint8_t*> data_slices;
    std::vector<int64_t> slice_sizes;
    std::vector<std::string> slice_names;

    auto AddBatchIndexFiles = [&]() {
        auto res = PutIndexData(rcm_.get(),
                                data_slices,
                                slice_sizes,
                                slice_names,
                                field_meta_,
                                index_meta_);
        for (auto& [file, size] : res) {
            remote_paths_to_size_[file] = size;
        }
    };

    auto remotePrefix = GetRemoteIndexObjectPrefix();
    int64_t batch_size = 0;
    for (auto iter = binary_set.binary_map_.begin();
         iter != binary_set.binary_map_.end();
         iter++) {
        if (batch_size >= DEFAULT_FIELD_MAX_MEMORY_LIMIT) {
            AddBatchIndexFiles();
            data_slices.clear();
            slice_sizes.clear();
            slice_names.clear();
            batch_size = 0;
        }

        data_slices.emplace_back(iter->second->data.get());
        slice_sizes.emplace_back(iter->second->size);
        slice_names.emplace_back(remotePrefix + "/" + iter->first);
        batch_size += iter->second->size;
    }

    if (data_slices.size() > 0) {
        AddBatchIndexFiles();
    }

    return true;
}

bool
MemFileManagerImpl::LoadFile(const std::string& filename) noexcept {
    return true;
}

std::map<std::string, storage::FieldDataPtr>
MemFileManagerImpl::LoadIndexToMemory(
    const std::vector<std::string>& remote_files) {
    std::map<std::string, storage::FieldDataPtr> file_to_index_data;
    auto parallel_degree =
        uint64_t(DEFAULT_FIELD_MAX_MEMORY_LIMIT / FILE_SLICE_SIZE);
    std::vector<std::string> batch_files;

    auto LoadBatchIndexFiles = [&]() {
        auto index_datas = GetObjectData(rcm_.get(), batch_files);
        for (size_t idx = 0; idx < batch_files.size(); ++idx) {
            auto file_name =
                batch_files[idx].substr(batch_files[idx].find_last_of("/") + 1);
            file_to_index_data[file_name] = index_datas[idx];
        }
    };

    for (auto& file : remote_files) {
        if (batch_files.size() >= parallel_degree) {
            LoadBatchIndexFiles();
            batch_files.clear();
        }
        batch_files.emplace_back(file);
    }

    if (batch_files.size() > 0) {
        LoadBatchIndexFiles();
    }

    AssertInfo(file_to_index_data.size() == remote_files.size(),
               "inconsistent file num and index data num!");
    return file_to_index_data;
}

std::vector<FieldDataPtr>
MemFileManagerImpl::CacheRawDataToMemory(
    std::vector<std::string> remote_files) {
    std::sort(remote_files.begin(),
              remote_files.end(),
              [](const std::string& a, const std::string& b) {
                  return std::stol(a.substr(a.find_last_of("/") + 1)) <
                         std::stol(b.substr(b.find_last_of("/") + 1));
              });

    auto parallel_degree =
        uint64_t(DEFAULT_FIELD_MAX_MEMORY_LIMIT / FILE_SLICE_SIZE);
    std::vector<std::string> batch_files;
    std::vector<FieldDataPtr> field_datas;

    auto FetchRawData = [&]() {
        auto raw_datas = GetObjectData(rcm_.get(), batch_files);
        for (auto& data : raw_datas) {
            field_datas.emplace_back(data);
        }
    };

    for (auto& file : remote_files) {
        if (batch_files.size() >= parallel_degree) {
            FetchRawData();
            batch_files.clear();
        }
        batch_files.emplace_back(file);
    }
    if (batch_files.size() > 0) {
        FetchRawData();
    }

    AssertInfo(field_datas.size() == remote_files.size(),
               "inconsistent file num and raw data num!");
    return field_datas;
}

std::optional<bool>
MemFileManagerImpl::IsExisted(const std::string& filename) noexcept {
    // TODO: implement this interface
    return false;
}

bool
MemFileManagerImpl::RemoveFile(const std::string& filename) noexcept {
    // TODO: implement this interface
    return false;
}

}  // namespace milvus::storage

75
internal/core/src/storage/MemFileManagerImpl.h
Normal file
@ -0,0 +1,75 @@
// (Apache License 2.0 header, identical to the one shown above)

#pragma once

#include <map>
#include <string>
#include <vector>
#include <memory>

#include "storage/IndexData.h"
#include "storage/FileManager.h"
#include "storage/ChunkManager.h"

namespace milvus::storage {

class MemFileManagerImpl : public FileManagerImpl {
 public:
    explicit MemFileManagerImpl(const FieldDataMeta& field_meta,
                                IndexMeta index_meta,
                                ChunkManagerPtr remote_chunk_manager);

    virtual bool
    LoadFile(const std::string& filename) noexcept;

    virtual bool
    AddFile(const std::string& filename /* unused */) noexcept;

    virtual std::optional<bool>
    IsExisted(const std::string& filename) noexcept;

    virtual bool
    RemoveFile(const std::string& filename) noexcept;

 public:
    virtual std::string
    GetName() const {
        return "MemIndexFileManagerImpl";
    }

    std::map<std::string, storage::FieldDataPtr>
    LoadIndexToMemory(const std::vector<std::string>& remote_files);

    std::vector<FieldDataPtr>
    CacheRawDataToMemory(std::vector<std::string> remote_files);

    bool
    AddFile(const BinarySet& binary_set) noexcept;

    std::map<std::string, int64_t>
    GetRemotePathsToFileSize() const {
        return remote_paths_to_size_;
    }

 private:
    // remote file path
    std::map<std::string, int64_t> remote_paths_to_size_;
};

using MemFileManagerImplPtr = std::shared_ptr<MemFileManagerImpl>;

}  // namespace milvus::storage
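A sketch of driving MemFileManagerImpl end to end (the metas, chunk manager, binary set, and remote paths below are placeholders, not values from this commit):

    auto fm = std::make_shared<milvus::storage::MemFileManagerImpl>(
        field_meta, index_meta, remote_chunk_manager);
    fm->AddFile(binary_set);                      // batched uploads, see .cpp
    auto sizes = fm->GetRemotePathsToFileSize();  // slice name -> byte size
    auto index =
        fm->LoadIndexToMemory({"prefix/slice_0", "prefix/slice_1"});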
@ -206,6 +206,7 @@ MinioChunkManager::BuildGoogleCloudClient(

MinioChunkManager::MinioChunkManager(const StorageConfig& storage_config)
    : default_bucket_name_(storage_config.bucket_name) {
    remote_root_path_ = storage_config.root_path;
    RemoteStorageType storageType;
    if (storage_config.address.find("google") != std::string::npos) {
        storageType = RemoteStorageType::GOOGLE_CLOUD;

@ -30,12 +30,12 @@
#include <google/cloud/storage/oauth2/compute_engine_credentials.h>
#include <google/cloud/storage/oauth2/google_credentials.h>
#include <google/cloud/status_or.h>

#include <map>
#include <memory>
#include <string>
#include <vector>

#include "config/ConfigChunkManager.h"
#include "storage/ChunkManager.h"
#include "storage/Exception.h"
#include "storage/Types.h"
@ -47,7 +47,7 @@ enum class RemoteStorageType { S3 = 0, GOOGLE_CLOUD = 1, ALIYUN_CLOUD = 2 };
/**
 * @brief This MinioChunkManager is responsible for read and write file in S3.
 */
class MinioChunkManager : public RemoteChunkManager {
class MinioChunkManager : public ChunkManager {
 public:
    explicit MinioChunkManager(const StorageConfig& storage_config);

@ -99,6 +99,11 @@ class MinioChunkManager : public RemoteChunkManager {
        return "MinioChunkManager";
    }

    virtual std::string
    GetRootPath() const {
        return remote_root_path_;
    }

    inline std::string
    GetBucketName() {
        return default_bucket_name_;
@ -163,6 +168,7 @@ class MinioChunkManager : public RemoteChunkManager {
    static std::mutex client_mutex_;
    std::shared_ptr<Aws::S3::S3Client> client_;
    std::string default_bucket_name_;
    std::string remote_root_path_;
};

using MinioChunkManagerPtr = std::unique_ptr<MinioChunkManager>;

@ -16,46 +16,72 @@

#include "storage/PayloadReader.h"
#include "exceptions/EasyAssert.h"
#include "storage/FieldDataFactory.h"
#include "storage/Util.h"
#include "parquet/column_reader.h"
#include "arrow/io/api.h"
#include "arrow/status.h"
#include "parquet/arrow/reader.h"

namespace milvus::storage {
PayloadReader::PayloadReader(std::shared_ptr<PayloadInputStream> input,
                             DataType data_type)
    : column_type_(data_type) {
    init(std::move(input));
}

PayloadReader::PayloadReader(const uint8_t* data,
                             int length,
                             DataType data_type)
    : column_type_(data_type) {
    auto input = std::make_shared<storage::PayloadInputStream>(data, length);
    auto input = std::make_shared<arrow::io::BufferReader>(data, length);
    init(input);
}

void
PayloadReader::init(std::shared_ptr<PayloadInputStream> input) {
    auto mem_pool = arrow::default_memory_pool();
    // TODO :: Stream read file data, avoid copying
    std::unique_ptr<parquet::arrow::FileReader> reader;
    auto st = parquet::arrow::OpenFile(input, mem_pool, &reader);
    AssertInfo(st.ok(), "failed to get arrow file reader");
    std::shared_ptr<arrow::Table> table;
    st = reader->ReadTable(&table);
    AssertInfo(st.ok(), "failed to get reader data to arrow table");
    auto column = table->column(0);
    AssertInfo(column != nullptr, "returned arrow column is null");
    AssertInfo(column->chunks().size() == 1,
               "arrow chunk size in arrow column should be 1");
    auto array = column->chunk(0);
    AssertInfo(array != nullptr, "empty arrow array of PayloadReader");
PayloadReader::init(std::shared_ptr<arrow::io::BufferReader> input) {
    arrow::MemoryPool* pool = arrow::default_memory_pool();

    // Configure general Parquet reader settings
    auto reader_properties = parquet::ReaderProperties(pool);
    reader_properties.set_buffer_size(4096 * 4);
    reader_properties.enable_buffered_stream();

    // Configure Arrow-specific Parquet reader settings
    auto arrow_reader_props = parquet::ArrowReaderProperties();
    arrow_reader_props.set_batch_size(128 * 1024);  // default 64 * 1024
    arrow_reader_props.set_pre_buffer(false);

    parquet::arrow::FileReaderBuilder reader_builder;
    auto st = reader_builder.Open(input, reader_properties);
    AssertInfo(st.ok(), "failed to open file");
    reader_builder.memory_pool(pool);
    reader_builder.properties(arrow_reader_props);

    std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
    st = reader_builder.Build(&arrow_reader);
    AssertInfo(st.ok(), "failed to build file reader");

    int64_t column_index = 0;
    auto file_meta = arrow_reader->parquet_reader()->metadata();
    // LOG_SEGCORE_INFO_ << "serialized parquet metadata, num row group " <<
    // std::to_string(file_meta->num_row_groups())
    // << ", num column " << std::to_string(file_meta->num_columns()) << ", num rows "
    // << std::to_string(file_meta->num_rows()) << ", type width "
    // << std::to_string(file_meta->schema()->Column(column_index)->type_length());
    dim_ = datatype_is_vector(column_type_)
               ? GetDimensionFromArrowArray(array, column_type_)
               ? GetDimensionFromFileMetaData(
                     file_meta->schema()->Column(column_index), column_type_)
               : 1;
    field_data_ =
        FieldDataFactory::GetInstance().CreateFieldData(column_type_, dim_);
    field_data_->FillFieldData(array);
    auto total_num_rows = file_meta->num_rows();

    std::shared_ptr<::arrow::RecordBatchReader> rb_reader;
    st = arrow_reader->GetRecordBatchReader(&rb_reader);
    AssertInfo(st.ok(), "failed to get record batch reader");

    field_data_ = CreateFieldData(column_type_, dim_, total_num_rows);
    for (arrow::Result<std::shared_ptr<arrow::RecordBatch>> maybe_batch :
         *rb_reader) {
        AssertInfo(maybe_batch.ok(), "failed to get record batch");
        auto array = maybe_batch.ValueOrDie()->column(column_index);
        field_data_->FillFieldData(array);
    }
    AssertInfo(field_data_->IsFull(), "field data has not been fully filled");
    // LOG_SEGCORE_INFO_ << "Peak arrow memory pool size " << pool->max_memory();
}

}  // namespace milvus::storage
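Compared with the old ReadTable path, which materialized the whole Parquet file as one Arrow table, init now streams 128K-row record batches into a FieldData pre-sized from file metadata, bounding peak memory. The vector dimension likewise comes from metadata: for VECTOR_FLOAT it is type_length / sizeof(float) (e.g. 512 bytes -> 128 dims), for VECTOR_BINARY it is type_length * 8 (illustrative numbers). The read loop reduces to (mirroring the calls above):

    for (auto maybe_batch : *rb_reader) {                 // RecordBatchReader
        AssertInfo(maybe_batch.ok(), "failed to get record batch");
        field_data_->FillFieldData(maybe_batch.ValueOrDie()->column(0));
    }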
@ -26,15 +26,12 @@ namespace milvus::storage {

class PayloadReader {
 public:
    explicit PayloadReader(std::shared_ptr<PayloadInputStream> input,
                           DataType data_type);

    explicit PayloadReader(const uint8_t* data, int length, DataType data_type);

    ~PayloadReader() = default;

    void
    init(std::shared_ptr<PayloadInputStream> input);
    init(std::shared_ptr<arrow::io::BufferReader> buffer);

    const FieldDataPtr
    get_field_data() const {

@ -32,7 +32,7 @@ class PayloadInputStream;
struct Payload {
    DataType data_type;
    const uint8_t* raw_data;
    int rows;
    int64_t rows;
    std::optional<int> dimension;
};
66
internal/core/src/storage/RemoteChunkManagerSingleton.h
Normal file
@ -0,0 +1,66 @@
// (Apache License 2.0 header, identical to the one shown above)

#pragma once

#include <memory>
#include <shared_mutex>

#include "storage/Util.h"

namespace milvus::storage {

class RemoteChunkManagerSingleton {
 private:
    RemoteChunkManagerSingleton() {
    }

 public:
    RemoteChunkManagerSingleton(const RemoteChunkManagerSingleton&) = delete;
    RemoteChunkManagerSingleton&
    operator=(const RemoteChunkManagerSingleton&) = delete;

    static RemoteChunkManagerSingleton&
    GetInstance() {
        static RemoteChunkManagerSingleton instance;
        return instance;
    }

    void
    Init(const StorageConfig& storage_config) {
        std::unique_lock lck(mutex_);
        if (rcm_ == nullptr) {
            rcm_ = CreateChunkManager(storage_config);
        }
    }

    void
    Release() {
        std::unique_lock lck(mutex_);
        rcm_ = nullptr;
    }

    ChunkManagerPtr
    GetRemoteChunkManager() {
        return rcm_;
    }

 private:
    mutable std::shared_mutex mutex_;
    ChunkManagerPtr rcm_ = nullptr;
};

}  // namespace milvus::storage
@ -36,7 +36,7 @@ class SafeQueue {
        return queue_.empty();
    }

    void
    size_t
    size() {
        std::shared_lock<std::shared_mutex> lock(mutex_);
        return queue_.size();

@ -34,7 +34,7 @@ namespace milvus {
class ThreadPool {
 public:
    explicit ThreadPool(const int thread_core_coefficient) : shutdown_(false) {
        auto thread_num = cpu_num * thread_core_coefficient;
        auto thread_num = CPU_NUM * thread_core_coefficient;
        LOG_SEGCORE_INFO_ << "Thread pool's worker num:" << thread_num;
        threads_ = std::vector<std::thread>(thread_num);
        Init();
@ -46,7 +46,7 @@ class ThreadPool {

    static ThreadPool&
    GetInstance() {
        static ThreadPool pool(thread_core_coefficient);
        static ThreadPool pool(THREAD_CORE_COEFFICIENT);
        return pool;
    }

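With the renamed globals the pool-size rule reads directly off the constructor: thread_num = CPU_NUM * thread_core_coefficient. For example (illustrative values):

    // CPU_NUM = 8, thread_core_coefficient = 50  ->  8 * 50 = 400 workers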
@ -86,7 +86,7 @@ struct StorageConfig {
    std::string bucket_name = "a-bucket";
    std::string access_key_id = "minioadmin";
    std::string access_key_value = "minioadmin";
    std::string remote_root_path = "files";
    std::string root_path = "files";
    std::string storage_type = "minio";
    std::string iam_endpoint = "";
    bool useSSL = false;

@ -19,15 +19,18 @@
#include "arrow/type_fwd.h"
#include "exceptions/EasyAssert.h"
#include "common/Consts.h"
#include "config/ConfigChunkManager.h"
#include "storage/parquet_c.h"

#ifdef BUILD_DISK_ANN
#include "storage/FieldData.h"
#include "storage/ThreadPool.h"
#include "storage/LocalChunkManager.h"
#include "storage/MinioChunkManager.h"
#include "storage/MemFileManagerImpl.h"
#include "storage/DiskFileManagerImpl.h"
#endif

namespace milvus::storage {

std::map<std::string, ChunkManagerType> ChunkManagerType_Map = {
    {"local", ChunkManagerType::Local}, {"minio", ChunkManagerType::Minio}};

StorageType
ReadMediumType(BinlogReaderPtr reader) {
    AssertInfo(reader->Tell() == 0,
@ -273,6 +276,21 @@ CreateArrowSchema(DataType data_type, int dim) {
    }
}

int
GetDimensionFromFileMetaData(const parquet::ColumnDescriptor* schema,
                             DataType data_type) {
    switch (data_type) {
        case DataType::VECTOR_FLOAT: {
            return schema->type_length() / sizeof(float);
        }
        case DataType::VECTOR_BINARY: {
            return schema->type_length() * 8;
        }
        default:
            PanicInfo("unsupported data type");
    }
}

int
GetDimensionFromArrowArray(std::shared_ptr<arrow::Array> data,
                           DataType data_type) {
@ -299,58 +317,242 @@ GetDimensionFromArrowArray(std::shared_ptr<arrow::Array> data,
|
||||
}
|
||||
|
||||
std::string
|
||||
GenLocalIndexPathPrefix(int64_t build_id, int64_t index_version) {
|
||||
return milvus::ChunkMangerConfig::GetLocalRootPath() + "/" +
|
||||
std::string(INDEX_ROOT_PATH) + "/" + std::to_string(build_id) + "/" +
|
||||
std::to_string(index_version) + "/";
|
||||
GenIndexPathPrefix(ChunkManagerPtr cm,
|
||||
int64_t build_id,
|
||||
int64_t index_version) {
|
||||
return cm->GetRootPath() + "/" + std::string(INDEX_ROOT_PATH) + "/" +
|
||||
std::to_string(build_id) + "/" + std::to_string(index_version) + "/";
|
||||
}
|
||||
|
||||
std::string
|
||||
GetLocalIndexPathPrefixWithBuildID(int64_t build_id) {
|
||||
return milvus::ChunkMangerConfig::GetLocalRootPath() + "/" +
|
||||
std::string(INDEX_ROOT_PATH) + "/" + std::to_string(build_id);
|
||||
GetIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id) {
|
||||
return cm->GetRootPath() + "/" + std::string(INDEX_ROOT_PATH) + "/" +
|
||||
std::to_string(build_id);
|
||||
}
|
||||
|
||||
std::string
|
||||
GenFieldRawDataPathPrefix(int64_t segment_id, int64_t field_id) {
|
||||
return milvus::ChunkMangerConfig::GetLocalRootPath() + "/" +
|
||||
std::string(RAWDATA_ROOT_PATH) + "/" + std::to_string(segment_id) +
|
||||
"/" + std::to_string(field_id) + "/";
|
||||
GenFieldRawDataPathPrefix(ChunkManagerPtr cm,
|
||||
int64_t segment_id,
|
||||
int64_t field_id) {
|
||||
return cm->GetRootPath() + "/" + std::string(RAWDATA_ROOT_PATH) + "/" +
|
||||
std::to_string(segment_id) + "/" + std::to_string(field_id) + "/";
|
||||
}
|
||||
|
||||
std::string
|
||||
GetSegmentRawDataPathPrefix(int64_t segment_id) {
|
||||
return milvus::ChunkMangerConfig::GetLocalRootPath() + "/" +
|
||||
std::string(RAWDATA_ROOT_PATH) + "/" + std::to_string(segment_id);
|
||||
GetSegmentRawDataPathPrefix(ChunkManagerPtr cm, int64_t segment_id) {
|
||||
return cm->GetRootPath() + "/" + std::string(RAWDATA_ROOT_PATH) + "/" +
|
||||
std::to_string(segment_id);
|
||||
}
|
||||
|
||||
std::vector<IndexType>
|
||||
DISK_LIST() {
|
||||
static std::vector<IndexType> ret{
|
||||
knowhere::IndexEnum::INDEX_DISKANN,
|
||||
};
|
||||
return ret;
|
||||
std::unique_ptr<DataCodec>
|
||||
DownloadAndDecodeRemoteFile(ChunkManager* chunk_manager,
|
||||
const std::string& file) {
|
||||
auto fileSize = chunk_manager->Size(file);
|
||||
auto buf = std::shared_ptr<uint8_t[]>(new uint8_t[fileSize]);
|
||||
chunk_manager->Read(file, buf.get(), fileSize);
|
||||
|
||||
return DeserializeFileData(buf, fileSize);
|
||||
}
|
||||
|
||||
bool
|
||||
is_in_disk_list(const IndexType& index_type) {
|
||||
return is_in_list<IndexType>(index_type, DISK_LIST);
|
||||
std::pair<std::string, size_t>
|
||||
EncodeAndUploadIndexSlice(ChunkManager* chunk_manager,
|
||||
uint8_t* buf,
|
||||
int64_t batch_size,
|
||||
IndexMeta index_meta,
|
||||
FieldDataMeta field_meta,
|
||||
std::string object_key) {
|
||||
auto field_data = CreateFieldData(DataType::INT8);
|
||||
field_data->FillFieldData(buf, batch_size);
|
||||
auto indexData = std::make_shared<IndexData>(field_data);
|
||||
indexData->set_index_meta(index_meta);
|
||||
indexData->SetFieldDataMeta(field_meta);
|
||||
auto serialized_index_data = indexData->serialize_to_remote_file();
|
||||
auto serialized_index_size = serialized_index_data.size();
|
||||
chunk_manager->Write(
|
||||
object_key, serialized_index_data.data(), serialized_index_size);
|
||||
return std::make_pair(std::move(object_key), serialized_index_size);
|
||||
}
|
||||
|
||||
// /**
|
||||
// * Returns the current resident set size (physical memory use) measured
|
||||
// * in bytes, or zero if the value cannot be determined on this OS.
|
||||
// */
|
||||
// size_t
|
||||
// getCurrentRSS() {
|
||||
// #if defined(_WIN32)
|
||||
// /* Windows -------------------------------------------------- */
|
||||
// PROCESS_MEMORY_COUNTERS info;
|
||||
// GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info));
|
||||
// return (size_t)info.WorkingSetSize;
|
||||
|
||||
// #elif defined(__APPLE__) && defined(__MACH__)
|
||||
// /* OSX ------------------------------------------------------ */
|
||||
// struct mach_task_basic_info info;
|
||||
// mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
|
||||
// if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) != KERN_SUCCESS)
|
||||
// return (size_t)0L; /* Can't access? */
|
||||
// return (size_t)info.resident_size;
|
||||
|
||||
// #elif defined(__linux__) || defined(__linux) || defined(linux) || defined(__gnu_linux__)
|
||||
// /* Linux ---------------------------------------------------- */
|
||||
// long rss = 0L;
|
||||
// FILE* fp = NULL;
|
||||
// if ((fp = fopen("/proc/self/statm", "r")) == NULL)
|
||||
// return (size_t)0L; /* Can't open? */
|
||||
// if (fscanf(fp, "%*s%ld", &rss) != 1) {
|
||||
// fclose(fp);
|
||||
// return (size_t)0L; /* Can't read? */
|
||||
// }
|
||||
// fclose(fp);
|
||||
// return (size_t)rss * (size_t)sysconf(_SC_PAGESIZE);
|
||||
|
||||
// #else
|
||||
// /* AIX, BSD, Solaris, and Unknown OS ------------------------ */
|
||||
// return (size_t)0L; /* Unsupported. */
|
||||
// #endif
|
||||
// }
|
||||
|
||||
std::vector<FieldDataPtr>
|
||||
GetObjectData(ChunkManager* remote_chunk_manager,
|
||||
const std::vector<std::string>& remote_files) {
|
||||
auto& pool = ThreadPool::GetInstance();
|
||||
std::vector<std::future<std::unique_ptr<DataCodec>>> futures;
|
||||
for (auto& file : remote_files) {
|
||||
futures.emplace_back(pool.Submit(
|
||||
DownloadAndDecodeRemoteFile, remote_chunk_manager, file));
|
||||
}
|
||||
|
||||
std::vector<FieldDataPtr> datas;
|
||||
for (int i = 0; i < futures.size(); ++i) {
|
||||
auto res = futures[i].get();
|
||||
datas.emplace_back(res->GetFieldData());
|
||||
}
|
||||
|
||||
ReleaseArrowUnused();
|
||||
return datas;
|
||||
}
|
||||
|
||||
std::map<std::string, int64_t>
|
||||
PutIndexData(ChunkManager* remote_chunk_manager,
|
||||
const std::vector<const uint8_t*>& data_slices,
|
||||
const std::vector<int64_t>& slice_sizes,
|
||||
const std::vector<std::string>& slice_names,
|
||||
FieldDataMeta& field_meta,
|
||||
IndexMeta& index_meta) {
|
||||
auto& pool = ThreadPool::GetInstance();
|
||||
std::vector<std::future<std::pair<std::string, size_t>>> futures;
|
||||
AssertInfo(data_slices.size() == slice_sizes.size(),
|
||||
"inconsistent size of data slices with slice sizes!");
|
||||
AssertInfo(data_slices.size() == slice_names.size(),
|
||||
"inconsistent size of data slices with slice names!");
|
||||
|
||||
for (int64_t i = 0; i < data_slices.size(); ++i) {
|
||||
futures.push_back(pool.Submit(EncodeAndUploadIndexSlice,
|
||||
remote_chunk_manager,
|
||||
const_cast<uint8_t*>(data_slices[i]),
|
||||
slice_sizes[i],
|
||||
index_meta,
|
||||
field_meta,
|
||||
slice_names[i]));
|
||||
}
|
||||
|
||||
std::map<std::string, int64_t> remote_paths_to_size;
|
||||
for (auto& future : futures) {
|
||||
auto res = future.get();
|
||||
remote_paths_to_size[res.first] = res.second;
|
||||
}
|
||||
|
||||
ReleaseArrowUnused();
|
||||
return remote_paths_to_size;
|
||||
}
|
||||
|
||||
int64_t
|
||||
GetTotalNumRowsForFieldDatas(const std::vector<FieldDataPtr>& field_datas) {
|
||||
int64_t count = 0;
|
||||
for (auto& field_data : field_datas) {
|
||||
count += field_data->get_num_rows();
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||

void
ReleaseArrowUnused() {
    static std::mutex release_mutex;

    // When multiple threads release memory concurrently, not every thread
    // needs to perform the release; letting whichever thread grabs the lock
    // do it works just as well.
    if (release_mutex.try_lock()) {
        arrow::default_memory_pool()->ReleaseUnused();
        release_mutex.unlock();
    }
}

ChunkManagerPtr
CreateChunkManager(const StorageConfig& storage_config) {
    auto storage_type = ChunkManagerType_Map[storage_config.storage_type];

    switch (storage_type) {
        case ChunkManagerType::Local: {
            return std::make_shared<LocalChunkManager>(
                storage_config.root_path);
        }
        case ChunkManagerType::Minio: {
            return std::make_shared<MinioChunkManager>(storage_config);
        }
        default: {
            PanicInfo("unsupported");
        }
    }
}

FileManagerImplPtr
CreateFileManager(IndexType index_type,
                  const FieldDataMeta& field_meta,
                  const IndexMeta& index_meta,
                  const StorageConfig& storage_config) {
    // TODO :: switch case index type to create file manager
#ifdef BUILD_DISK_ANN
                  ChunkManagerPtr cm) {
    if (is_in_disk_list(index_type)) {
        return std::make_shared<DiskFileManagerImpl>(
            field_meta, index_meta, storage_config);
            field_meta, index_meta, cm);
    }
#endif

    return nullptr;
    return std::make_shared<MemFileManagerImpl>(field_meta, index_meta, cm);
}
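The signature change above means callers now build the chunk manager once and hand it in. A hedged sketch of the new call shape; all arguments are assumed to exist in the caller, and the namespace of IndexType is an assumption.

// Hedged sketch of the post-change call shape: the factory borrows a
// ChunkManager created by the caller instead of reading a StorageConfig.
milvus::storage::FileManagerImplPtr
MakeFileManagerSketch(const milvus::storage::StorageConfig& storage_config,
                      const milvus::IndexType& index_type,
                      const milvus::storage::FieldDataMeta& field_meta,
                      const milvus::storage::IndexMeta& index_meta) {
    auto cm = milvus::storage::CreateChunkManager(storage_config);
    return milvus::storage::CreateFileManager(
        index_type, field_meta, index_meta, cm);
}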

FieldDataPtr
CreateFieldData(const DataType& type, int64_t dim, int64_t total_num_rows) {
    switch (type) {
        case DataType::BOOL:
            return std::make_shared<FieldData<bool>>(type, total_num_rows);
        case DataType::INT8:
            return std::make_shared<FieldData<int8_t>>(type, total_num_rows);
        case DataType::INT16:
            return std::make_shared<FieldData<int16_t>>(type, total_num_rows);
        case DataType::INT32:
            return std::make_shared<FieldData<int32_t>>(type, total_num_rows);
        case DataType::INT64:
            return std::make_shared<FieldData<int64_t>>(type, total_num_rows);
        case DataType::FLOAT:
            return std::make_shared<FieldData<float>>(type, total_num_rows);
        case DataType::DOUBLE:
            return std::make_shared<FieldData<double>>(type, total_num_rows);
        case DataType::STRING:
        case DataType::VARCHAR:
            return std::make_shared<FieldData<std::string>>(type,
                                                            total_num_rows);
        case DataType::JSON:
            return std::make_shared<FieldData<Json>>(type, total_num_rows);
        case DataType::VECTOR_FLOAT:
            return std::make_shared<FieldData<FloatVector>>(
                dim, type, total_num_rows);
        case DataType::VECTOR_BINARY:
            return std::make_shared<FieldData<BinaryVector>>(
                dim, type, total_num_rows);
        default:
            throw NotSupportedDataTypeException(
                "CreateFieldData not support data type " +
                datatype_name(type));
    }
}
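Two hedged examples of the factory above; the dimensions and row counts are arbitrary. Scalar types ignore dim (the header declaration below defaults it to 1), while vector types require it.

// Hedged sketch: allocate typed field-data buffers via the factory.
auto int_field = milvus::storage::CreateFieldData(
    milvus::DataType::INT64, /*dim=*/1, /*total_num_rows=*/1000);
auto vec_field = milvus::storage::CreateFieldData(
    milvus::DataType::VECTOR_FLOAT, /*dim=*/128, /*total_num_rows=*/1000);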

} // namespace milvus::storage

@ -23,7 +23,10 @@
#include "storage/PayloadStream.h"
#include "storage/FileManager.h"
#include "storage/BinlogReader.h"
#include "storage/ChunkManager.h"
#include "storage/DataCodec.h"
#include "knowhere/comp/index_param.h"
#include "parquet/schema.h"

namespace milvus::storage {

@ -55,36 +58,73 @@ CreateArrowSchema(DataType data_type);
std::shared_ptr<arrow::Schema>
CreateArrowSchema(DataType data_type, int dim);

int
GetDimensionFromFileMetaData(const parquet::ColumnDescriptor* schema,
                             DataType data_type);

int
GetDimensionFromArrowArray(std::shared_ptr<arrow::Array> array,
                           DataType data_type);

std::string
GetLocalIndexPathPrefixWithBuildID(int64_t build_id);
GetIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id);

std::string
GenLocalIndexPathPrefix(int64_t build_id, int64_t index_version);
GenIndexPathPrefix(ChunkManagerPtr cm, int64_t build_id, int64_t index_version);

std::string
GenFieldRawDataPathPrefix(int64_t segment_id, int64_t field_id);
GenFieldRawDataPathPrefix(ChunkManagerPtr cm,
                          int64_t segment_id,
                          int64_t field_id);

std::string
GetSegmentRawDataPathPrefix(int64_t segment_id);
GetSegmentRawDataPathPrefix(ChunkManagerPtr cm, int64_t segment_id);

template <typename T>
inline bool
is_in_list(const T& t, std::function<std::vector<T>()> list_func) {
    auto l = list_func();
    return std::find(l.begin(), l.end(), t) != l.end();
}
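One plausible use of the is_in_list helper above: implementing the is_in_disk_list predicate declared below as a lookup in a fixed list. The list contents and the "DISKANN" index name are assumptions, not taken from this diff.

// Hedged sketch: is_in_disk_list expressed via is_in_list. "DISKANN" is an
// assumed index-type name; IndexType is treated as a string-like type here.
inline bool
is_in_disk_list_sketch(const IndexType& index_type) {
    return is_in_list<IndexType>(
        index_type, [] { return std::vector<IndexType>{"DISKANN"}; });
}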
std::unique_ptr<DataCodec>
DownloadAndDecodeRemoteFile(ChunkManager* chunk_manager,
                            const std::string& file);

bool
is_in_disk_list(const IndexType& index_type);
std::pair<std::string, size_t>
EncodeAndUploadIndexSlice(ChunkManager* chunk_manager,
                          uint8_t* buf,
                          int64_t batch_size,
                          IndexMeta index_meta,
                          FieldDataMeta field_meta,
                          std::string object_key);

std::vector<FieldDataPtr>
GetObjectData(ChunkManager* remote_chunk_manager,
              const std::vector<std::string>& remote_files);

std::map<std::string, int64_t>
PutIndexData(ChunkManager* remote_chunk_manager,
             const std::vector<const uint8_t*>& data_slices,
             const std::vector<int64_t>& slice_sizes,
             const std::vector<std::string>& slice_names,
             FieldDataMeta& field_meta,
             IndexMeta& index_meta);

int64_t
GetTotalNumRowsForFieldDatas(const std::vector<FieldDataPtr>& field_datas);

void
ReleaseArrowUnused();

// size_t
// getCurrentRSS();

ChunkManagerPtr
CreateChunkManager(const StorageConfig& storage_config);

FileManagerImplPtr
CreateFileManager(IndexType index_type,
                  const FieldDataMeta& field_meta,
                  const IndexMeta& index_meta,
                  const StorageConfig& storage_config);
                  ChunkManagerPtr cm);

FieldDataPtr
CreateFieldData(const DataType& type,
                int64_t dim = 1,
                int64_t total_num_rows = 0);

} // namespace milvus::storage

@ -21,24 +21,12 @@
#include "storage/PayloadWriter.h"
#include "storage/FieldData.h"
#include "common/CGoHelper.h"
#include "storage/Util.h"

using Payload = milvus::storage::Payload;
using PayloadWriter = milvus::storage::PayloadWriter;
using PayloadReader = milvus::storage::PayloadReader;

void
ReleaseArrowUnused() {
    static std::mutex release_mutex;

    // When multiple threads release memory concurrently, not every thread
    // needs to perform the release; letting whichever thread grabs the lock
    // do it works just as well.
    if (release_mutex.try_lock()) {
        arrow::default_memory_pool()->ReleaseUnused();
        release_mutex.unlock();
    }
}

extern "C" CPayloadWriter
NewPayloadWriter(int columnType) {
    auto data_type = static_cast<milvus::DataType>(columnType);
@ -227,7 +215,7 @@ ReleasePayloadWriter(CPayloadWriter handler) {
    auto p = reinterpret_cast<PayloadWriter*>(handler);
    if (p != nullptr) {
        delete p;
        ReleaseArrowUnused();
        milvus::storage::ReleaseArrowUnused();
    }
}

@ -378,8 +366,9 @@ GetOneStringFromPayload(CPayloadReader payloadReader,
    try {
        auto p = reinterpret_cast<PayloadReader*>(payloadReader);
        auto field_data = p->get_field_data();
        *cstr = (char*)(const_cast<void*>(field_data->RawValue(idx)));
        *str_size = field_data->get_element_size(idx);
        auto str = const_cast<void*>(field_data->RawValue(idx));
        *cstr = (char*)(*static_cast<std::string*>(str)).c_str();
        *str_size = field_data->Size(idx);
        return milvus::SuccessCStatus();
    } catch (std::exception& e) {
        return milvus::FailureCStatus(UnexpectedError, e.what());
@ -434,7 +423,8 @@ ReleasePayloadReader(CPayloadReader payloadReader) {
               "released payloadReader should not be null pointer");
    auto p = reinterpret_cast<PayloadReader*>(payloadReader);
    delete (p);
    ReleaseArrowUnused();

    milvus::storage::ReleaseArrowUnused();
    return milvus::SuccessCStatus();
} catch (std::exception& e) {
    return milvus::FailureCStatus(UnexpectedError, e.what());

@ -15,28 +15,67 @@
// limitations under the License.

#include "storage/storage_c.h"
#include "config/ConfigChunkManager.h"
#include "common/CGoHelper.h"

#ifdef BUILD_DISK_ANN
#include "storage/LocalChunkManager.h"
#endif
#include "storage/RemoteChunkManagerSingleton.h"
#include "storage/LocalChunkManagerSingleton.h"

CStatus
GetLocalUsedSize(int64_t* size) {
GetLocalUsedSize(const char* c_dir, int64_t* size) {
    try {
#ifdef BUILD_DISK_ANN
        auto& local_chunk_manager =
            milvus::storage::LocalChunkManager::GetInstance();
        auto dir = milvus::ChunkMangerConfig::GetLocalRootPath();
        if (local_chunk_manager.DirExist(dir)) {
            *size = local_chunk_manager.GetSizeOfDir(dir);
        auto local_chunk_manager =
            milvus::storage::LocalChunkManagerSingleton::GetInstance()
                .GetChunkManager();
        std::string dir(c_dir);
        if (local_chunk_manager->DirExist(dir)) {
            *size = local_chunk_manager->GetSizeOfDir(dir);
        } else {
            *size = 0;
        }
#endif
        return milvus::SuccessCStatus();
    } catch (std::exception& e) {
        return milvus::FailureCStatus(UnexpectedError, e.what());
    }
}
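A hedged caller sketch for the revised C API above: the directory is now passed in explicitly instead of being read from the removed ChunkMangerConfig global. The path literal is a placeholder.

// Hedged usage sketch: query local disk usage through the C API.
#include <cstdint>
#include "storage/storage_c.h"

int64_t
QueryLocalUsedSizeSketch() {
    int64_t size = 0;
    CStatus status = GetLocalUsedSize("/var/lib/milvus/data", &size);
    // Assumes the CStatus convention that error_code == 0 means success.
    return status.error_code == 0 ? size : -1;
}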

CStatus
InitLocalChunkManagerSingleton(const char* c_path) {
    try {
        std::string path(c_path);
        milvus::storage::LocalChunkManagerSingleton::GetInstance().Init(path);

        return milvus::SuccessCStatus();
    } catch (std::exception& e) {
        return milvus::FailureCStatus(UnexpectedError, e.what());
    }
}

CStatus
InitRemoteChunkManagerSingleton(CStorageConfig c_storage_config) {
    try {
        milvus::storage::StorageConfig storage_config;
        storage_config.address = std::string(c_storage_config.address);
        storage_config.bucket_name = std::string(c_storage_config.bucket_name);
        storage_config.access_key_id =
            std::string(c_storage_config.access_key_id);
        storage_config.access_key_value =
            std::string(c_storage_config.access_key_value);
        storage_config.root_path = std::string(c_storage_config.root_path);
        storage_config.storage_type =
            std::string(c_storage_config.storage_type);
        storage_config.iam_endpoint =
            std::string(c_storage_config.iam_endpoint);
        storage_config.useSSL = c_storage_config.useSSL;
        storage_config.useIAM = c_storage_config.useIAM;
        milvus::storage::RemoteChunkManagerSingleton::GetInstance().Init(
            storage_config);

        return milvus::SuccessCStatus();
    } catch (std::exception& e) {
        return milvus::FailureCStatus(UnexpectedError, e.what());
    }
}

void
CleanRemoteChunkManagerSingleton() {
    milvus::storage::RemoteChunkManagerSingleton::GetInstance().Release();
}

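Taken together, a hedged sketch of the startup sequence a host process might run before using the storage layer. Every literal is a placeholder; the CStorageConfig field set mirrors exactly what InitRemoteChunkManagerSingleton copies above.

// Hedged initialization sketch: set up both singletons before any binlog
// upload/download, and release the remote one at shutdown. All values are
// placeholders.
void
InitStorageSketch() {
    CStorageConfig c_config{};
    c_config.address = "localhost:9000";  // placeholder object-store endpoint
    c_config.bucket_name = "a-bucket";
    c_config.access_key_id = "minioadmin";
    c_config.access_key_value = "minioadmin";
    c_config.root_path = "files";
    c_config.storage_type = "minio";
    c_config.iam_endpoint = "";
    c_config.useSSL = false;
    c_config.useIAM = false;

    InitLocalChunkManagerSingleton("/var/lib/milvus");  // local disk root
    InitRemoteChunkManagerSingleton(c_config);

    // ... upload and download binlogs ...

    CleanRemoteChunkManagerSingleton();
}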
@ -22,7 +22,16 @@ extern "C" {
#include "common/type_c.h"

CStatus
GetLocalUsedSize(int64_t* size);
GetLocalUsedSize(const char* c_path, int64_t* size);

CStatus
InitLocalChunkManagerSingleton(const char* path);

CStatus
InitRemoteChunkManagerSingleton(CStorageConfig c_storage_config);

void
CleanRemoteChunkManagerSingleton();

#ifdef __cplusplus
};