enhance: remove unused code for StorageV2 (#35132)

issue: https://github.com/milvus-io/milvus/issues/34168

Signed-off-by: zhenshan.cao <zhenshan.cao@zilliz.com>

parent: 9412002d7d
commit: aa247f192d
Makefile (10 changed lines)
@@ -335,6 +335,16 @@ test-querycoord:
 	@echo "Running go unittests..."
 	@(env bash $(PWD)/scripts/run_go_unittest.sh -t querycoord)
 
+generate-mockery-flushcommon: getdeps
+	$(INSTALL_PATH)/mockery --name=MetaCache --dir=$(PWD)/internal/flushcommon/metacache --output=$(PWD)/internal/flushcommon/metacache --filename=mock_meta_cache.go --with-expecter --structname=MockMetaCache --outpkg=metacache --inpackage
+	$(INSTALL_PATH)/mockery --name=SyncManager --dir=$(PWD)/internal/flushcommon/syncmgr --output=$(PWD)/internal/flushcommon/syncmgr --filename=mock_sync_manager.go --with-expecter --structname=MockSyncManager --outpkg=syncmgr --inpackage
+	$(INSTALL_PATH)/mockery --name=MetaWriter --dir=$(PWD)/internal/flushcommon/syncmgr --output=$(PWD)/internal/flushcommon/syncmgr --filename=mock_meta_writer.go --with-expecter --structname=MockMetaWriter --outpkg=syncmgr --inpackage
+	$(INSTALL_PATH)/mockery --name=Serializer --dir=$(PWD)/internal/flushcommon/syncmgr --output=$(PWD)/internal/flushcommon/syncmgr --filename=mock_serializer.go --with-expecter --structname=MockSerializer --outpkg=syncmgr --inpackage
+	$(INSTALL_PATH)/mockery --name=Task --dir=$(PWD)/internal/flushcommon/syncmgr --output=$(PWD)/internal/flushcommon/syncmgr --filename=mock_task.go --with-expecter --structname=MockTask --outpkg=syncmgr --inpackage
+	$(INSTALL_PATH)/mockery --name=WriteBuffer --dir=$(PWD)/internal/flushcommon/writebuffer --output=$(PWD)/internal/flushcommon/writebuffer --filename=mock_write_buffer.go --with-expecter --structname=MockWriteBuffer --outpkg=writebuffer --inpackage
+	$(INSTALL_PATH)/mockery --name=BufferManager --dir=$(PWD)/internal/flushcommon/writebuffer --output=$(PWD)/internal/flushcommon/writebuffer --filename=mock_manager.go --with-expecter --structname=MockBufferManager --outpkg=writebuffer --inpackage
+	$(INSTALL_PATH)/mockery --name=FlowgraphManager --dir=$(PWD)/internal/flushcommon/pipeline --output=$(PWD)/internal/flushcommon/pipeline --filename=mock_fgmanager.go --with-expecter --structname=MockFlowgraphManager --outpkg=pipeline --inpackage
+
 test-metastore:
 	@echo "Running go unittests..."
 	@(env bash $(PWD)/scripts/run_go_unittest.sh -t metastore)
go.mod (2 changed lines)
@@ -55,8 +55,6 @@ require (
 	google.golang.org/grpc/examples v0.0.0-20220617181431-3e7b97febc7f
 )
 
-require github.com/milvus-io/milvus-storage/go v0.0.0-20231227072638-ebd0b8e56d70
-
 require (
 	github.com/bits-and-blooms/bitset v1.10.0
 	github.com/cenkalti/backoff/v4 v4.2.1
go.sum (2 changed lines)
@@ -608,8 +608,6 @@ github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b h1:TfeY0NxYxZz
 github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b/go.mod h1:iwW+9cWfIzzDseEBCCeDSN5SD16Tidvy8cwQ7ZY8Qj4=
 github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20240717062137-3ffb1db01632 h1:CXig0DNtUsCLzchCFe3PR2KgOdobbz9gK2nSV7195PM=
 github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20240717062137-3ffb1db01632/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs=
-github.com/milvus-io/milvus-storage/go v0.0.0-20231227072638-ebd0b8e56d70 h1:Z+sp64fmAOxAG7mU0dfVOXvAXlwRB0c8a96rIM5HevI=
-github.com/milvus-io/milvus-storage/go v0.0.0-20231227072638-ebd0b8e56d70/go.mod h1:GPETMcTZq1gLY1WA6Na5kiNAKnq8SEMMiVKUZrM3sho=
 github.com/milvus-io/pulsar-client-go v0.6.10 h1:eqpJjU+/QX0iIhEo3nhOqMNXL+TyInAs1IAHZCrCM/A=
 github.com/milvus-io/pulsar-client-go v0.6.10/go.mod h1:lQqCkgwDF8YFYjKA+zOheTk1tev2B+bKj5j7+nm8M1w=
 github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs=
@@ -21,7 +21,6 @@
 #include <vector>
 
 #include "storage/MemFileManagerImpl.h"
-#include "storage/space.h"
 #include "pb/clustering.pb.h"
 #include "knowhere/cluster/cluster_factory.h"
 

@@ -25,7 +25,6 @@
 #include "storage/ChunkManager.h"
 #include "storage/DataCodec.h"
 #include "storage/Types.h"
-#include "storage/space.h"
 
 namespace milvus::clustering {
 
@@ -30,4 +30,4 @@ set(MILVUS_EXEC_SRCS
 
 add_library(milvus_exec STATIC ${MILVUS_EXEC_SRCS})
 
-target_link_libraries(milvus_exec milvus_common milvus-storage ${CONAN_LIBS})
+target_link_libraries(milvus_exec milvus_common ${CONAN_LIBS})
@@ -25,7 +25,6 @@
 #include "index/ScalarIndex.h"
 #include "index/Utils.h"
 #include "storage/Util.h"
-#include "storage/space.h"
 
 namespace milvus {
 namespace index {
@@ -42,20 +41,6 @@ BitmapIndex<T>::BitmapIndex(
     }
 }
 
-template <typename T>
-BitmapIndex<T>::BitmapIndex(
-    const storage::FileManagerContext& file_manager_context,
-    std::shared_ptr<milvus_storage::Space> space)
-    : is_built_(false),
-      schema_(file_manager_context.fieldDataMeta.field_schema),
-      space_(space) {
-    if (file_manager_context.Valid()) {
-        file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
-            file_manager_context, space);
-        AssertInfo(file_manager_ != nullptr, "create file manager failed!");
-    }
-}
-
 template <typename T>
 void
 BitmapIndex<T>::Build(const Config& config) {
@@ -101,32 +86,6 @@ BitmapIndex<T>::Build(size_t n, const T* data) {
     is_built_ = true;
 }
 
-template <typename T>
-void
-BitmapIndex<T>::BuildV2(const Config& config) {
-    if (is_built_) {
-        return;
-    }
-    auto field_name = file_manager_->GetIndexMeta().field_name;
-    auto reader = space_->ScanData();
-    std::vector<FieldDataPtr> field_datas;
-    for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
-        if (!rec.ok()) {
-            PanicInfo(DataFormatBroken, "failed to read data");
-        }
-        auto data = rec.ValueUnsafe();
-        auto total_num_rows = data->num_rows();
-        auto col_data = data->GetColumnByName(field_name);
-        // todo: support nullable index
-        auto field_data = storage::CreateFieldData(
-            DataType(GetDType<T>()), false, 0, total_num_rows);
-        field_data->FillFieldData(col_data);
-        field_datas.push_back(field_data);
-    }
-
-    BuildWithFieldData(field_datas);
-}
-
 template <typename T>
 void
 BitmapIndex<T>::BuildPrimitiveField(
@@ -302,21 +261,6 @@ BitmapIndex<T>::Upload(const Config& config) {
     return ret;
 }
 
-template <typename T>
-BinarySet
-BitmapIndex<T>::UploadV2(const Config& config) {
-    auto binary_set = Serialize(config);
-
-    file_manager_->AddFileV2(binary_set);
-
-    auto remote_path_to_size = file_manager_->GetRemotePathsToFileSize();
-    BinarySet ret;
-    for (auto& file : remote_path_to_size) {
-        ret.Append(file.first, nullptr, file.second);
-    }
-    return ret;
-}
-
 template <typename T>
 void
 BitmapIndex<T>::Load(const BinarySet& binary_set, const Config& config) {
@@ -420,48 +364,6 @@ BitmapIndex<T>::LoadWithoutAssemble(const BinarySet& binary_set,
     is_built_ = true;
 }
 
-template <typename T>
-void
-BitmapIndex<T>::LoadV2(const Config& config) {
-    auto blobs = space_->StatisticsBlobs();
-    std::vector<std::string> index_files;
-    auto prefix = file_manager_->GetRemoteIndexObjectPrefixV2();
-    for (auto& b : blobs) {
-        if (b.name.rfind(prefix, 0) == 0) {
-            index_files.push_back(b.name);
-        }
-    }
-    std::map<std::string, FieldDataPtr> index_datas{};
-    for (auto& file_name : index_files) {
-        auto res = space_->GetBlobByteSize(file_name);
-        if (!res.ok()) {
-            PanicInfo(S3Error, "unable to read index blob");
-        }
-        auto index_blob_data =
-            std::shared_ptr<uint8_t[]>(new uint8_t[res.value()]);
-        auto status = space_->ReadBlob(file_name, index_blob_data.get());
-        if (!status.ok()) {
-            PanicInfo(S3Error, "unable to read index blob");
-        }
-        auto raw_index_blob =
-            storage::DeserializeFileData(index_blob_data, res.value());
-        auto key = file_name.substr(file_name.find_last_of('/') + 1);
-        index_datas[key] = raw_index_blob->GetFieldData();
-    }
-    AssembleIndexDatas(index_datas);
-
-    BinarySet binary_set;
-    for (auto& [key, data] : index_datas) {
-        auto size = data->Size();
-        auto deleter = [&](uint8_t*) {};  // avoid repeated deconstruction
-        auto buf = std::shared_ptr<uint8_t[]>(
-            (uint8_t*)const_cast<void*>(data->Data()), deleter);
-        binary_set.Append(key, buf, size);
-    }
-
-    LoadWithoutAssemble(binary_set, config);
-}
-
 template <typename T>
 void
 BitmapIndex<T>::Load(milvus::tracer::TraceContext ctx, const Config& config) {
@@ -25,7 +25,6 @@
 #include "storage/FileManager.h"
 #include "storage/DiskFileManagerImpl.h"
 #include "storage/MemFileManagerImpl.h"
-#include "storage/space.h"
 
 namespace milvus {
 namespace index {
@@ -46,10 +45,6 @@ class BitmapIndex : public ScalarIndex<T> {
         const storage::FileManagerContext& file_manager_context =
             storage::FileManagerContext());
 
-    explicit BitmapIndex(
-        const storage::FileManagerContext& file_manager_context,
-        std::shared_ptr<milvus_storage::Space> space);
-
     ~BitmapIndex() override = default;
 
     BinarySet
@@ -61,9 +56,6 @@ class BitmapIndex : public ScalarIndex<T> {
     void
     Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;
 
-    void
-    LoadV2(const Config& config = {}) override;
-
     int64_t
     Count() override {
         return total_num_rows_;
@@ -83,9 +75,6 @@ class BitmapIndex : public ScalarIndex<T> {
     void
     BuildWithFieldData(const std::vector<FieldDataPtr>& datas) override;
 
-    void
-    BuildV2(const Config& config = {}) override;
-
     const TargetBitmap
     In(size_t n, const T* values) override;
 
@@ -112,9 +101,6 @@ class BitmapIndex : public ScalarIndex<T> {
     BinarySet
     Upload(const Config& config = {}) override;
 
-    BinarySet
-    UploadV2(const Config& config = {}) override;
-
     const bool
     HasRawData() const override {
         if (schema_.data_type() == proto::schema::DataType::Array) {
@@ -195,7 +181,6 @@ class BitmapIndex : public ScalarIndex<T> {
     size_t total_num_rows_{0};
     proto::schema::FieldSchema schema_;
     std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
-    std::shared_ptr<milvus_storage::Space> space_;
 };
 
 }  // namespace index
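Note (not part of the commit): with the Space-based constructor and the BuildV2/LoadV2/UploadV2 entry points removed above, BitmapIndex is driven entirely through the original FileManagerContext path. The sketch below is illustrative only — the helper name BuildAndUpload and the assumption that a valid FileManagerContext and a build Config are already in hand are mine, not the commit's:

    // Illustrative sketch, not code from this diff.
    #include "index/BitmapIndex.h"

    namespace milvus::index {

    // Assumes the caller supplies a valid storage::FileManagerContext and a
    // build Config that references the segment's raw data files.
    BinarySet
    BuildAndUpload(const storage::FileManagerContext& ctx, const Config& cfg) {
        BitmapIndex<int64_t> index(ctx);  // the single remaining constructor
        index.Build(cfg);                 // V1 build path; BuildV2 is gone
        return index.Upload(cfg);         // remote index file names and sizes
    }

    }  // namespace milvus::index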
@@ -26,6 +26,6 @@ set(INDEX_FILES
 milvus_add_pkg_config("milvus_index")
 add_library(milvus_index SHARED ${INDEX_FILES})
 
-target_link_libraries(milvus_index milvus_storage milvus-storage tantivy_binding)
+target_link_libraries(milvus_index milvus_storage tantivy_binding)
 
 install(TARGETS milvus_index DESTINATION "${CMAKE_INSTALL_LIBDIR}")
@@ -23,7 +23,6 @@
 #include "index/ScalarIndex.h"
 #include "index/Utils.h"
 #include "storage/Util.h"
-#include "storage/space.h"
 
 namespace milvus {
 namespace index {
@@ -43,23 +42,6 @@ HybridScalarIndex<T>::HybridScalarIndex(
     internal_index_type_ = ScalarIndexType::NONE;
 }
 
-template <typename T>
-HybridScalarIndex<T>::HybridScalarIndex(
-    const storage::FileManagerContext& file_manager_context,
-    std::shared_ptr<milvus_storage::Space> space)
-    : is_built_(false),
-      bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND),
-      file_manager_context_(file_manager_context),
-      space_(space) {
-    if (file_manager_context.Valid()) {
-        mem_file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
-            file_manager_context, space);
-        AssertInfo(mem_file_manager_ != nullptr, "create file manager failed!");
-    }
-    field_type_ = file_manager_context.fieldDataMeta.field_schema.data_type();
-    internal_index_type_ = ScalarIndexType::NONE;
-}
-
 template <typename T>
 ScalarIndexType
 HybridScalarIndex<T>::SelectIndexBuildType(size_t n, const T* values) {
@@ -274,39 +256,6 @@ HybridScalarIndex<T>::Build(const Config& config) {
     is_built_ = true;
 }
 
-template <typename T>
-void
-HybridScalarIndex<T>::BuildV2(const Config& config) {
-    if (is_built_) {
-        return;
-    }
-    bitmap_index_cardinality_limit_ =
-        GetBitmapCardinalityLimitFromConfig(config);
-    LOG_INFO("config bitmap cardinality limit to {}",
-             bitmap_index_cardinality_limit_);
-
-    auto field_name = mem_file_manager_->GetIndexMeta().field_name;
-    auto reader = space_->ScanData();
-    std::vector<FieldDataPtr> field_datas;
-    for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
-        if (!rec.ok()) {
-            PanicInfo(DataFormatBroken, "failed to read data");
-        }
-        auto data = rec.ValueUnsafe();
-        auto total_num_rows = data->num_rows();
-        auto col_data = data->GetColumnByName(field_name);
-        // todo: support nullable index
-        auto field_data = storage::CreateFieldData(
-            DataType(GetDType<T>()), false, 0, total_num_rows);
-        field_data->FillFieldData(col_data);
-        field_datas.push_back(field_data);
-    }
-
-    SelectIndexBuildType(field_datas);
-    BuildInternal(field_datas);
-    is_built_ = true;
-}
-
 template <typename T>
 BinarySet
 HybridScalarIndex<T>::Serialize(const Config& config) {
@@ -356,21 +305,6 @@ HybridScalarIndex<T>::Upload(const Config& config) {
     return index_ret;
 }
 
-template <typename T>
-BinarySet
-HybridScalarIndex<T>::UploadV2(const Config& config) {
-    auto internal_index = GetInternalIndex();
-    auto index_ret = internal_index->Upload(config);
-
-    auto index_type_ret = SerializeIndexType();
-
-    for (auto& [key, value] : index_type_ret.binary_map_) {
-        index_ret.Append(key, value);
-    }
-
-    return index_ret;
-}
-
 template <typename T>
 void
 HybridScalarIndex<T>::DeserializeIndexType(const BinarySet& binary_set) {
@@ -380,12 +314,6 @@ HybridScalarIndex<T>::DeserializeIndexType(const BinarySet& binary_set) {
     internal_index_type_ = static_cast<ScalarIndexType>(index_type);
 }
 
-template <typename T>
-void
-HybridScalarIndex<T>::LoadV2(const Config& config) {
-    PanicInfo(Unsupported, "HybridScalarIndex LoadV2 not implemented");
-}
-
 template <typename T>
 std::string
 HybridScalarIndex<T>::GetRemoteIndexTypeFile(
@@ -28,7 +28,6 @@
 #include "storage/FileManager.h"
 #include "storage/DiskFileManagerImpl.h"
 #include "storage/MemFileManagerImpl.h"
-#include "storage/space.h"
 
 namespace milvus {
 namespace index {
@@ -46,10 +45,6 @@ class HybridScalarIndex : public ScalarIndex<T> {
         const storage::FileManagerContext& file_manager_context =
             storage::FileManagerContext());
 
-    explicit HybridScalarIndex(
-        const storage::FileManagerContext& file_manager_context,
-        std::shared_ptr<milvus_storage::Space> space);
-
     ~HybridScalarIndex() override = default;
 
     BinarySet
@@ -61,9 +56,6 @@ class HybridScalarIndex : public ScalarIndex<T> {
     void
     Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;
 
-    void
-    LoadV2(const Config& config = {}) override;
-
     int64_t
     Count() override {
         return internal_index_->Count();
@@ -85,9 +77,6 @@ class HybridScalarIndex : public ScalarIndex<T> {
     void
     Build(const Config& config = {}) override;
 
-    void
-    BuildV2(const Config& config = {}) override;
-
     const TargetBitmap
     In(size_t n, const T* values) override {
         return internal_index_->In(n, values);
@@ -133,9 +122,6 @@ class HybridScalarIndex : public ScalarIndex<T> {
     BinarySet
     Upload(const Config& config = {}) override;
 
-    BinarySet
-    UploadV2(const Config& config = {}) override;
-
  private:
     ScalarIndexType
     SelectBuildTypeForPrimitiveType(
@@ -173,7 +159,6 @@ class HybridScalarIndex : public ScalarIndex<T> {
     std::shared_ptr<ScalarIndex<T>> internal_index_{nullptr};
     storage::FileManagerContext file_manager_context_;
     std::shared_ptr<storage::MemFileManagerImpl> mem_file_manager_{nullptr};
-    std::shared_ptr<milvus_storage::Space> space_{nullptr};
 };
 
 }  // namespace index
@@ -44,9 +44,6 @@ class IndexBase {
     virtual void
     Load(milvus::tracer::TraceContext ctx, const Config& config = {}) = 0;
 
-    virtual void
-    LoadV2(const Config& config = {}) = 0;
-
     virtual void
     BuildWithRawData(size_t n,
                      const void* values,
@@ -58,18 +55,12 @@ class IndexBase {
     virtual void
     Build(const Config& config = {}) = 0;
 
-    virtual void
-    BuildV2(const Config& Config = {}) = 0;
-
     virtual int64_t
     Count() = 0;
 
     virtual BinarySet
     Upload(const Config& config = {}) = 0;
 
-    virtual BinarySet
-    UploadV2(const Config& config = {}) = 0;
-
     virtual const bool
     HasRawData() const = 0;
 
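Note (not part of the commit): removing the pure-virtual BuildV2/LoadV2/UploadV2 declarations from IndexBase is what allows every concrete index later in this diff to drop its V2 overrides. A hypothetical subclass now only implements the V1 hooks kept in the context lines above; DummyScalarIndex below is a sketch of mine, not a class in the tree:

    // Illustrative sketch, not code from this diff. The remaining pure
    // virtuals (BuildWithRawData, Count, HasRawData, ...) are deliberately
    // left unimplemented here, so the class stays abstract.
    namespace milvus::index {

    class DummyScalarIndex : public IndexBase {
     public:
        void
        Build(const Config& config = {}) override { /* build from raw data */ }

        void
        Load(milvus::tracer::TraceContext ctx,
             const Config& config = {}) override { /* fetch index files */ }

        BinarySet
        Upload(const Config& config = {}) override { return {}; }
    };

    }  // namespace milvus::index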
@@ -78,51 +78,6 @@ IndexFactory::CreatePrimitiveScalarIndex<std::string>(
 #endif
 }
 
-template <typename T>
-ScalarIndexPtr<T>
-IndexFactory::CreatePrimitiveScalarIndex(
-    const IndexType& index_type,
-    const storage::FileManagerContext& file_manager_context,
-    std::shared_ptr<milvus_storage::Space> space) {
-    if (index_type == INVERTED_INDEX_TYPE) {
-        return std::make_unique<InvertedIndexTantivy<T>>(file_manager_context,
-                                                         space);
-    }
-    if (index_type == BITMAP_INDEX_TYPE) {
-        return std::make_unique<BitmapIndex<T>>(file_manager_context, space);
-    }
-    if (index_type == HYBRID_INDEX_TYPE) {
-        return std::make_unique<HybridScalarIndex<T>>(file_manager_context,
-                                                      space);
-    }
-    return CreateScalarIndexSort<T>(file_manager_context, space);
-}
-
-template <>
-ScalarIndexPtr<std::string>
-IndexFactory::CreatePrimitiveScalarIndex<std::string>(
-    const IndexType& index_type,
-    const storage::FileManagerContext& file_manager_context,
-    std::shared_ptr<milvus_storage::Space> space) {
-#if defined(__linux__) || defined(__APPLE__)
-    if (index_type == INVERTED_INDEX_TYPE) {
-        return std::make_unique<InvertedIndexTantivy<std::string>>(
-            file_manager_context, space);
-    }
-    if (index_type == BITMAP_INDEX_TYPE) {
-        return std::make_unique<BitmapIndex<std::string>>(file_manager_context,
-                                                          space);
-    }
-    if (index_type == HYBRID_INDEX_TYPE) {
-        return std::make_unique<HybridScalarIndex<std::string>>(
-            file_manager_context, space);
-    }
-    return CreateStringIndexMarisa(file_manager_context, space);
-#else
-    PanicInfo(Unsupported, "unsupported platform");
-#endif
-}
-
 IndexBasePtr
 IndexFactory::CreateIndex(
     const CreateIndexInfo& create_index_info,
@@ -134,19 +89,6 @@ IndexFactory::CreateIndex(
     return CreateScalarIndex(create_index_info, file_manager_context);
 }
 
-IndexBasePtr
-IndexFactory::CreateIndex(
-    const CreateIndexInfo& create_index_info,
-    const storage::FileManagerContext& file_manager_context,
-    std::shared_ptr<milvus_storage::Space> space) {
-    if (IsVectorDataType(create_index_info.field_type)) {
-        return CreateVectorIndex(
-            create_index_info, file_manager_context, space);
-    }
-
-    return CreateScalarIndex(create_index_info, file_manager_context, space);
-}
-
 IndexBasePtr
 IndexFactory::CreatePrimitiveScalarIndex(
     DataType data_type,
@@ -307,90 +249,4 @@ IndexFactory::CreateVectorIndex(
         }
     }
 }
-
-IndexBasePtr
-IndexFactory::CreateVectorIndex(
-    const CreateIndexInfo& create_index_info,
-    const storage::FileManagerContext& file_manager_context,
-    std::shared_ptr<milvus_storage::Space> space) {
-    auto data_type = create_index_info.field_type;
-    auto index_type = create_index_info.index_type;
-    auto metric_type = create_index_info.metric_type;
-    auto version = create_index_info.index_engine_version;
-
-    if (knowhere::UseDiskLoad(index_type, version)) {
-        switch (data_type) {
-            case DataType::VECTOR_FLOAT: {
-                return std::make_unique<VectorDiskAnnIndex<float>>(
-                    index_type,
-                    metric_type,
-                    version,
-                    space,
-                    file_manager_context);
-            }
-            case DataType::VECTOR_FLOAT16: {
-                return std::make_unique<VectorDiskAnnIndex<float16>>(
-                    index_type,
-                    metric_type,
-                    version,
-                    space,
-                    file_manager_context);
-            }
-            case DataType::VECTOR_BFLOAT16: {
-                return std::make_unique<VectorDiskAnnIndex<bfloat16>>(
-                    index_type,
-                    metric_type,
-                    version,
-                    space,
-                    file_manager_context);
-            }
-            case DataType::VECTOR_BINARY: {
-                return std::make_unique<VectorDiskAnnIndex<bin1>>(
-                    index_type,
-                    metric_type,
-                    version,
-                    space,
-                    file_manager_context);
-            }
-            case DataType::VECTOR_SPARSE_FLOAT: {
-                return std::make_unique<VectorDiskAnnIndex<float>>(
-                    index_type,
-                    metric_type,
-                    version,
-                    space,
-                    file_manager_context);
-            }
-            default:
-                PanicInfo(
-                    DataTypeInvalid,
-                    fmt::format("invalid data type to build disk index: {}",
-                                data_type));
-        }
-    } else {  // create mem index
-        switch (data_type) {
-            case DataType::VECTOR_FLOAT:
-            case DataType::VECTOR_SPARSE_FLOAT: {
-                return std::make_unique<VectorMemIndex<float>>(
-                    create_index_info, file_manager_context, space);
-            }
-            case DataType::VECTOR_BINARY: {
-                return std::make_unique<VectorMemIndex<bin1>>(
-                    create_index_info, file_manager_context, space);
-            }
-            case DataType::VECTOR_FLOAT16: {
-                return std::make_unique<VectorMemIndex<float16>>(
-                    create_index_info, file_manager_context, space);
-            }
-            case DataType::VECTOR_BFLOAT16: {
-                return std::make_unique<VectorMemIndex<bfloat16>>(
-                    create_index_info, file_manager_context, space);
-            }
-            default:
-                PanicInfo(
-                    DataTypeInvalid,
-                    fmt::format("invalid data type to build mem index: {}",
-                                data_type));
-        }
-    }
-}
 }  // namespace milvus::index
@@ -32,7 +32,6 @@
 #include "index/ScalarIndexSort.h"
 #include "index/StringIndexMarisa.h"
 #include "index/BoolIndex.h"
-#include "storage/space.h"
 
 namespace milvus::index {
 
@@ -56,11 +55,6 @@ class IndexFactory {
     CreateIndex(const CreateIndexInfo& create_index_info,
                 const storage::FileManagerContext& file_manager_context);
 
-    IndexBasePtr
-    CreateIndex(const CreateIndexInfo& create_index_info,
-                const storage::FileManagerContext& file_manager_context,
-                std::shared_ptr<milvus_storage::Space> space);
-
     IndexBasePtr
     CreateVectorIndex(const CreateIndexInfo& create_index_info,
                       const storage::FileManagerContext& file_manager_context);
@@ -92,19 +86,6 @@ class IndexFactory {
         const storage::FileManagerContext& file_manager_context =
             storage::FileManagerContext());
 
-    IndexBasePtr
-    CreateVectorIndex(const CreateIndexInfo& create_index_info,
-                      const storage::FileManagerContext& file_manager_context,
-                      std::shared_ptr<milvus_storage::Space> space);
-
-    IndexBasePtr
-    CreateScalarIndex(const CreateIndexInfo& create_index_info,
-                      const storage::FileManagerContext& file_manager_context,
-                      std::shared_ptr<milvus_storage::Space> space) {
-        PanicInfo(ErrorCode::Unsupported,
-                  "CreateScalarIndexV2 not implemented");
-    }
-
     // IndexBasePtr
     // CreateIndex(DataType dtype, const IndexType& index_type);
  private:
@@ -115,12 +96,6 @@ class IndexFactory {
     CreatePrimitiveScalarIndex(const IndexType& index_type,
                                const storage::FileManagerContext& file_manager =
                                    storage::FileManagerContext());
-
-    template <typename T>
-    ScalarIndexPtr<T>
-    CreatePrimitiveScalarIndex(const IndexType& index_type,
-                               const storage::FileManagerContext& file_manager,
-                               std::shared_ptr<milvus_storage::Space> space);
 };
 
 }  // namespace milvus::index
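Note (not part of the commit): after the factory hunks above, index construction funnels through the single remaining CreateIndex(create_index_info, file_manager_context) overload. The call-site sketch below is mine; it assumes the GetInstance() singleton accessor that the factory already exposes elsewhere in the tree:

    // Illustrative sketch, not code from this diff.
    namespace milvus::index {

    IndexBasePtr
    MakeIndex(const CreateIndexInfo& info,
              const storage::FileManagerContext& ctx) {
        // The (info, ctx, space) overload no longer exists.
        return IndexFactory::GetInstance().CreateIndex(info, ctx);
    }

    }  // namespace milvus::index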
@@ -65,11 +65,10 @@ get_tantivy_data_type(const proto::schema::FieldSchema& schema) {
 
 template <typename T>
 InvertedIndexTantivy<T>::InvertedIndexTantivy(
-    const storage::FileManagerContext& ctx,
-    std::shared_ptr<milvus_storage::Space> space)
-    : space_(space), schema_(ctx.fieldDataMeta.field_schema) {
-    mem_file_manager_ = std::make_shared<MemFileManager>(ctx, ctx.space_);
-    disk_file_manager_ = std::make_shared<DiskFileManager>(ctx, ctx.space_);
+    const storage::FileManagerContext& ctx)
+    : schema_(ctx.fieldDataMeta.field_schema) {
+    mem_file_manager_ = std::make_shared<MemFileManager>(ctx);
+    disk_file_manager_ = std::make_shared<DiskFileManager>(ctx);
     auto field =
         std::to_string(disk_file_manager_->GetFieldDataMeta().field_id);
     auto prefix = disk_file_manager_->GetLocalIndexObjectPrefix();
@@ -139,12 +138,6 @@ InvertedIndexTantivy<T>::Upload(const Config& config) {
     return ret;
 }
 
-template <typename T>
-BinarySet
-InvertedIndexTantivy<T>::UploadV2(const Config& config) {
-    return Upload(config);
-}
-
 template <typename T>
 void
 InvertedIndexTantivy<T>::Build(const Config& config) {
@@ -156,28 +149,6 @@ InvertedIndexTantivy<T>::Build(const Config& config) {
     BuildWithFieldData(field_datas);
 }
 
-template <typename T>
-void
-InvertedIndexTantivy<T>::BuildV2(const Config& config) {
-    auto field_name = mem_file_manager_->GetIndexMeta().field_name;
-    auto reader = space_->ScanData();
-    std::vector<FieldDataPtr> field_datas;
-    for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
-        if (!rec.ok()) {
-            PanicInfo(DataFormatBroken, "failed to read data");
-        }
-        auto data = rec.ValueUnsafe();
-        auto total_num_rows = data->num_rows();
-        auto col_data = data->GetColumnByName(field_name);
-        // todo: support nullable index
-        auto field_data = storage::CreateFieldData(
-            DataType(GetDType<T>()), false, 0, total_num_rows);
-        field_data->FillFieldData(col_data);
-        field_datas.push_back(field_data);
-    }
-    BuildWithFieldData(field_datas);
-}
-
 template <typename T>
 void
 InvertedIndexTantivy<T>::Load(milvus::tracer::TraceContext ctx,
@@ -201,14 +172,6 @@ InvertedIndexTantivy<T>::Load(milvus::tracer::TraceContext ctx,
     wrapper_ = std::make_shared<TantivyIndexWrapper>(prefix.c_str());
 }
 
-template <typename T>
-void
-InvertedIndexTantivy<T>::LoadV2(const Config& config) {
-    disk_file_manager_->CacheIndexToDisk();
-    auto prefix = disk_file_manager_->GetLocalIndexObjectPrefix();
-    wrapper_ = std::make_shared<TantivyIndexWrapper>(prefix.c_str());
-}
-
 inline void
 apply_hits(TargetBitmap& bitset, const RustArrayWrapper& w, bool v) {
     for (size_t j = 0; j < w.array_.len; j++) {
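Note (not part of the commit): InvertedIndexTantivy previously kept two constructors, with the FileManagerContext-only one delegating to the Space overload via nullptr; after the hunks above a single constructor wires the memory and disk file managers straight from the context. The sketch below is mine and only shows the resulting construction call:

    // Illustrative sketch, not code from this diff.
    #include <memory>

    namespace milvus::index {

    // Assumes `ctx` carries valid field/index metadata and a chunk manager.
    std::unique_ptr<InvertedIndexTantivy<std::string>>
    MakeTantivyIndex(const storage::FileManagerContext& ctx) {
        // One constructor path: both file managers are created from ctx alone.
        return std::make_unique<InvertedIndexTantivy<std::string>>(ctx);
    }

    }  // namespace milvus::index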
@@ -18,7 +18,6 @@
 #include "tantivy-binding.h"
 #include "tantivy-wrapper.h"
 #include "index/StringIndex.h"
-#include "storage/space.h"
 
 namespace milvus::index {
 
@@ -34,13 +33,7 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
     using DiskFileManagerPtr = std::shared_ptr<DiskFileManager>;
 
     InvertedIndexTantivy() = default;
 
-    explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx)
-        : InvertedIndexTantivy(ctx, nullptr) {
-    }
-
-    explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx,
-                                  std::shared_ptr<milvus_storage::Space> space);
+    explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx);
 
     ~InvertedIndexTantivy();
 
@@ -56,9 +49,6 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
     void
     Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;
 
-    void
-    LoadV2(const Config& config = {}) override;
-
     /*
      * deprecated.
      * TODO: why not remove this?
@@ -78,9 +68,6 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
     void
     Build(const Config& config = {}) override;
 
-    void
-    BuildV2(const Config& config = {}) override;
-
     int64_t
     Count() override {
         return wrapper_->count();
@@ -102,9 +89,6 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
     BinarySet
     Upload(const Config& config = {}) override;
 
-    BinarySet
-    UploadV2(const Config& config = {}) override;
-
     /*
      * deprecated, only used in small chunk index.
      */
@@ -196,6 +180,5 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
      */
     MemFileManagerPtr mem_file_manager_;
     DiskFileManagerPtr disk_file_manager_;
-    std::shared_ptr<milvus_storage::Space> space_;
 };
 }  // namespace milvus::index
@@ -44,73 +44,6 @@ ScalarIndexSort<T>::ScalarIndexSort(
     }
 }
 
-template <typename T>
-inline ScalarIndexSort<T>::ScalarIndexSort(
-    const storage::FileManagerContext& file_manager_context,
-    std::shared_ptr<milvus_storage::Space> space)
-    : is_built_(false), data_(), space_(space) {
-    if (file_manager_context.Valid()) {
-        file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
-            file_manager_context, space);
-        AssertInfo(file_manager_ != nullptr, "create file manager failed!");
-    }
-}
-
-template <typename T>
-inline void
-ScalarIndexSort<T>::BuildV2(const Config& config) {
-    if (is_built_) {
-        return;
-    }
-    auto field_name = file_manager_->GetIndexMeta().field_name;
-    auto reader = space_->ScanData();
-    std::vector<FieldDataPtr> field_datas;
-    for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
-        if (!rec.ok()) {
-            PanicInfo(DataFormatBroken, "failed to read data");
-        }
-        auto data = rec.ValueUnsafe();
-        auto total_num_rows = data->num_rows();
-        auto col_data = data->GetColumnByName(field_name);
-        auto nullable =
-            col_data->type()->id() == arrow::Type::NA ? true : false;
-        // will support build scalar index when nullable in the future just skip it
-        // now, not support to build index in nullable field_data
-        // todo: support nullable index
-        AssertInfo(!nullable,
-                   "not support to build index in nullable field_data");
-        auto field_data = storage::CreateFieldData(
-            DataType(GetDType<T>()), nullable, 0, total_num_rows);
-        field_data->FillFieldData(col_data);
-        field_datas.push_back(field_data);
-    }
-    int64_t total_num_rows = 0;
-    for (const auto& data : field_datas) {
-        total_num_rows += data->get_num_rows();
-    }
-    if (total_num_rows == 0) {
-        PanicInfo(DataIsEmpty, "ScalarIndexSort cannot build null values!");
-    }
-
-    data_.reserve(total_num_rows);
-    int64_t offset = 0;
-    for (const auto& data : field_datas) {
-        auto slice_num = data->get_num_rows();
-        for (size_t i = 0; i < slice_num; ++i) {
-            auto value = reinterpret_cast<const T*>(data->RawValue(i));
-            data_.emplace_back(IndexStructure(*value, offset));
-            offset++;
-        }
-    }
-
-    std::sort(data_.begin(), data_.end());
-    idx_to_offsets_.resize(total_num_rows);
-    for (size_t i = 0; i < total_num_rows; ++i) {
-        idx_to_offsets_[data_[i].idx_] = i;
-    }
-    is_built_ = true;
-}
-
 template <typename T>
 void
 ScalarIndexSort<T>::Build(const Config& config) {
@@ -215,21 +148,6 @@ ScalarIndexSort<T>::Upload(const Config& config) {
     return ret;
 }
 
-template <typename T>
-BinarySet
-ScalarIndexSort<T>::UploadV2(const Config& config) {
-    auto binary_set = Serialize(config);
-    file_manager_->AddFileV2(binary_set);
-
-    auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize();
-    BinarySet ret;
-    for (auto& file : remote_paths_to_size) {
-        ret.Append(file.first, nullptr, file.second);
-    }
-
-    return ret;
-}
-
 template <typename T>
 void
 ScalarIndexSort<T>::LoadWithoutAssemble(const BinarySet& index_binary,
@@ -277,47 +195,6 @@ ScalarIndexSort<T>::Load(milvus::tracer::TraceContext ctx,
     LoadWithoutAssemble(binary_set, config);
 }
 
-template <typename T>
-void
-ScalarIndexSort<T>::LoadV2(const Config& config) {
-    auto blobs = space_->StatisticsBlobs();
-    std::vector<std::string> index_files;
-    auto prefix = file_manager_->GetRemoteIndexObjectPrefixV2();
-    for (auto& b : blobs) {
-        if (b.name.rfind(prefix, 0) == 0) {
-            index_files.push_back(b.name);
-        }
-    }
-    std::map<std::string, FieldDataPtr> index_datas{};
-    for (auto& file_name : index_files) {
-        auto res = space_->GetBlobByteSize(file_name);
-        if (!res.ok()) {
-            PanicInfo(S3Error, "unable to read index blob");
-        }
-        auto index_blob_data =
-            std::shared_ptr<uint8_t[]>(new uint8_t[res.value()]);
-        auto status = space_->ReadBlob(file_name, index_blob_data.get());
-        if (!status.ok()) {
-            PanicInfo(S3Error, "unable to read index blob");
-        }
-        auto raw_index_blob =
-            storage::DeserializeFileData(index_blob_data, res.value());
-        auto key = file_name.substr(file_name.find_last_of('/') + 1);
-        index_datas[key] = raw_index_blob->GetFieldData();
-    }
-    AssembleIndexDatas(index_datas);
-    BinarySet binary_set;
-    for (auto& [key, data] : index_datas) {
-        auto size = data->Size();
-        auto deleter = [&](uint8_t*) {};  // avoid repeated deconstruction
-        auto buf = std::shared_ptr<uint8_t[]>(
-            (uint8_t*)const_cast<void*>(data->Data()), deleter);
-        binary_set.Append(key, buf, size);
-    }
-
-    LoadWithoutAssemble(binary_set, config);
-}
-
 template <typename T>
 const TargetBitmap
 ScalarIndexSort<T>::In(const size_t n, const T* values) {
@@ -26,7 +26,6 @@
 #include "index/IndexStructure.h"
 #include "index/ScalarIndex.h"
 #include "storage/MemFileManagerImpl.h"
-#include "storage/space.h"
 
 namespace milvus::index {
 
@@ -37,10 +36,6 @@ class ScalarIndexSort : public ScalarIndex<T> {
         const storage::FileManagerContext& file_manager_context =
             storage::FileManagerContext());
 
-    explicit ScalarIndexSort(
-        const storage::FileManagerContext& file_manager_context,
-        std::shared_ptr<milvus_storage::Space> space);
-
     BinarySet
     Serialize(const Config& config) override;
 
@@ -50,9 +45,6 @@ class ScalarIndexSort : public ScalarIndex<T> {
     void
     Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;
 
-    void
-    LoadV2(const Config& config = {}) override;
-
     int64_t
     Count() override {
         return data_.size();
@@ -69,9 +61,6 @@ class ScalarIndexSort : public ScalarIndex<T> {
     void
    Build(const Config& config = {}) override;
 
-    void
-    BuildV2(const Config& config = {}) override;
-
     const TargetBitmap
     In(size_t n, const T* values) override;
 
@@ -97,8 +86,6 @@ class ScalarIndexSort : public ScalarIndex<T> {
 
     BinarySet
     Upload(const Config& config = {}) override;
-    BinarySet
-    UploadV2(const Config& config = {}) override;
 
     const bool
     HasRawData() const override {
@@ -133,7 +120,6 @@ class ScalarIndexSort : public ScalarIndex<T> {
     std::vector<int32_t> idx_to_offsets_;  // used to retrieve.
     std::vector<IndexStructure<T>> data_;
     std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
-    std::shared_ptr<milvus_storage::Space> space_;
 };
 
 template <typename T>
@@ -148,11 +134,4 @@ CreateScalarIndexSort(const storage::FileManagerContext& file_manager_context =
                           storage::FileManagerContext()) {
     return std::make_unique<ScalarIndexSort<T>>(file_manager_context);
 }
-
-template <typename T>
-inline ScalarIndexSortPtr<T>
-CreateScalarIndexSort(const storage::FileManagerContext& file_manager_context,
-                      std::shared_ptr<milvus_storage::Space> space) {
-    return std::make_unique<ScalarIndexSort<T>>(file_manager_context, space);
-}
 }  // namespace milvus::index
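Note (not part of the commit): only the single-argument CreateScalarIndexSort helper survives the hunks above. A call-site sketch of mine, assuming a valid FileManagerContext:

    // Illustrative sketch, not code from this diff.
    namespace milvus::index {

    ScalarIndexSortPtr<int32_t>
    MakeSortIndex(const storage::FileManagerContext& ctx) {
        // The (ctx, space) overload of CreateScalarIndexSort was removed.
        return CreateScalarIndexSort<int32_t>(ctx);
    }

    }  // namespace milvus::index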
@ -36,7 +36,6 @@
|
|||||||
#include "index/Utils.h"
|
#include "index/Utils.h"
|
||||||
#include "index/Index.h"
|
#include "index/Index.h"
|
||||||
#include "storage/Util.h"
|
#include "storage/Util.h"
|
||||||
#include "storage/space.h"
|
|
||||||
|
|
||||||
namespace milvus::index {
|
namespace milvus::index {
|
||||||
|
|
||||||
@ -48,16 +47,6 @@ StringIndexMarisa::StringIndexMarisa(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
StringIndexMarisa::StringIndexMarisa(
|
|
||||||
const storage::FileManagerContext& file_manager_context,
|
|
||||||
std::shared_ptr<milvus_storage::Space> space)
|
|
||||||
: space_(space) {
|
|
||||||
if (file_manager_context.Valid()) {
|
|
||||||
file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
|
|
||||||
file_manager_context, space_);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int64_t
|
int64_t
|
||||||
StringIndexMarisa::Size() {
|
StringIndexMarisa::Size() {
|
||||||
return trie_.size();
|
return trie_.size();
|
||||||
@ -68,65 +57,6 @@ valid_str_id(size_t str_id) {
|
|||||||
return str_id >= 0 && str_id != MARISA_INVALID_KEY_ID;
|
return str_id >= 0 && str_id != MARISA_INVALID_KEY_ID;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
StringIndexMarisa::BuildV2(const Config& config) {
|
|
||||||
if (built_) {
|
|
||||||
throw std::runtime_error("index has been built");
|
|
||||||
}
|
|
||||||
auto field_name = file_manager_->GetIndexMeta().field_name;
|
|
||||||
auto reader = space_->ScanData();
|
|
||||||
std::vector<FieldDataPtr> field_datas;
|
|
||||||
for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
|
|
||||||
if (!rec.ok()) {
|
|
||||||
PanicInfo(DataFormatBroken, "failed to read data");
|
|
||||||
}
|
|
||||||
auto data = rec.ValueUnsafe();
|
|
||||||
auto total_num_rows = data->num_rows();
|
|
||||||
auto col_data = data->GetColumnByName(field_name);
|
|
||||||
auto nullable =
|
|
||||||
col_data->type()->id() == arrow::Type::NA ? true : false;
|
|
||||||
// will support build scalar index when nullable in the future just skip it
|
|
||||||
// now, not support to build index in nullable field_data
|
|
||||||
// todo: support nullable index
|
|
||||||
AssertInfo(!nullable,
|
|
||||||
"not support to build index in nullable field_data");
|
|
||||||
auto field_data = storage::CreateFieldData(
|
|
||||||
-            DataType::STRING, nullable, 0, total_num_rows);
-        field_data->FillFieldData(col_data);
-        field_datas.push_back(field_data);
-    }
-    int64_t total_num_rows = 0;
-
-    // fill key set.
-    marisa::Keyset keyset;
-    for (auto data : field_datas) {
-        auto slice_num = data->get_num_rows();
-        for (size_t i = 0; i < slice_num; ++i) {
-            keyset.push_back(
-                (*static_cast<const std::string*>(data->RawValue(i))).c_str());
-        }
-        total_num_rows += slice_num;
-    }
-    trie_.build(keyset);
-
-    // fill str_ids_
-    str_ids_.resize(total_num_rows);
-    int64_t offset = 0;
-    for (auto data : field_datas) {
-        auto slice_num = data->get_num_rows();
-        for (size_t i = 0; i < slice_num; ++i) {
-            auto str_id =
-                lookup(*static_cast<const std::string*>(data->RawValue(i)));
-            AssertInfo(valid_str_id(str_id), "invalid marisa key");
-            str_ids_[offset++] = str_id;
-        }
-    }
-
-    // fill str_ids_to_offsets_
-    fill_offsets();
-
-    built_ = true;
-}

void
StringIndexMarisa::Build(const Config& config) {
    if (built_) {
@ -245,20 +175,6 @@ StringIndexMarisa::Upload(const Config& config) {
    return ret;
}

-BinarySet
-StringIndexMarisa::UploadV2(const Config& config) {
-    auto binary_set = Serialize(config);
-    file_manager_->AddFileV2(binary_set);
-
-    auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize();
-    BinarySet ret;
-    for (auto& file : remote_paths_to_size) {
-        ret.Append(file.first, nullptr, file.second);
-    }
-
-    return ret;
-}

void
StringIndexMarisa::LoadWithoutAssemble(const BinarySet& set,
                                       const Config& config) {
@ -322,46 +238,6 @@ StringIndexMarisa::Load(milvus::tracer::TraceContext ctx,
    LoadWithoutAssemble(binary_set, config);
}

-void
-StringIndexMarisa::LoadV2(const Config& config) {
-    auto blobs = space_->StatisticsBlobs();
-    std::vector<std::string> index_files;
-    auto prefix = file_manager_->GetRemoteIndexObjectPrefixV2();
-    for (auto& b : blobs) {
-        if (b.name.rfind(prefix, 0) == 0) {
-            index_files.push_back(b.name);
-        }
-    }
-    std::map<std::string, FieldDataPtr> index_datas{};
-    for (auto& file_name : index_files) {
-        auto res = space_->GetBlobByteSize(file_name);
-        if (!res.ok()) {
-            PanicInfo(DataFormatBroken, "unable to read index blob");
-        }
-        auto index_blob_data =
-            std::shared_ptr<uint8_t[]>(new uint8_t[res.value()]);
-        auto status = space_->ReadBlob(file_name, index_blob_data.get());
-        if (!status.ok()) {
-            PanicInfo(DataFormatBroken, "unable to read index blob");
-        }
-        auto raw_index_blob =
-            storage::DeserializeFileData(index_blob_data, res.value());
-        index_datas[file_name] = raw_index_blob->GetFieldData();
-    }
-    AssembleIndexDatas(index_datas);
-    BinarySet binary_set;
-    for (auto& [key, data] : index_datas) {
-        auto size = data->Size();
-        auto deleter = [&](uint8_t*) {};  // avoid repeated deconstruction
-        auto buf = std::shared_ptr<uint8_t[]>(
-            (uint8_t*)const_cast<void*>(data->Data()), deleter);
-        auto file_name = key.substr(key.find_last_of('/') + 1);
-        binary_set.Append(file_name, buf, size);
-    }
-
-    LoadWithoutAssemble(binary_set, config);
-}

const TargetBitmap
StringIndexMarisa::In(size_t n, const std::string* values) {
    TargetBitmap bitset(str_ids_.size());
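Illustrative aside (not part of this change): the retained Build/BuildWithFieldData path keeps feeding strings into a marisa keyset, building the trie once, and mapping each string back to a stable key id. A minimal standalone sketch of those marisa-trie mechanics, assuming the marisa-trie library is installed; all values here are made up:

    // Standalone illustration of the marisa-trie usage pattern above:
    // build a keyset, construct the trie once, then map strings to key ids.
    #include <iostream>
    #include <string>
    #include <vector>
    #include <marisa.h>

    int main() {
        std::vector<std::string> values = {"apple", "banana", "apple"};

        marisa::Keyset keyset;
        for (const auto& v : values) {
            keyset.push_back(v.c_str());  // duplicates are fine; they share one key
        }

        marisa::Trie trie;
        trie.build(keyset);

        // Look up each value and record its trie key id (the analogue of str_ids_).
        std::vector<std::size_t> str_ids;
        for (const auto& v : values) {
            marisa::Agent agent;
            agent.set_query(v.c_str(), v.size());
            if (trie.lookup(agent)) {
                str_ids.push_back(agent.key().id());
            }
        }

        for (auto id : str_ids) {
            std::cout << id << '\n';  // equal strings map to the same id
        }
        return 0;
    }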
@ -23,7 +23,6 @@
#include <map>
#include <memory>
#include "storage/MemFileManagerImpl.h"
-#include "storage/space.h"

namespace milvus::index {

@ -33,10 +32,6 @@ class StringIndexMarisa : public StringIndex {
        const storage::FileManagerContext& file_manager_context =
            storage::FileManagerContext());

-    explicit StringIndexMarisa(
-        const storage::FileManagerContext& file_manager_context,
-        std::shared_ptr<milvus_storage::Space> space);
-
    int64_t
    Size() override;

@ -49,9 +44,6 @@ class StringIndexMarisa : public StringIndex {
    void
    Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;

-    void
-    LoadV2(const Config& config = {}) override;
-
    int64_t
    Count() override {
        return str_ids_.size();
@ -71,9 +63,6 @@ class StringIndexMarisa : public StringIndex {
    void
    BuildWithFieldData(const std::vector<FieldDataPtr>& field_datas) override;

-    void
-    BuildV2(const Config& Config = {}) override;
-
    const TargetBitmap
    In(size_t n, const std::string* values) override;

@ -98,9 +87,6 @@ class StringIndexMarisa : public StringIndex {
    BinarySet
    Upload(const Config& config = {}) override;

-    BinarySet
-    UploadV2(const Config& config = {});
-
    const bool
    HasRawData() const override {
        return true;
@ -131,7 +117,6 @@ class StringIndexMarisa : public StringIndex {
    std::map<size_t, std::vector<size_t>> str_ids_to_offsets_;
    bool built_ = false;
    std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
-    std::shared_ptr<milvus_storage::Space> space_;
};

using StringIndexMarisaPtr = std::unique_ptr<StringIndexMarisa>;

@ -142,10 +127,4 @@ CreateStringIndexMarisa(
        storage::FileManagerContext()) {
    return std::make_unique<StringIndexMarisa>(file_manager_context);
}

-inline StringIndexPtr
-CreateStringIndexMarisa(const storage::FileManagerContext& file_manager_context,
-                        std::shared_ptr<milvus_storage::Space> space) {
-    return std::make_unique<StringIndexMarisa>(file_manager_context, space);
-}
}  // namespace milvus::index
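Illustrative aside (not part of this change): the removed LoadV2 above wraps a buffer it does not own in a std::shared_ptr with a no-op deleter ("avoid repeated deconstruction"). A standalone sketch of that pattern with made-up data, in case the intent is unclear; this is not Milvus code:

    // Standalone illustration: expose an existing buffer through shared_ptr
    // without transferring ownership, by installing a deleter that does nothing.
    #include <cstdint>
    #include <cstdio>
    #include <memory>
    #include <vector>

    int main() {
        std::vector<uint8_t> owner = {1, 2, 3, 4};  // the real owner of the bytes

        // Non-owning view: the no-op deleter keeps shared_ptr from freeing
        // memory it never owned. The view must not outlive `owner`.
        auto no_op = [](uint8_t*) {};
        std::shared_ptr<uint8_t[]> view(owner.data(), no_op);

        std::printf("%u %u\n",
                    static_cast<unsigned>(view[0]),
                    static_cast<unsigned>(view[3]));
        return 0;
    }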
@ -73,45 +73,6 @@ VectorDiskAnnIndex<T>::VectorDiskAnnIndex(
    }
}

-template <typename T>
-VectorDiskAnnIndex<T>::VectorDiskAnnIndex(
-    const IndexType& index_type,
-    const MetricType& metric_type,
-    const IndexVersion& version,
-    std::shared_ptr<milvus_storage::Space> space,
-    const storage::FileManagerContext& file_manager_context)
-    : space_(space), VectorIndex(index_type, metric_type) {
-    CheckMetricTypeSupport<T>(metric_type);
-    file_manager_ = std::make_shared<storage::DiskFileManagerImpl>(
-        file_manager_context, file_manager_context.space_);
-    AssertInfo(file_manager_ != nullptr, "create file manager failed!");
-    auto local_chunk_manager =
-        storage::LocalChunkManagerSingleton::GetInstance().GetChunkManager();
-    auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
-
-    // As we have guarded dup-load in QueryNode,
-    // this assertion failed only if the Milvus rebooted in the same pod,
-    // need to remove these files then re-load the segment
-    if (local_chunk_manager->Exist(local_index_path_prefix)) {
-        local_chunk_manager->RemoveDir(local_index_path_prefix);
-    }
-    CheckCompatible(version);
-    local_chunk_manager->CreateDir(local_index_path_prefix);
-    auto diskann_index_pack =
-        knowhere::Pack(std::shared_ptr<knowhere::FileManager>(file_manager_));
-    auto get_index_obj = knowhere::IndexFactory::Instance().Create<T>(
-        GetIndexType(), version, diskann_index_pack);
-    if (get_index_obj.has_value()) {
-        index_ = get_index_obj.value();
-    } else {
-        auto err = get_index_obj.error();
-        if (err == knowhere::Status::invalid_index_error) {
-            PanicInfo(ErrorCode::Unsupported, get_index_obj.what());
-        }
-        PanicInfo(ErrorCode::KnowhereError, get_index_obj.what());
-    }
-}

template <typename T>
void
VectorDiskAnnIndex<T>::Load(const BinarySet& binary_set /* not used */,
@ -153,21 +114,6 @@ VectorDiskAnnIndex<T>::Load(milvus::tracer::TraceContext ctx,
    SetDim(index_.Dim());
}

-template <typename T>
-void
-VectorDiskAnnIndex<T>::LoadV2(const Config& config) {
-    knowhere::Json load_config = update_load_json(config);
-
-    file_manager_->CacheIndexToDisk();
-
-    auto stat = index_.Deserialize(knowhere::BinarySet(), load_config);
-    if (stat != knowhere::Status::success)
-        PanicInfo(ErrorCode::UnexpectedError,
-                  "failed to Deserialize index, " + KnowhereStatusString(stat));
-
-    SetDim(index_.Dim());
-}

template <typename T>
BinarySet
VectorDiskAnnIndex<T>::Upload(const Config& config) {
@ -185,53 +131,6 @@ VectorDiskAnnIndex<T>::Upload(const Config& config) {
    return ret;
}

-template <typename T>
-BinarySet
-VectorDiskAnnIndex<T>::UploadV2(const Config& config) {
-    return Upload(config);
-}
-
-template <typename T>
-void
-VectorDiskAnnIndex<T>::BuildV2(const Config& config) {
-    knowhere::Json build_config;
-    build_config.update(config);
-
-    auto local_data_path = file_manager_->CacheRawDataToDisk<T>(space_);
-    build_config[DISK_ANN_RAW_DATA_PATH] = local_data_path;
-
-    auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
-    build_config[DISK_ANN_PREFIX_PATH] = local_index_path_prefix;
-
-    if (GetIndexType() == knowhere::IndexEnum::INDEX_DISKANN) {
-        auto num_threads = GetValueFromConfig<std::string>(
-            build_config, DISK_ANN_BUILD_THREAD_NUM);
-        AssertInfo(
-            num_threads.has_value(),
-            "param " + std::string(DISK_ANN_BUILD_THREAD_NUM) + "is empty");
-        build_config[DISK_ANN_THREADS_NUM] =
-            std::atoi(num_threads.value().c_str());
-    }
-
-    auto opt_fields = GetValueFromConfig<OptFieldT>(config, VEC_OPT_FIELDS);
-    if (opt_fields.has_value() && index_.IsAdditionalScalarSupported()) {
-        build_config[VEC_OPT_FIELDS_PATH] =
-            file_manager_->CacheOptFieldToDisk(opt_fields.value());
-        // `partition_key_isolation` is already in the config, so it falls through
-        // into the index Build call directly
-    }
-
-    build_config.erase("insert_files");
-    build_config.erase(VEC_OPT_FIELDS);
-    index_.Build({}, build_config);
-
-    auto local_chunk_manager =
-        storage::LocalChunkManagerSingleton::GetInstance().GetChunkManager();
-    auto segment_id = file_manager_->GetFieldDataMeta().segment_id;
-    local_chunk_manager->RemoveDir(
-        storage::GetSegmentRawDataPathPrefix(local_chunk_manager, segment_id));
-}

template <typename T>
void
VectorDiskAnnIndex<T>::Build(const Config& config) {
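Illustrative aside (not part of this change): the removed constructor above clears any stale local index directory before re-creating it (the Exist/RemoveDir/CreateDir sequence), so a restarted process never reuses files from a previous run. A standalone sketch of that guard with std::filesystem, on a made-up path, purely for illustration:

    // Standalone illustration: wipe and recreate a scratch directory so a
    // restarted process never reuses files from a previous incarnation.
    #include <filesystem>
    #include <iostream>

    int main() {
        namespace fs = std::filesystem;
        const fs::path local_index_prefix = "/tmp/example_diskann_index";  // made-up path

        std::error_code ec;
        if (fs::exists(local_index_prefix, ec)) {
            fs::remove_all(local_index_prefix, ec);  // drop leftovers from a previous run
        }
        fs::create_directories(local_index_prefix, ec);

        std::cout << "scratch dir ready: " << local_index_prefix << '\n';
        return ec ? 1 : 0;
    }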
@ -21,7 +21,6 @@

#include "index/VectorIndex.h"
#include "storage/DiskFileManagerImpl.h"
-#include "storage/space.h"

namespace milvus::index {

@ -35,14 +34,6 @@ class VectorDiskAnnIndex : public VectorIndex {
        const storage::FileManagerContext& file_manager_context =
            storage::FileManagerContext());

-    explicit VectorDiskAnnIndex(
-        const IndexType& index_type,
-        const MetricType& metric_type,
-        const IndexVersion& version,
-        std::shared_ptr<milvus_storage::Space> space,
-        const storage::FileManagerContext& file_manager_context =
-            storage::FileManagerContext());
-
    BinarySet
    Serialize(const Config& config) override {  // deprecated
        BinarySet binary_set;
@ -58,9 +49,6 @@ class VectorDiskAnnIndex : public VectorIndex {
    BinarySet
    Upload(const Config& config = {}) override;

-    BinarySet
-    UploadV2(const Config& config = {}) override;
-
    int64_t
    Count() override {
        return index_.Count();
@ -73,9 +61,6 @@ class VectorDiskAnnIndex : public VectorIndex {
    void
    Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;

-    void
-    LoadV2(const Config& config = {}) override;
-
    void
    BuildWithDataset(const DatasetPtr& dataset,
                     const Config& config = {}) override;
@ -83,9 +68,6 @@ class VectorDiskAnnIndex : public VectorIndex {
    void
    Build(const Config& config = {}) override;

-    void
-    BuildV2(const Config& config = {}) override;
-
    void
    Query(const DatasetPtr dataset,
          const SearchInfo& search_info,
@ -119,7 +101,6 @@ class VectorDiskAnnIndex : public VectorIndex {
    knowhere::Index<knowhere::IndexNode> index_;
    std::shared_ptr<storage::DiskFileManagerImpl> file_manager_;
    uint32_t search_beamwidth_ = 8;
-    std::shared_ptr<milvus_storage::Space> space_;
};

template <typename T>
@ -48,7 +48,6 @@
#include "storage/DataCodec.h"
#include "storage/MemFileManagerImpl.h"
#include "storage/ThreadPools.h"
-#include "storage/space.h"
#include "storage/Util.h"
#include "monitor/prometheus_client.h"

@ -83,69 +82,6 @@ VectorMemIndex<T>::VectorMemIndex(
    }
}

-template <typename T>
-VectorMemIndex<T>::VectorMemIndex(
-    const CreateIndexInfo& create_index_info,
-    const storage::FileManagerContext& file_manager_context,
-    std::shared_ptr<milvus_storage::Space> space)
-    : VectorIndex(create_index_info.index_type, create_index_info.metric_type),
-      space_(space),
-      create_index_info_(create_index_info) {
-    CheckMetricTypeSupport<T>(create_index_info.metric_type);
-    AssertInfo(!is_unsupported(create_index_info.index_type,
-                               create_index_info.metric_type),
-               create_index_info.index_type +
-                   " doesn't support metric: " + create_index_info.metric_type);
-    if (file_manager_context.Valid()) {
-        file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
-            file_manager_context, file_manager_context.space_);
-        AssertInfo(file_manager_ != nullptr, "create file manager failed!");
-    }
-    auto version = create_index_info.index_engine_version;
-    CheckCompatible(version);
-    auto get_index_obj =
-        knowhere::IndexFactory::Instance().Create<T>(GetIndexType(), version);
-    if (get_index_obj.has_value()) {
-        index_ = get_index_obj.value();
-    } else {
-        auto err = get_index_obj.error();
-        if (err == knowhere::Status::invalid_index_error) {
-            PanicInfo(ErrorCode::Unsupported, get_index_obj.what());
-        }
-        PanicInfo(ErrorCode::KnowhereError, get_index_obj.what());
-    }
-}
-
-template <typename T>
-BinarySet
-VectorMemIndex<T>::UploadV2(const Config& config) {
-    auto binary_set = Serialize(config);
-    file_manager_->AddFileV2(binary_set);
-
-    auto store_version = file_manager_->space()->GetCurrentVersion();
-    std::shared_ptr<uint8_t[]> store_version_data(
-        new uint8_t[sizeof(store_version)]);
-    store_version_data[0] = store_version & 0x00000000000000FF;
-    store_version = store_version >> 8;
-    store_version_data[1] = store_version & 0x00000000000000FF;
-    store_version = store_version >> 8;
-    store_version_data[2] = store_version & 0x00000000000000FF;
-    store_version = store_version >> 8;
-    store_version_data[3] = store_version & 0x00000000000000FF;
-    store_version = store_version >> 8;
-    store_version_data[4] = store_version & 0x00000000000000FF;
-    store_version = store_version >> 8;
-    store_version_data[5] = store_version & 0x00000000000000FF;
-    store_version = store_version >> 8;
-    store_version_data[6] = store_version & 0x00000000000000FF;
-    store_version = store_version >> 8;
-    store_version_data[7] = store_version & 0x00000000000000FF;
-    BinarySet ret;
-    ret.Append("index_store_version", store_version_data, 8);
-
-    return ret;
-}

template <typename T>
knowhere::expected<std::vector<knowhere::IndexNode::IteratorPtr>>
VectorMemIndex<T>::VectorIterators(const milvus::DatasetPtr dataset,
@ -202,105 +138,6 @@ VectorMemIndex<T>::Load(const BinarySet& binary_set, const Config& config) {
    LoadWithoutAssemble(binary_set, config);
}

-template <typename T>
-void
-VectorMemIndex<T>::LoadV2(const Config& config) {
-    if (config.contains(kMmapFilepath)) {
-        return LoadFromFileV2(config);
-    }
-
-    auto blobs = space_->StatisticsBlobs();
-    std::unordered_set<std::string> pending_index_files;
-    auto index_prefix = file_manager_->GetRemoteIndexObjectPrefixV2();
-    for (auto& blob : blobs) {
-        if (blob.name.rfind(index_prefix, 0) == 0) {
-            pending_index_files.insert(blob.name);
-        }
-    }
-
-    auto slice_meta_file = index_prefix + "/" + INDEX_FILE_SLICE_META;
-    auto res = space_->GetBlobByteSize(std::string(slice_meta_file));
-    std::map<std::string, FieldDataPtr> index_datas{};
-
-    if (!res.ok() && !res.status().IsFileNotFound()) {
-        PanicInfo(DataFormatBroken, "failed to read blob");
-    }
-    bool slice_meta_exist = res.ok();
-
-    auto read_blob = [&](const std::string& file_name)
-        -> std::unique_ptr<storage::DataCodec> {
-        auto res = space_->GetBlobByteSize(file_name);
-        if (!res.ok()) {
-            PanicInfo(DataFormatBroken, "unable to read index blob");
-        }
-        auto index_blob_data =
-            std::shared_ptr<uint8_t[]>(new uint8_t[res.value()]);
-        auto status = space_->ReadBlob(file_name, index_blob_data.get());
-        if (!status.ok()) {
-            PanicInfo(DataFormatBroken, "unable to read index blob");
-        }
-        return storage::DeserializeFileData(index_blob_data, res.value());
-    };
-    if (slice_meta_exist) {
-        pending_index_files.erase(slice_meta_file);
-        auto slice_meta_sz = res.value();
-        auto slice_meta_data =
-            std::shared_ptr<uint8_t[]>(new uint8_t[slice_meta_sz]);
-        auto status = space_->ReadBlob(slice_meta_file, slice_meta_data.get());
-        if (!status.ok()) {
-            PanicInfo(DataFormatBroken, "unable to read slice meta");
-        }
-        auto raw_slice_meta =
-            storage::DeserializeFileData(slice_meta_data, slice_meta_sz);
-        Config meta_data = Config::parse(std::string(
-            static_cast<const char*>(raw_slice_meta->GetFieldData()->Data()),
-            raw_slice_meta->GetFieldData()->Size()));
-        for (auto& item : meta_data[META]) {
-            std::string prefix = item[NAME];
-            int slice_num = item[SLICE_NUM];
-            auto total_len = static_cast<size_t>(item[TOTAL_LEN]);
-            // todo: support nullable index
-            auto new_field_data = milvus::storage::CreateFieldData(
-                DataType::INT8, false, 1, total_len);
-            for (auto i = 0; i < slice_num; ++i) {
-                std::string file_name =
-                    index_prefix + "/" + GenSlicedFileName(prefix, i);
-                auto raw_index_blob = read_blob(file_name);
-                new_field_data->FillFieldData(
-                    raw_index_blob->GetFieldData()->Data(),
-                    raw_index_blob->GetFieldData()->Size());
-                pending_index_files.erase(file_name);
-            }
-            AssertInfo(
-                new_field_data->IsFull(),
-                "index len is inconsistent after disassemble and assemble");
-            index_datas[prefix] = new_field_data;
-        }
-    }
-
-    if (!pending_index_files.empty()) {
-        for (auto& file_name : pending_index_files) {
-            auto raw_index_blob = read_blob(file_name);
-            index_datas.insert({file_name, raw_index_blob->GetFieldData()});
-        }
-    }
-    LOG_INFO("construct binary set...");
-    BinarySet binary_set;
-    for (auto& [key, data] : index_datas) {
-        LOG_INFO("add index data to binary set: {}", key);
-        auto size = data->Size();
-        auto deleter = [&](uint8_t*) {};  // avoid repeated deconstruction
-        auto buf = std::shared_ptr<uint8_t[]>(
-            (uint8_t*)const_cast<void*>(data->Data()), deleter);
-        auto file_name = key.substr(key.find_last_of('/') + 1);
-        binary_set.Append(file_name, buf, size);
-    }
-
-    LOG_INFO("load index into Knowhere...");
-    LoadWithoutAssemble(binary_set, config);
-    LOG_INFO("load vector index done");
-}

template <typename T>
void
VectorMemIndex<T>::Load(milvus::tracer::TraceContext ctx,
@ -442,58 +279,6 @@ VectorMemIndex<T>::BuildWithDataset(const DatasetPtr& dataset,
    SetDim(index_.Dim());
}

-template <typename T>
-void
-VectorMemIndex<T>::BuildV2(const Config& config) {
-    auto field_name = create_index_info_.field_name;
-    auto field_type = create_index_info_.field_type;
-    auto dim = create_index_info_.dim;
-    auto reader = space_->ScanData();
-    std::vector<FieldDataPtr> field_datas;
-    for (auto rec : *reader) {
-        if (!rec.ok()) {
-            PanicInfo(IndexBuildError,
-                      "failed to read data: {}",
-                      rec.status().ToString());
-        }
-        auto data = rec.ValueUnsafe();
-        if (data == nullptr) {
-            break;
-        }
-        auto total_num_rows = data->num_rows();
-        auto col_data = data->GetColumnByName(field_name);
-        // todo: support nullable index
-        auto field_data =
-            storage::CreateFieldData(field_type, false, dim, total_num_rows);
-        field_data->FillFieldData(col_data);
-        field_datas.push_back(field_data);
-    }
-    int64_t total_size = 0;
-    int64_t total_num_rows = 0;
-    for (const auto& data : field_datas) {
-        total_size += data->Size();
-        total_num_rows += data->get_num_rows();
-        AssertInfo(dim == 0 || dim == data->get_dim(),
-                   "inconsistent dim value between field datas!");
-    }
-
-    auto buf = std::shared_ptr<uint8_t[]>(new uint8_t[total_size]);
-    int64_t offset = 0;
-    for (auto data : field_datas) {
-        std::memcpy(buf.get() + offset, data->Data(), data->Size());
-        offset += data->Size();
-        data.reset();
-    }
-    field_datas.clear();
-
-    Config build_config;
-    build_config.update(config);
-    build_config.erase("insert_files");
-
-    auto dataset = GenDataset(total_num_rows, dim, buf.get());
-    BuildWithDataset(dataset, build_config);
-}

template <typename T>
void
VectorMemIndex<T>::Build(const Config& config) {
@ -852,109 +637,6 @@ void VectorMemIndex<T>::LoadFromFile(const Config& config) {
                 .count());
}

-template <typename T>
-void
-VectorMemIndex<T>::LoadFromFileV2(const Config& config) {
-    auto filepath = GetValueFromConfig<std::string>(config, kMmapFilepath);
-    AssertInfo(filepath.has_value(), "mmap filepath is empty when load index");
-
-    std::filesystem::create_directories(
-        std::filesystem::path(filepath.value()).parent_path());
-
-    auto file = File::Open(filepath.value(), O_CREAT | O_TRUNC | O_RDWR);
-
-    auto blobs = space_->StatisticsBlobs();
-    std::unordered_set<std::string> pending_index_files;
-    auto index_prefix = file_manager_->GetRemoteIndexObjectPrefixV2();
-    for (auto& blob : blobs) {
-        if (blob.name.rfind(index_prefix, 0) == 0) {
-            pending_index_files.insert(blob.name);
-        }
-    }
-
-    auto slice_meta_file = index_prefix + "/" + INDEX_FILE_SLICE_META;
-    auto res = space_->GetBlobByteSize(std::string(slice_meta_file));
-
-    if (!res.ok() && !res.status().IsFileNotFound()) {
-        PanicInfo(DataFormatBroken, "failed to read blob");
-    }
-    bool slice_meta_exist = res.ok();
-
-    auto read_blob = [&](const std::string& file_name)
-        -> std::unique_ptr<storage::DataCodec> {
-        auto res = space_->GetBlobByteSize(file_name);
-        if (!res.ok()) {
-            PanicInfo(DataFormatBroken, "unable to read index blob");
-        }
-        auto index_blob_data =
-            std::shared_ptr<uint8_t[]>(new uint8_t[res.value()]);
-        auto status = space_->ReadBlob(file_name, index_blob_data.get());
-        if (!status.ok()) {
-            PanicInfo(DataFormatBroken, "unable to read index blob");
-        }
-        return storage::DeserializeFileData(index_blob_data, res.value());
-    };
-    if (slice_meta_exist) {
-        pending_index_files.erase(slice_meta_file);
-        auto slice_meta_sz = res.value();
-        auto slice_meta_data =
-            std::shared_ptr<uint8_t[]>(new uint8_t[slice_meta_sz]);
-        auto status = space_->ReadBlob(slice_meta_file, slice_meta_data.get());
-        if (!status.ok()) {
-            PanicInfo(DataFormatBroken, "unable to read slice meta");
-        }
-        auto raw_slice_meta =
-            storage::DeserializeFileData(slice_meta_data, slice_meta_sz);
-        Config meta_data = Config::parse(std::string(
-            static_cast<const char*>(raw_slice_meta->GetFieldData()->Data()),
-            raw_slice_meta->GetFieldData()->Size()));
-        for (auto& item : meta_data[META]) {
-            std::string prefix = item[NAME];
-            int slice_num = item[SLICE_NUM];
-            auto total_len = static_cast<size_t>(item[TOTAL_LEN]);
-
-            for (auto i = 0; i < slice_num; ++i) {
-                std::string file_name =
-                    index_prefix + "/" + GenSlicedFileName(prefix, i);
-                auto raw_index_blob = read_blob(file_name);
-                auto written =
-                    file.Write(raw_index_blob->GetFieldData()->Data(),
-                               raw_index_blob->GetFieldData()->Size());
-                pending_index_files.erase(file_name);
-            }
-        }
-    }
-
-    if (!pending_index_files.empty()) {
-        for (auto& file_name : pending_index_files) {
-            auto raw_index_blob = read_blob(file_name);
-            file.Write(raw_index_blob->GetFieldData()->Data(),
-                       raw_index_blob->GetFieldData()->Size());
-        }
-    }
-    file.Close();
-
-    LOG_INFO("load index into Knowhere...");
-    auto conf = config;
-    conf.erase(kMmapFilepath);
-    conf[kEnableMmap] = true;
-    auto stat = index_.DeserializeFromFile(filepath.value(), conf);
-    if (stat != knowhere::Status::success) {
-        PanicInfo(DataFormatBroken,
-                  "failed to Deserialize index: {}",
-                  KnowhereStatusString(stat));
-    }
-
-    auto dim = index_.Dim();
-    this->SetDim(index_.Dim());
-
-    auto ok = unlink(filepath->data());
-    AssertInfo(ok == 0,
-               "failed to unlink mmap index file {}: {}",
-               filepath.value(),
-               strerror(errno));
-    LOG_INFO("load vector index done");
-}
template class VectorMemIndex<float>;
template class VectorMemIndex<bin1>;
template class VectorMemIndex<float16>;
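Illustrative aside (not part of this change): the removed UploadV2 above spells out a little-endian encoding of the 64-bit store version byte by byte. A standalone sketch of the same encode/decode written as a loop, using a made-up value, just to make the intent of the unrolled shifts explicit:

    // Standalone illustration: pack a 64-bit value into 8 little-endian bytes
    // and read it back, the operation the unrolled shifts above performed.
    #include <array>
    #include <cstdint>
    #include <iostream>

    int main() {
        int64_t store_version = 0x0102030405060708;  // made-up version number

        std::array<uint8_t, 8> bytes{};
        uint64_t v = static_cast<uint64_t>(store_version);
        for (int i = 0; i < 8; ++i) {
            bytes[i] = static_cast<uint8_t>(v & 0xFF);  // lowest byte first
            v >>= 8;
        }

        uint64_t decoded = 0;
        for (int i = 7; i >= 0; --i) {
            decoded = (decoded << 8) | bytes[i];  // reassemble in the same order
        }

        std::cout << std::hex << decoded << '\n';  // prints 102030405060708
        return 0;
    }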
@ -25,7 +25,6 @@
#include "knowhere/index/index_factory.h"
#include "index/VectorIndex.h"
#include "storage/MemFileManagerImpl.h"
-#include "storage/space.h"
#include "index/IndexInfo.h"

namespace milvus::index {
@ -40,9 +39,6 @@ class VectorMemIndex : public VectorIndex {
        const storage::FileManagerContext& file_manager_context =
            storage::FileManagerContext());

-    explicit VectorMemIndex(const CreateIndexInfo& create_index_info,
-                            const storage::FileManagerContext& file_manager,
-                            std::shared_ptr<milvus_storage::Space> space);
    BinarySet
    Serialize(const Config& config) override;

@ -52,9 +48,6 @@ class VectorMemIndex : public VectorIndex {
    void
    Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override;

-    void
-    LoadV2(const Config& config = {}) override;
-
    void
    BuildWithDataset(const DatasetPtr& dataset,
                     const Config& config = {}) override;
@ -62,9 +55,6 @@ class VectorMemIndex : public VectorIndex {
    void
    Build(const Config& config = {}) override;

-    void
-    BuildV2(const Config& config = {}) override;
-
    void
    AddWithDataset(const DatasetPtr& dataset, const Config& config) override;

@ -91,9 +81,6 @@ class VectorMemIndex : public VectorIndex {
    BinarySet
    Upload(const Config& config = {}) override;

-    BinarySet
-    UploadV2(const Config& config = {}) override;
-
    knowhere::expected<std::vector<knowhere::IndexNode::IteratorPtr>>
    VectorIterators(const DatasetPtr dataset,
                    const knowhere::Json& json,
@ -107,14 +94,10 @@ class VectorMemIndex : public VectorIndex {
    void
    LoadFromFile(const Config& config);

-    void
-    LoadFromFileV2(const Config& config);
-
protected:
    Config config_;
    knowhere::Index<knowhere::IndexNode> index_;
    std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
-    std::shared_ptr<milvus_storage::Space> space_;

    CreateIndexInfo create_index_info_;
};
@ -26,9 +26,6 @@ class IndexCreatorBase {
    virtual void
    Build() = 0;

-    virtual void
-    BuildV2() = 0;
-
    virtual milvus::BinarySet
    Serialize() = 0;

@ -38,9 +35,6 @@ class IndexCreatorBase {

    virtual BinarySet
    Upload() = 0;

-    virtual BinarySet
-    UploadV2() = 0;
};

using IndexCreatorBasePtr = std::unique_ptr<IndexCreatorBase>;
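Illustrative aside (not part of this change): after this hunk IndexCreatorBase keeps only the Build/Serialize/Upload family. A standalone sketch with toy types (none of these are the Milvus classes) of the same interface shape, mainly to show how a concrete creator plugs into the pure-virtual base:

    // Standalone illustration of the creator-interface pattern kept here:
    // one abstract base, one concrete creator that tracks whether Build ran.
    #include <iostream>
    #include <memory>
    #include <string>

    using BinarySet = std::string;  // toy stand-in for the real BinarySet

    class IndexCreatorBase {
     public:
        virtual ~IndexCreatorBase() = default;
        virtual void
        Build() = 0;
        virtual BinarySet
        Upload() = 0;
    };

    class ToyScalarIndexCreator : public IndexCreatorBase {
     public:
        void
        Build() override {
            built_ = true;  // a real creator would call index_->Build(config_)
        }
        BinarySet
        Upload() override {
            return built_ ? "serialized-index-bytes" : "";
        }

     private:
        bool built_ = false;
    };

    int main() {
        std::unique_ptr<IndexCreatorBase> creator =
            std::make_unique<ToyScalarIndexCreator>();
        creator->Build();
        std::cout << creator->Upload() << '\n';
        return 0;
    }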
@ -23,7 +23,6 @@
#include "indexbuilder/type_c.h"
#include "storage/Types.h"
#include "storage/FileManager.h"
-#include "storage/space.h"

namespace milvus::indexbuilder {

@ -74,41 +73,6 @@ class IndexFactory {
                fmt::format("invalid type is {}", invalid_dtype_msg));
        }
    }

-    IndexCreatorBasePtr
-    CreateIndex(DataType type,
-                const std::string& field_name,
-                const int64_t dim,
-                Config& config,
-                const storage::FileManagerContext& file_manager_context,
-                std::shared_ptr<milvus_storage::Space> space) {
-        auto invalid_dtype_msg =
-            std::string("invalid data type: ") + std::to_string(int(type));
-
-        switch (type) {
-            case DataType::BOOL:
-            case DataType::INT8:
-            case DataType::INT16:
-            case DataType::INT32:
-            case DataType::INT64:
-            case DataType::FLOAT:
-            case DataType::DOUBLE:
-            case DataType::VARCHAR:
-            case DataType::STRING:
-                return CreateScalarIndex(
-                    type, config, file_manager_context, space);
-
-            case DataType::VECTOR_FLOAT:
-            case DataType::VECTOR_BINARY:
-            case DataType::VECTOR_FLOAT16:
-            case DataType::VECTOR_BFLOAT16:
-            case DataType::VECTOR_SPARSE_FLOAT:
-                return std::make_unique<VecIndexCreator>(
-                    type, field_name, dim, config, file_manager_context, space);
-            default:
-                PanicInfo(ErrorCode::DataTypeInvalid, invalid_dtype_msg);
-        }
-    }
};

}  // namespace milvus::indexbuilder
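Illustrative aside (not part of this change): the overload removed above dispatched on the field's DataType to pick a creator. A standalone sketch with a toy enum and toy creators (not the Milvus types) of that switch-based factory shape, which the remaining CreateIndex overload still follows:

    // Standalone illustration: a factory that switches on a data-type enum and
    // returns a creator behind a common interface.
    #include <iostream>
    #include <memory>
    #include <stdexcept>
    #include <string>

    enum class DataType { INT64, VARCHAR, VECTOR_FLOAT };

    struct IndexCreatorBase {
        virtual ~IndexCreatorBase() = default;
        virtual std::string
        Name() const = 0;
    };

    struct ScalarIndexCreator : IndexCreatorBase {
        std::string
        Name() const override {
            return "scalar";
        }
    };

    struct VecIndexCreator : IndexCreatorBase {
        std::string
        Name() const override {
            return "vector";
        }
    };

    std::unique_ptr<IndexCreatorBase>
    CreateIndex(DataType type) {
        switch (type) {
            case DataType::INT64:
            case DataType::VARCHAR:
                return std::make_unique<ScalarIndexCreator>();
            case DataType::VECTOR_FLOAT:
                return std::make_unique<VecIndexCreator>();
            default:
                throw std::invalid_argument("invalid data type");
        }
    }

    int main() {
        std::cout << CreateIndex(DataType::VECTOR_FLOAT)->Name() << '\n';
        return 0;
    }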
@ -36,18 +36,6 @@ ScalarIndexCreator::ScalarIndexCreator(
        index_info, file_manager_context);
}

-ScalarIndexCreator::ScalarIndexCreator(
-    DataType dtype,
-    Config& config,
-    const storage::FileManagerContext& file_manager_context,
-    std::shared_ptr<milvus_storage::Space> space)
-    : config_(config), dtype_(dtype) {
-    milvus::index::CreateIndexInfo index_info;
-    index_info.field_type = dtype_;
-    index_info.index_type = index_type();
-    index_ = index::IndexFactory::GetInstance().CreateIndex(
-        index_info, file_manager_context, std::move(space));
-}
void
ScalarIndexCreator::Build(const milvus::DatasetPtr& dataset) {
    auto size = dataset->GetRows();
@ -60,11 +48,6 @@ ScalarIndexCreator::Build() {
    index_->Build(config_);
}

-void
-ScalarIndexCreator::BuildV2() {
-    index_->BuildV2(config_);
-}
-
milvus::BinarySet
ScalarIndexCreator::Serialize() {
    return index_->Serialize(config_);
@ -84,10 +67,4 @@ BinarySet
ScalarIndexCreator::Upload() {
    return index_->Upload();
}

-BinarySet
-ScalarIndexCreator::UploadV2() {
-    return index_->UploadV2();
-}
-
}  // namespace milvus::indexbuilder
@ -17,7 +17,6 @@
#include <common/CDataType.h>
#include "index/Index.h"
#include "index/ScalarIndex.h"
-#include "storage/space.h"

namespace milvus::indexbuilder {

@ -27,19 +26,12 @@ class ScalarIndexCreator : public IndexCreatorBase {
                       Config& config,
                       const storage::FileManagerContext& file_manager_context);

-    ScalarIndexCreator(DataType data_type,
-                       Config& config,
-                       const storage::FileManagerContext& file_manager_context,
-                       std::shared_ptr<milvus_storage::Space> space);
    void
    Build(const milvus::DatasetPtr& dataset) override;

    void
    Build() override;

-    void
-    BuildV2() override;
-
    milvus::BinarySet
    Serialize() override;

@ -49,9 +41,6 @@ class ScalarIndexCreator : public IndexCreatorBase {
    BinarySet
    Upload() override;

-    BinarySet
-    UploadV2() override;
-
private:
    std::string
    index_type();
@ -72,13 +61,4 @@ CreateScalarIndex(DataType dtype,
    return std::make_unique<ScalarIndexCreator>(
        dtype, config, file_manager_context);
}

-inline ScalarIndexCreatorPtr
-CreateScalarIndex(DataType dtype,
-                  Config& config,
-                  const storage::FileManagerContext& file_manager_context,
-                  std::shared_ptr<milvus_storage::Space> space) {
-    return std::make_unique<ScalarIndexCreator>(
-        dtype, config, file_manager_context, space);
-}
}  // namespace milvus::indexbuilder
@ -24,7 +24,7 @@ VecIndexCreator::VecIndexCreator(
    DataType data_type,
    Config& config,
    const storage::FileManagerContext& file_manager_context)
-    : VecIndexCreator(data_type, "", 0, config, file_manager_context, nullptr) {
+    : VecIndexCreator(data_type, "", 0, config, file_manager_context) {
}

VecIndexCreator::VecIndexCreator(
@ -32,9 +32,8 @@ VecIndexCreator::VecIndexCreator(
    const std::string& field_name,
    const int64_t dim,
    Config& config,
-    const storage::FileManagerContext& file_manager_context,
-    std::shared_ptr<milvus_storage::Space> space)
-    : config_(config), data_type_(data_type), space_(std::move(space)) {
+    const storage::FileManagerContext& file_manager_context)
+    : config_(config), data_type_(data_type) {
    index::CreateIndexInfo index_info;
    index_info.field_type = data_type_;
    index_info.index_type = index::GetIndexTypeFromConfig(config_);
@ -45,7 +44,7 @@ VecIndexCreator::VecIndexCreator(
    index_info.dim = dim;

    index_ = index::IndexFactory::GetInstance().CreateIndex(
-        index_info, file_manager_context, space_);
+        index_info, file_manager_context);
    AssertInfo(index_ != nullptr,
               "[VecIndexCreator]Index is null after create index");
}
@ -65,11 +64,6 @@ VecIndexCreator::Build() {
    index_->Build(config_);
}

-void
-VecIndexCreator::BuildV2() {
-    index_->BuildV2(config_);
-}
-
milvus::BinarySet
VecIndexCreator::Serialize() {
    return index_->Serialize(config_);
@ -95,11 +89,6 @@ VecIndexCreator::Upload() {
    return index_->Upload();
}

-BinarySet
-VecIndexCreator::UploadV2() {
-    return index_->UploadV2();
-}
-
void
VecIndexCreator::CleanLocalData() {
    auto vector_index = dynamic_cast<index::VectorIndex*>(index_.get());
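Illustrative aside (not part of this change): the first hunk above changes which arguments the short VecIndexCreator constructor forwards to the full one. A standalone sketch of C++ delegating constructors, the mechanism that forwarding relies on; the class and members here are made up:

    // Standalone illustration of a delegating constructor: the short form
    // forwards to the full form instead of duplicating initialization.
    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <utility>

    class Creator {
     public:
        // Full constructor: does all the real initialization.
        Creator(std::string field_name, int64_t dim)
            : field_name_(std::move(field_name)), dim_(dim) {
        }

        // Convenience constructor: delegates with default field name and dim,
        // mirroring VecIndexCreator(data_type, "", 0, config, ctx) above.
        Creator() : Creator("", 0) {
        }

        void
        Describe() const {
            std::cout << "field='" << field_name_ << "' dim=" << dim_ << '\n';
        }

     private:
        std::string field_name_;
        int64_t dim_;
    };

    int main() {
        Creator{}.Describe();            // field='' dim=0
        Creator{"vec", 128}.Describe();  // field='vec' dim=128
        return 0;
    }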
@ -20,7 +20,6 @@
#include "index/VectorIndex.h"
#include "index/IndexInfo.h"
#include "storage/Types.h"
-#include "storage/space.h"

namespace milvus::indexbuilder {

@ -37,17 +36,14 @@ class VecIndexCreator : public IndexCreatorBase {
                    const std::string& field_name,
                    const int64_t dim,
                    Config& config,
-                    const storage::FileManagerContext& file_manager_context,
-                    std::shared_ptr<milvus_storage::Space> space);
+                    const storage::FileManagerContext& file_manager_context);
    void
    Build(const milvus::DatasetPtr& dataset) override;

    void
    Build() override;

-    void
-    BuildV2() override;
-
    milvus::BinarySet
    Serialize() override;

@ -65,9 +61,6 @@ class VecIndexCreator : public IndexCreatorBase {
    BinarySet
    Upload() override;

-    BinarySet
-    UploadV2() override;
-
public:
    void
    CleanLocalData();
@ -76,8 +69,6 @@ class VecIndexCreator : public IndexCreatorBase {
    milvus::index::IndexBasePtr index_ = nullptr;
    Config config_;
    DataType data_type_;

-    std::shared_ptr<milvus_storage::Space> space_;
};

}  // namespace milvus::indexbuilder
@ -15,7 +15,6 @@
#include "fmt/core.h"
#include "indexbuilder/type_c.h"
#include "log/Log.h"
-#include "storage/options.h"

#ifdef __linux__
#include <malloc.h>
@ -31,7 +30,6 @@
#include "index/Utils.h"
#include "pb/index_cgo_msg.pb.h"
#include "storage/Util.h"
-#include "storage/space.h"
#include "index/Meta.h"

using namespace milvus;
@ -234,107 +232,6 @@ CreateIndex(CIndex* res_index,
    }
}

-CStatus
-CreateIndexV2(CIndex* res_index,
-              const uint8_t* serialized_build_index_info,
-              const uint64_t len) {
-    try {
-        auto build_index_info =
-            std::make_unique<milvus::proto::indexcgo::BuildIndexInfo>();
-        auto res =
-            build_index_info->ParseFromArray(serialized_build_index_info, len);
-        AssertInfo(res, "Unmarshall build index info failed");
-        auto field_type =
-            static_cast<DataType>(build_index_info->field_schema().data_type());
-
-        milvus::index::CreateIndexInfo index_info;
-        index_info.field_type = field_type;
-        index_info.dim = build_index_info->dim();
-
-        auto storage_config =
-            get_storage_config(build_index_info->storage_config());
-        auto config = get_config(build_index_info);
-        // get index type
-        auto index_type = milvus::index::GetValueFromConfig<std::string>(
-            config, "index_type");
-        AssertInfo(index_type.has_value(), "index type is empty");
-        index_info.index_type = index_type.value();
-
-        auto engine_version = build_index_info->current_index_version();
-        index_info.index_engine_version = engine_version;
-        config[milvus::index::INDEX_ENGINE_VERSION] =
-            std::to_string(engine_version);
-
-        // get metric type
-        if (milvus::IsVectorDataType(field_type)) {
-            auto metric_type = milvus::index::GetValueFromConfig<std::string>(
-                config, "metric_type");
-            AssertInfo(metric_type.has_value(), "metric type is empty");
-            index_info.metric_type = metric_type.value();
-        }
-
-        milvus::storage::FieldDataMeta field_meta{
-            build_index_info->collectionid(),
-            build_index_info->partitionid(),
-            build_index_info->segmentid(),
-            build_index_info->field_schema().fieldid(),
-            build_index_info->field_schema()};
-        milvus::storage::IndexMeta index_meta{
-            build_index_info->segmentid(),
-            build_index_info->field_schema().fieldid(),
-            build_index_info->buildid(),
-            build_index_info->index_version(),
-            "",
-            build_index_info->field_schema().name(),
-            field_type,
-            build_index_info->dim(),
-        };
-
-        auto store_space = milvus_storage::Space::Open(
-            build_index_info->store_path(),
-            milvus_storage::Options{nullptr,
-                                    build_index_info->store_version()});
-        AssertInfo(store_space.ok() && store_space.has_value(),
-                   "create space failed: {}",
-                   store_space.status().ToString());
-
-        auto index_space = milvus_storage::Space::Open(
-            build_index_info->index_store_path(),
-            milvus_storage::Options{.schema = store_space.value()->schema()});
-        AssertInfo(index_space.ok() && index_space.has_value(),
-                   "create space failed: {}",
-                   index_space.status().ToString());
-
-        LOG_INFO("init space success");
-        auto chunk_manager =
-            milvus::storage::CreateChunkManager(storage_config);
-        milvus::storage::FileManagerContext fileManagerContext(
-            field_meta,
-            index_meta,
-            chunk_manager,
-            std::move(index_space.value()));
-
-        auto index =
-            milvus::indexbuilder::IndexFactory::GetInstance().CreateIndex(
-                field_type,
-                build_index_info->field_schema().name(),
-                build_index_info->dim(),
-                config,
-                fileManagerContext,
-                std::move(store_space.value()));
-        index->BuildV2();
-        *res_index = index.release();
-        return milvus::SuccessCStatus();
-    } catch (SegcoreError& e) {
-        auto status = CStatus();
-        status.error_code = e.get_error_code();
-        status.error_msg = strdup(e.what());
-        return status;
-    } catch (std::exception& e) {
-        return milvus::FailureCStatus(&e);
-    }
-}

CStatus
DeleteIndex(CIndex index) {
    auto status = CStatus();
@ -823,29 +720,6 @@ SerializeIndexAndUpLoad(CIndex index, CBinarySet* c_binary_set) {
    return status;
}

-CStatus
-SerializeIndexAndUpLoadV2(CIndex index, CBinarySet* c_binary_set) {
-    auto status = CStatus();
-    try {
-        AssertInfo(
-            index,
-            "failed to serialize index to binary set, passed index was null");
-
-        auto real_index =
-            reinterpret_cast<milvus::indexbuilder::IndexCreatorBase*>(index);
-
-        auto binary =
-            std::make_unique<knowhere::BinarySet>(real_index->UploadV2());
-        *c_binary_set = binary.release();
-        status.error_code = Success;
-        status.error_msg = "";
-    } catch (std::exception& e) {
-        status.error_code = UnexpectedError;
-        status.error_msg = strdup(e.what());
-    }
-    return status;
-}

CStatus
AppendOptionalFieldDataPath(CBuildIndexInfo c_build_index_info,
                            const int64_t field_id,
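Illustrative aside (not part of this change): the removed SerializeIndexAndUpLoadV2 follows the same convention as the remaining C entry points here — never let a C++ exception cross the C boundary; convert it into a status struct with a strdup'd message instead. A standalone sketch of that pattern with a toy status type (not the Milvus CStatus definitions; strdup is POSIX):

    // Standalone illustration: catch C++ exceptions at a C-linkage boundary and
    // report them through a plain status struct the caller can free().
    #include <cstdio>
    #include <cstdlib>
    #include <cstring>
    #include <stdexcept>

    extern "C" {
    struct ToyStatus {
        int error_code;         // 0 means success
        const char* error_msg;  // heap copy on error, owned by the caller
    };

    ToyStatus
    DoWork(int input) {
        ToyStatus status{0, ""};
        try {
            if (input < 0) {
                throw std::invalid_argument("input must be non-negative");
            }
            // real work would happen here
        } catch (std::exception& e) {
            status.error_code = 1;
            status.error_msg = strdup(e.what());  // copy; the exception dies here
        }
        return status;
    }
    }  // extern "C"

    int main() {
        ToyStatus s = DoWork(-1);
        std::printf("code=%d msg=%s\n", s.error_code, s.error_msg);
        if (s.error_code != 0) {
            std::free(const_cast<char*>(s.error_msg));
        }
        return 0;
    }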
@ -128,14 +128,6 @@ AppendOptionalFieldDataPath(CBuildIndexInfo c_build_index_info,
CStatus
SerializeIndexAndUpLoad(CIndex index, CBinarySet* c_binary_set);

-CStatus
-SerializeIndexAndUpLoadV2(CIndex index, CBinarySet* c_binary_set);
-
-CStatus
-CreateIndexV2(CIndex* res_index,
-              const uint8_t* serialized_build_index_info,
-              const uint64_t len);
-
CStatus
AppendIndexStorageInfo(CBuildIndexInfo c_build_index_info,
                       const char* c_data_store_path,
@ -43,6 +43,6 @@ set(SEGCORE_FILES
    reduce/GroupReduce.cpp)
add_library(milvus_segcore SHARED ${SEGCORE_FILES})

-target_link_libraries(milvus_segcore milvus_query milvus_bitset milvus_exec ${OpenMP_CXX_FLAGS} milvus-storage milvus_futures)
+target_link_libraries(milvus_segcore milvus_query milvus_bitset milvus_exec ${OpenMP_CXX_FLAGS} milvus_futures)

install(TARGETS milvus_segcore DESTINATION "${CMAKE_INSTALL_LIBDIR}")
@ -33,8 +33,6 @@
#include "storage/RemoteChunkManagerSingleton.h"
#include "storage/Util.h"
#include "storage/ThreadPools.h"
-#include "storage/options.h"
-#include "storage/space.h"

namespace milvus::segcore {

@ -280,89 +278,6 @@ SegmentGrowingImpl::LoadFieldData(const LoadFieldDataInfo& infos) {
                                             reserved_offset + num_rows);
}

-void
-SegmentGrowingImpl::LoadFieldDataV2(const LoadFieldDataInfo& infos) {
-    // schema don't include system field
-    AssertInfo(infos.field_infos.size() == schema_->size() + 2,
-               "lost some field data when load for growing segment");
-    AssertInfo(infos.field_infos.find(TimestampFieldID.get()) !=
-                   infos.field_infos.end(),
-               "timestamps field data should be included");
-    AssertInfo(
-        infos.field_infos.find(RowFieldID.get()) != infos.field_infos.end(),
-        "rowID field data should be included");
-    auto primary_field_id =
-        schema_->get_primary_field_id().value_or(FieldId(-1));
-    AssertInfo(primary_field_id.get() != INVALID_FIELD_ID, "Primary key is -1");
-    AssertInfo(infos.field_infos.find(primary_field_id.get()) !=
-                   infos.field_infos.end(),
-               "primary field data should be included");
-
-    size_t num_rows = storage::GetNumRowsForLoadInfo(infos);
-    auto reserved_offset = PreInsert(num_rows);
-    for (auto& [id, info] : infos.field_infos) {
-        auto field_id = FieldId(id);
-        auto field_data_info = FieldDataInfo(field_id.get(), num_rows);
-        auto& pool =
-            ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::MIDDLE);
-        auto res = milvus_storage::Space::Open(
-            infos.url, milvus_storage::Options{nullptr, infos.storage_version});
-        AssertInfo(res.ok(), "init space failed");
-        std::shared_ptr<milvus_storage::Space> space = std::move(res.value());
-        auto load_future = pool.Submit(
-            LoadFieldDatasFromRemote2, space, schema_, field_data_info);
-        auto field_data =
-            milvus::storage::CollectFieldDataChannel(field_data_info.channel);
-        if (field_id == TimestampFieldID) {
-            // step 2: sort timestamp
-            // query node already guarantees that the timestamp is ordered, avoid field data copy in c++
-
-            // step 3: fill into Segment.ConcurrentVector
-            insert_record_.timestamps_.set_data_raw(reserved_offset,
-                                                    field_data);
-            continue;
-        }
-
-        if (field_id == RowFieldID) {
-            continue;
-        }
-
-        if (!indexing_record_.SyncDataWithIndex(field_id)) {
-            insert_record_.get_data_base(field_id)->set_data_raw(
-                reserved_offset, field_data);
-        }
-        if (segcore_config_.get_enable_interim_segment_index()) {
-            auto offset = reserved_offset;
-            for (auto& data : field_data) {
-                auto row_count = data->get_num_rows();
-                indexing_record_.AppendingIndex(
-                    offset, row_count, field_id, data, insert_record_);
-                offset += row_count;
-            }
-        }
-        try_remove_chunks(field_id);
-
-        if (field_id == primary_field_id) {
-            insert_record_.insert_pks(field_data);
-        }
-
-        // update average row data size
-        auto field_meta = (*schema_)[field_id];
-        if (IsVariableDataType(field_meta.get_data_type())) {
-            SegmentInternalInterface::set_field_avg_size(
-                field_id,
-                num_rows,
-                storage::GetByteSizeOfFieldDatas(field_data));
-        }
-
-        // update the mem size
-        stats_.mem_size += storage::GetByteSizeOfFieldDatas(field_data);
-    }
-
-    // step 5: update small indexes
-    insert_record_.ack_responder_.AddSegment(reserved_offset,
-                                             reserved_offset + num_rows);
-}
SegcoreError
SegmentGrowingImpl::Delete(int64_t reserved_begin,
                           int64_t size,
@ -64,8 +64,6 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||||||
|
|
||||||
void
|
void
|
||||||
LoadFieldData(const LoadFieldDataInfo& info) override;
|
LoadFieldData(const LoadFieldDataInfo& info) override;
|
||||||
void
|
|
||||||
LoadFieldDataV2(const LoadFieldDataInfo& info) override;
|
|
||||||
|
|
||||||
void
|
void
|
||||||
RemoveDuplicatePkRecords() override;
|
RemoveDuplicatePkRecords() override;
|
||||||
|
|||||||
@ -115,9 +115,6 @@ class SegmentInterface {
|
|||||||
virtual void
|
virtual void
|
||||||
LoadFieldData(const LoadFieldDataInfo& info) = 0;
|
LoadFieldData(const LoadFieldDataInfo& info) = 0;
|
||||||
|
|
||||||
virtual void
|
|
||||||
LoadFieldDataV2(const LoadFieldDataInfo& info) = 0;
|
|
||||||
|
|
||||||
virtual void
|
virtual void
|
||||||
RemoveDuplicatePkRecords() = 0;
|
RemoveDuplicatePkRecords() = 0;
|
||||||
|
|
||||||
|
|||||||
@ -281,59 +281,6 @@ SegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& load_info) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
SegmentSealedImpl::LoadFieldDataV2(const LoadFieldDataInfo& load_info) {
|
|
||||||
// TODO(SPARSE): support storage v2
|
|
||||||
// NOTE: lock only when data is ready to avoid starvation
|
|
||||||
// only one field for now, parallel load field data in golang
|
|
||||||
size_t num_rows = storage::GetNumRowsForLoadInfo(load_info);
|
|
||||||
|
|
||||||
for (auto& [id, info] : load_info.field_infos) {
|
|
||||||
AssertInfo(info.row_count > 0, "The row count of field data is 0");
|
|
||||||
|
|
||||||
auto field_id = FieldId(id);
|
|
||||||
auto insert_files = info.insert_files;
|
|
||||||
auto field_data_info =
|
|
||||||
FieldDataInfo(field_id.get(), num_rows, load_info.mmap_dir_path);
|
|
||||||
|
|
||||||
LOG_INFO("segment {} loads field {} with num_rows {}",
|
|
||||||
this->get_segment_id(),
|
|
||||||
field_id.get(),
|
|
||||||
num_rows);
|
|
||||||
|
|
||||||
auto parallel_degree = static_cast<uint64_t>(
|
|
||||||
DEFAULT_FIELD_MAX_MEMORY_LIMIT / FILE_SLICE_SIZE);
|
|
||||||
field_data_info.channel->set_capacity(parallel_degree * 2);
|
|
||||||
auto& pool =
|
|
||||||
ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::MIDDLE);
|
|
||||||
// auto load_future = pool.Submit(
|
|
||||||
// LoadFieldDatasFromRemote, insert_files, field_data_info.channel);
|
|
||||||
|
|
||||||
auto res = milvus_storage::Space::Open(
|
|
||||||
load_info.url,
|
|
||||||
milvus_storage::Options{nullptr, load_info.storage_version});
|
|
||||||
AssertInfo(res.ok(),
|
|
||||||
fmt::format("init space failed: {}, error: {}",
|
|
||||||
load_info.url,
|
|
||||||
res.status().ToString()));
|
|
||||||
std::shared_ptr<milvus_storage::Space> space = std::move(res.value());
|
|
||||||
auto load_future = pool.Submit(
|
|
||||||
LoadFieldDatasFromRemote2, space, schema_, field_data_info);
|
|
||||||
LOG_INFO("segment {} submits load field {} task to thread pool",
|
|
||||||
this->get_segment_id(),
|
|
||||||
field_id.get());
|
|
||||||
if (load_info.mmap_dir_path.empty() ||
|
|
||||||
SystemProperty::Instance().IsSystem(field_id)) {
|
|
||||||
LoadFieldData(field_id, field_data_info);
|
|
||||||
} else {
|
|
||||||
MapFieldData(field_id, field_data_info);
|
|
||||||
}
|
|
||||||
LOG_INFO("segment {} loads field {} done",
|
|
||||||
this->get_segment_id(),
|
|
||||||
field_id.get());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
SegmentSealedImpl::RemoveDuplicatePkRecords() {
|
SegmentSealedImpl::RemoveDuplicatePkRecords() {
|
||||||
std::unique_lock lck(mutex_);
|
std::unique_lock lck(mutex_);
|
||||||
|
|||||||
@ -50,8 +50,6 @@ class SegmentSealedImpl : public SegmentSealed {
|
|||||||
LoadIndex(const LoadIndexInfo& info) override;
|
LoadIndex(const LoadIndexInfo& info) override;
|
||||||
void
|
void
|
||||||
LoadFieldData(const LoadFieldDataInfo& info) override;
|
LoadFieldData(const LoadFieldDataInfo& info) override;
|
||||||
void
|
|
||||||
LoadFieldDataV2(const LoadFieldDataInfo& info) override;
|
|
||||||
// erase duplicate records when sealed segment loaded done
|
// erase duplicate records when sealed segment loaded done
|
||||||
void
|
void
|
||||||
RemoveDuplicatePkRecords() override;
|
RemoveDuplicatePkRecords() override;
|
||||||
|
|||||||
@ -780,35 +780,7 @@ ReverseDataFromIndex(const index::IndexBase* index,
|
|||||||
|
|
||||||
return data_array;
|
return data_array;
|
||||||
}
|
}
|
||||||
void
|
|
||||||
LoadFieldDatasFromRemote2(std::shared_ptr<milvus_storage::Space> space,
|
|
||||||
SchemaPtr schema,
|
|
||||||
FieldDataInfo& field_data_info) {
|
|
||||||
auto reader = space->ScanData();
|
|
||||||
|
|
||||||
for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
|
|
||||||
if (!rec.ok()) {
|
|
||||||
PanicInfo(DataFormatBroken, "failed to read data");
|
|
||||||
}
|
|
||||||
auto data = rec.ValueUnsafe();
|
|
||||||
auto total_num_rows = data->num_rows();
|
|
||||||
for (auto& field : schema->get_fields()) {
|
|
||||||
if (field.second.get_id().get() != field_data_info.field_id) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
auto col_data =
|
|
||||||
data->GetColumnByName(field.second.get_name().get());
|
|
||||||
auto field_data = storage::CreateFieldData(
|
|
||||||
field.second.get_data_type(),
|
|
||||||
field.second.is_nullable(),
|
|
||||||
field.second.is_vector() ? field.second.get_dim() : 0,
|
|
||||||
total_num_rows);
|
|
||||||
field_data->FillFieldData(col_data);
|
|
||||||
field_data_info.channel->push(field_data);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
field_data_info.channel->close();
|
|
||||||
}
|
|
||||||
// init segcore storage config first, and create default remote chunk manager
|
// init segcore storage config first, and create default remote chunk manager
|
||||||
// segcore use default remote chunk manager to load data from minio/s3
|
// segcore use default remote chunk manager to load data from minio/s3
|
||||||
void
|
void
|
||||||
|
|||||||
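The removed LoadFieldDatasFromRemote2 above hands data to the segment through a channel: the loader pushes each decoded column chunk into field_data_info.channel and closes it, while the caller drains the channel with CollectFieldDataChannel. The sketch below restates that producer/consumer hand-off using only standard-library primitives; Chunk, ChunkChannel, and CollectAll are hypothetical stand-ins, not Milvus's actual FieldData or FieldDataChannel types.

#include <condition_variable>
#include <cstdint>
#include <mutex>
#include <optional>
#include <queue>
#include <vector>

// Chunk stands in for one decoded column slice (FieldDataPtr in the diff above).
struct Chunk {
    std::vector<uint8_t> bytes;
};

// Minimal single-producer channel: the loader pushes chunks, close() signals
// end-of-stream, and the consumer drains until pop() returns std::nullopt.
class ChunkChannel {
 public:
    void
    push(Chunk c) {
        {
            std::lock_guard<std::mutex> lk(mu_);
            q_.push(std::move(c));
        }
        cv_.notify_one();
    }
    void
    close() {
        {
            std::lock_guard<std::mutex> lk(mu_);
            closed_ = true;
        }
        cv_.notify_all();
    }
    std::optional<Chunk>
    pop() {
        std::unique_lock<std::mutex> lk(mu_);
        cv_.wait(lk, [&] { return closed_ || !q_.empty(); });
        if (q_.empty()) {
            return std::nullopt;  // closed and fully drained
        }
        Chunk c = std::move(q_.front());
        q_.pop();
        return c;
    }

 private:
    std::mutex mu_;
    std::condition_variable cv_;
    std::queue<Chunk> q_;
    bool closed_ = false;
};

// Consumer side, mirroring CollectFieldDataChannel: gather everything the
// producer emitted before it closed the channel.
std::vector<Chunk>
CollectAll(ChunkChannel& ch) {
    std::vector<Chunk> out;
    while (auto c = ch.pop()) {
        out.push_back(std::move(*c));
    }
    return out;
}

The sketch uses an unbounded queue to stay short; the channel in the sealed-segment path above is bounded (set_capacity) so the producer back-pressures the remote loader.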
@@ -28,7 +28,6 @@
 #include "log/Log.h"
 #include "segcore/DeletedRecord.h"
 #include "segcore/InsertRecord.h"
-#include "storage/space.h"
 
 namespace milvus::segcore {
 

@@ -119,10 +118,6 @@ void
 LoadFieldDatasFromRemote(const std::vector<std::string>& remote_files,
                          FieldDataChannelPtr channel);
 
-void
-LoadFieldDatasFromRemote2(std::shared_ptr<milvus_storage::Space> space,
-                          SchemaPtr schema,
-                          FieldDataInfo& field_data_info);
 /**
  * Returns an index pointing to the first element in the range [first, last) such that `value < element` is true
  * (i.e. that is strictly greater than value), or last if no such element is found.

@@ -318,77 +318,6 @@ AppendIndexV2(CTraceContext c_trace, CLoadIndexInfo c_load_index_info) {
     }
 }
 
-CStatus
-AppendIndexV3(CLoadIndexInfo c_load_index_info) {
-    try {
-        auto load_index_info =
-            (milvus::segcore::LoadIndexInfo*)c_load_index_info;
-        auto& index_params = load_index_info->index_params;
-        auto field_type = load_index_info->field_type;
-
-        milvus::index::CreateIndexInfo index_info;
-        index_info.field_type = load_index_info->field_type;
-
-        // get index type
-        AssertInfo(index_params.find("index_type") != index_params.end(),
-                   "index type is empty");
-        index_info.index_type = index_params.at("index_type");
-
-        // get metric type
-        if (milvus::IsVectorDataType(field_type)) {
-            AssertInfo(index_params.find("metric_type") != index_params.end(),
-                       "metric type is empty for vector index");
-            index_info.metric_type = index_params.at("metric_type");
-        }
-
-        milvus::storage::FieldDataMeta field_meta{
-            load_index_info->collection_id,
-            load_index_info->partition_id,
-            load_index_info->segment_id,
-            load_index_info->field_id};
-        milvus::storage::IndexMeta index_meta{load_index_info->segment_id,
-                                              load_index_info->field_id,
-                                              load_index_info->index_build_id,
-                                              load_index_info->index_version};
-        auto config = milvus::index::ParseConfigFromIndexParams(
-            load_index_info->index_params);
-
-        auto res = milvus_storage::Space::Open(
-            load_index_info->uri,
-            milvus_storage::Options{nullptr,
-                                    load_index_info->index_store_version});
-        AssertInfo(res.ok(), "init space failed");
-        std::shared_ptr<milvus_storage::Space> space = std::move(res.value());
-
-        milvus::storage::FileManagerContext fileManagerContext(
-            field_meta, index_meta, nullptr, space);
-        load_index_info->index =
-            milvus::index::IndexFactory::GetInstance().CreateIndex(
-                index_info, fileManagerContext, space);
-
-        if (!load_index_info->mmap_dir_path.empty() &&
-            load_index_info->index->IsMmapSupported()) {
-            auto filepath =
-                std::filesystem::path(load_index_info->mmap_dir_path) /
-                std::to_string(load_index_info->segment_id) /
-                std::to_string(load_index_info->field_id) /
-                std::to_string(load_index_info->index_id);
-
-            config[kMmapFilepath] = filepath.string();
-        }
-
-        load_index_info->index->LoadV2(config);
-        auto status = CStatus();
-        status.error_code = milvus::Success;
-        status.error_msg = "";
-        return status;
-    } catch (std::exception& e) {
-        auto status = CStatus();
-        status.error_code = milvus::UnexpectedError;
-        status.error_msg = strdup(e.what());
-        return status;
-    }
-}
 CStatus
 AppendIndexFilePath(CLoadIndexInfo c_load_index_info, const char* c_file_path) {
     try {
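Both the removed AppendIndexV3 and the surviving AppendIndexV2 wrap the C++ work in a try/catch and translate the outcome into a CStatus that crosses the C boundary back to Go. A minimal sketch of that idiom, using a simplified status struct and a hypothetical DoWork() rather than the real CStatus type, error codes, or index-loading calls:

#include <cstring>
#include <stdexcept>

extern "C" {
// Simplified stand-in for CStatus: 0 means success; error_msg is heap-owned
// on failure and the caller is expected to free it.
struct SimpleStatus {
    int error_code;
    const char* error_msg;
};
}

static void
DoWork() {  // hypothetical body that may throw
    throw std::runtime_error("index load failed");
}

extern "C" SimpleStatus
AppendIndexSketch() {
    try {
        DoWork();
        return SimpleStatus{0, ""};
    } catch (const std::exception& e) {
        // strdup so the message outlives the unwound exception object.
        return SimpleStatus{1, strdup(e.what())};
    }
}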
@@ -62,9 +62,6 @@ AppendIndexFilePath(CLoadIndexInfo c_load_index_info, const char* file_path);
 CStatus
 AppendIndexV2(CTraceContext c_trace, CLoadIndexInfo c_load_index_info);
 
-CStatus
-AppendIndexV3(CLoadIndexInfo c_load_index_info);
-
 CStatus
 AppendIndexEngineVersionToLoadInfo(CLoadIndexInfo c_load_index_info,
                                    int32_t index_engine_version);

@@ -29,7 +29,6 @@
 #include "storage/Util.h"
 #include "futures/Future.h"
 #include "futures/Executor.h"
-#include "storage/space.h"
 
 ////////////////////////////// common interfaces //////////////////////////////
 CStatus

@@ -339,20 +338,6 @@ RemoveDuplicatePkRecords(CSegmentInterface c_segment) {
     }
 }
 
-CStatus
-LoadFieldDataV2(CSegmentInterface c_segment,
-                CLoadFieldDataInfo c_load_field_data_info) {
-    try {
-        auto segment =
-            reinterpret_cast<milvus::segcore::SegmentInterface*>(c_segment);
-        AssertInfo(segment != nullptr, "segment conversion failed");
-        auto load_info = (LoadFieldDataInfo*)c_load_field_data_info;
-        segment->LoadFieldDataV2(*load_info);
-        return milvus::SuccessCStatus();
-    } catch (std::exception& e) {
-        return milvus::FailureCStatus(&e);
-    }
-}
 // just for test
 CStatus
 LoadFieldRawData(CSegmentInterface c_segment,

@@ -102,10 +102,6 @@ CStatus
 LoadFieldData(CSegmentInterface c_segment,
               CLoadFieldDataInfo load_field_data_info);
 
-CStatus
-LoadFieldDataV2(CSegmentInterface c_segment,
-                CLoadFieldDataInfo load_field_data_info);
-
 CStatus
 RemoveDuplicatePkRecords(CSegmentInterface c_segment);
 

@@ -69,7 +69,6 @@ if (DEFINED AZURE_BUILD_DIR)
             "-L${AZURE_BUILD_DIR} -lblob-chunk-manager"
             blob-chunk-manager
             milvus_common
-            milvus-storage
             milvus_monitor
             pthread
             ${CONAN_LIBS}

@@ -77,7 +76,6 @@ if (DEFINED AZURE_BUILD_DIR)
 else ()
     target_link_libraries(milvus_storage PUBLIC
             milvus_common
-            milvus-storage
             milvus_monitor
             pthread
             ${CONAN_LIBS}
@@ -45,16 +45,6 @@
 #include "storage/Util.h"
 
 namespace milvus::storage {
 
-DiskFileManagerImpl::DiskFileManagerImpl(
-    const FileManagerContext& fileManagerContext,
-    std::shared_ptr<milvus_storage::Space> space)
-    : FileManagerImpl(fileManagerContext.fieldDataMeta,
-                      fileManagerContext.indexMeta),
-      space_(space) {
-    rcm_ = fileManagerContext.chunkManagerPtr;
-}
-
 DiskFileManagerImpl::DiskFileManagerImpl(
     const FileManagerContext& fileManagerContext)
     : FileManagerImpl(fileManagerContext.fieldDataMeta,

@@ -78,39 +68,10 @@ std::string
 DiskFileManagerImpl::GetRemoteIndexPath(const std::string& file_name,
                                         int64_t slice_num) const {
     std::string remote_prefix;
-    if (space_ != nullptr) {
-        remote_prefix = GetRemoteIndexObjectPrefixV2();
-    } else {
-        remote_prefix = GetRemoteIndexObjectPrefix();
-    }
+    remote_prefix = GetRemoteIndexObjectPrefix();
     return remote_prefix + "/" + file_name + "_" + std::to_string(slice_num);
 }
 
-bool
-DiskFileManagerImpl::AddFileUsingSpace(
-    const std::string& local_file_name,
-    const std::vector<int64_t>& local_file_offsets,
-    const std::vector<std::string>& remote_files,
-    const std::vector<int64_t>& remote_file_sizes) {
-    auto local_chunk_manager =
-        LocalChunkManagerSingleton::GetInstance().GetChunkManager();
-    for (int64_t i = 0; i < remote_files.size(); ++i) {
-        auto buf =
-            std::shared_ptr<uint8_t[]>(new uint8_t[remote_file_sizes[i]]);
-        local_chunk_manager->Read(local_file_name,
-                                  local_file_offsets[i],
-                                  buf.get(),
-                                  remote_file_sizes[i]);
-
-        auto status =
-            space_->WriteBlob(remote_files[i], buf.get(), remote_file_sizes[i]);
-        if (!status.ok()) {
-            return false;
-        }
-    }
-    return true;
-}
-
 bool
 DiskFileManagerImpl::AddFile(const std::string& file) noexcept {
     auto local_chunk_manager =

@@ -204,85 +165,17 @@ DiskFileManagerImpl::AddBatchIndexFiles(
     }
 
     std::map<std::string, int64_t> res;
-    if (space_ != nullptr) {
-        res = PutIndexData(space_,
-                           data_slices,
-                           remote_file_sizes,
-                           remote_files,
-                           field_meta_,
-                           index_meta_);
-    } else {
-        res = PutIndexData(rcm_.get(),
-                           data_slices,
-                           remote_file_sizes,
-                           remote_files,
-                           field_meta_,
-                           index_meta_);
-    }
+    res = PutIndexData(rcm_.get(),
+                       data_slices,
+                       remote_file_sizes,
+                       remote_files,
+                       field_meta_,
+                       index_meta_);
     for (auto& re : res) {
         remote_paths_to_size_[re.first] = re.second;
     }
 }
 
-void
-DiskFileManagerImpl::CacheIndexToDisk() {
-    auto blobs = space_->StatisticsBlobs();
-    std::vector<std::string> remote_files;
-    for (auto& blob : blobs) {
-        remote_files.push_back(blob.name);
-    }
-    auto local_chunk_manager =
-        LocalChunkManagerSingleton::GetInstance().GetChunkManager();
-
-    std::map<std::string, std::vector<int>> index_slices;
-    for (auto& file_path : remote_files) {
-        auto pos = file_path.find_last_of("_");
-        index_slices[file_path.substr(0, pos)].emplace_back(
-            std::stoi(file_path.substr(pos + 1)));
-    }
-
-    for (auto& slices : index_slices) {
-        std::sort(slices.second.begin(), slices.second.end());
-    }
-
-    auto EstimateParallelDegree = [&](const std::string& file) -> uint64_t {
-        auto fileSize = space_->GetBlobByteSize(file);
-        return uint64_t(DEFAULT_FIELD_MAX_MEMORY_LIMIT / fileSize.value());
-    };
-
-    for (auto& slices : index_slices) {
-        auto prefix = slices.first;
-        auto local_index_file_name =
-            GetLocalIndexObjectPrefix() +
-            prefix.substr(prefix.find_last_of('/') + 1);
-        local_chunk_manager->CreateFile(local_index_file_name);
-        int64_t offset = 0;
-        std::vector<std::string> batch_remote_files;
-        uint64_t max_parallel_degree = INT_MAX;
-        for (int& iter : slices.second) {
-            if (batch_remote_files.size() == max_parallel_degree) {
-                auto next_offset = CacheBatchIndexFilesToDiskV2(
-                    batch_remote_files, local_index_file_name, offset);
-                offset = next_offset;
-                batch_remote_files.clear();
-            }
-            auto origin_file = prefix + "_" + std::to_string(iter);
-            if (batch_remote_files.size() == 0) {
-                // Use first file size as average size to estimate
-                max_parallel_degree = EstimateParallelDegree(origin_file);
-            }
-            batch_remote_files.push_back(origin_file);
-        }
-        if (batch_remote_files.size() > 0) {
-            auto next_offset = CacheBatchIndexFilesToDiskV2(
-                batch_remote_files, local_index_file_name, offset);
-            offset = next_offset;
-            batch_remote_files.clear();
-        }
-        local_paths_.emplace_back(local_index_file_name);
-    }
-}
-
 void
 DiskFileManagerImpl::CacheIndexToDisk(
     const std::vector<std::string>& remote_files) {
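Both CacheIndexToDisk overloads, the space-based one removed above and the chunk-manager one that remains, first group remote slice objects named "<prefix>_<sliceNum>" by prefix and sort each group by slice number so the slices can be concatenated into one local index file in order. A small free-function restatement of that grouping step, written under the assumption that every object name ends in an underscore-separated integer; GroupIndexSlices is illustrative, not the member function itself.

#include <algorithm>
#include <map>
#include <string>
#include <vector>

// Group "prefix_N" object names by prefix and order each group by slice id.
std::map<std::string, std::vector<int>>
GroupIndexSlices(const std::vector<std::string>& remote_files) {
    std::map<std::string, std::vector<int>> index_slices;
    for (const auto& file_path : remote_files) {
        auto pos = file_path.find_last_of('_');
        index_slices[file_path.substr(0, pos)].push_back(
            std::stoi(file_path.substr(pos + 1)));
    }
    for (auto& entry : index_slices) {
        std::sort(entry.second.begin(), entry.second.end());
    }
    return index_slices;
}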
@@ -329,111 +222,6 @@ DiskFileManagerImpl::CacheIndexToDisk(
     }
 }
 
-uint64_t
-DiskFileManagerImpl::CacheBatchIndexFilesToDisk(
-    const std::vector<std::string>& remote_files,
-    const std::string& local_file_name,
-    uint64_t local_file_init_offfset) {
-    auto local_chunk_manager =
-        LocalChunkManagerSingleton::GetInstance().GetChunkManager();
-    auto index_datas = GetObjectData(rcm_.get(), remote_files);
-    int batch_size = remote_files.size();
-    AssertInfo(index_datas.size() == batch_size,
-               "inconsistent file num and index data num!");
-
-    uint64_t offset = local_file_init_offfset;
-    for (int i = 0; i < batch_size; ++i) {
-        auto index_data = index_datas[i].get()->GetFieldData();
-        auto index_size = index_data->Size();
-        auto uint8_data =
-            reinterpret_cast<uint8_t*>(const_cast<void*>(index_data->Data()));
-        local_chunk_manager->Write(
-            local_file_name, offset, uint8_data, index_size);
-        offset += index_size;
-    }
-    return offset;
-}
-
-uint64_t
-DiskFileManagerImpl::CacheBatchIndexFilesToDiskV2(
-    const std::vector<std::string>& remote_files,
-    const std::string& local_file_name,
-    uint64_t local_file_init_offfset) {
-    auto local_chunk_manager =
-        LocalChunkManagerSingleton::GetInstance().GetChunkManager();
-    auto index_datas = GetObjectData(space_, remote_files);
-    int batch_size = remote_files.size();
-    AssertInfo(index_datas.size() == batch_size,
-               "inconsistent file num and index data num!");
-
-    uint64_t offset = local_file_init_offfset;
-    for (int i = 0; i < batch_size; ++i) {
-        auto index_data = index_datas[i];
-        auto index_size = index_data->Size();
-        auto uint8_data =
-            reinterpret_cast<uint8_t*>(const_cast<void*>(index_data->Data()));
-        local_chunk_manager->Write(
-            local_file_name, offset, uint8_data, index_size);
-        offset += index_size;
-    }
-    return offset;
-}
-template <typename DataType>
-std::string
-DiskFileManagerImpl::CacheRawDataToDisk(
-    std::shared_ptr<milvus_storage::Space> space) {
-    auto segment_id = GetFieldDataMeta().segment_id;
-    auto field_id = GetFieldDataMeta().field_id;
-
-    auto local_chunk_manager =
-        LocalChunkManagerSingleton::GetInstance().GetChunkManager();
-    auto local_data_path = storage::GenFieldRawDataPathPrefix(
-                               local_chunk_manager, segment_id, field_id) +
-                           "raw_data";
-    local_chunk_manager->CreateFile(local_data_path);
-    // file format
-    // num_rows(uint32) | dim(uint32) | index_data ([]uint8_t)
-    uint32_t num_rows = 0;
-    uint32_t dim = 0;
-    int64_t write_offset = sizeof(num_rows) + sizeof(dim);
-    auto reader = space->ScanData();
-    for (auto rec : *reader) {
-        if (!rec.ok()) {
-            PanicInfo(IndexBuildError,
-                      fmt::format("failed to read data: {}",
-                                  rec.status().ToString()));
-        }
-        auto data = rec.ValueUnsafe();
-        if (data == nullptr) {
-            break;
-        }
-        auto total_num_rows = data->num_rows();
-        num_rows += total_num_rows;
-        auto col_data = data->GetColumnByName(index_meta_.field_name);
-        auto field_data = storage::CreateFieldData(
-            index_meta_.field_type, false, index_meta_.dim, total_num_rows);
-        field_data->FillFieldData(col_data);
-        dim = field_data->get_dim();
-        auto data_size =
-            field_data->get_num_rows() * milvus::GetVecRowSize<DataType>(dim);
-        local_chunk_manager->Write(local_data_path,
-                                   write_offset,
-                                   const_cast<void*>(field_data->Data()),
-                                   data_size);
-        write_offset += data_size;
-    }
-
-    // write num_rows and dim value to file header
-    write_offset = 0;
-    local_chunk_manager->Write(
-        local_data_path, write_offset, &num_rows, sizeof(num_rows));
-    write_offset += sizeof(num_rows);
-    local_chunk_manager->Write(
-        local_data_path, write_offset, &dim, sizeof(dim));
-
-    return local_data_path;
-}
-
 void
 SortByPath(std::vector<std::string>& paths) {
     std::sort(paths.begin(),
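The removed CacheRawDataToDisk(space) overload writes its local cache in the layout named by its comment, num_rows(uint32) | dim(uint32) | raw data, streaming the payload first and back-filling the two header fields once the totals are known. A minimal sketch of that two-pass write with plain std::fstream; WriteRawDataCache and the float-row input are illustrative assumptions, not the member function's real signature.

#include <cstdint>
#include <fstream>
#include <string>
#include <vector>

// Layout: num_rows(uint32) | dim(uint32) | row payload, written with no padding.
// Payload is streamed first; the header is back-filled once totals are known.
void
WriteRawDataCache(const std::string& path,
                  const std::vector<std::vector<float>>& rows) {
    std::fstream out(path, std::ios::out | std::ios::binary | std::ios::trunc);
    uint32_t num_rows = 0;
    uint32_t dim = 0;
    out.seekp(sizeof(num_rows) + sizeof(dim));  // leave room for the header
    for (const auto& row : rows) {
        dim = static_cast<uint32_t>(row.size());
        out.write(reinterpret_cast<const char*>(row.data()),
                  row.size() * sizeof(float));
        ++num_rows;
    }
    out.seekp(0);
    out.write(reinterpret_cast<const char*>(&num_rows), sizeof(num_rows));
    out.write(reinterpret_cast<const char*>(&dim), sizeof(dim));
}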
@@ -682,92 +470,6 @@ WriteOptFieldsIvfMeta(
     write_offset += sizeof(num_of_fields);
 }
 
-// write optional scalar fields ivf info in the following format without space among them
-// | (meta)
-// | version (uint8_t) | num_of_fields (uint32_t) |
-// | (field_0)
-// | field_id (int64_t) | num_of_unique_field_data (uint32_t)
-// | size_0 (uint32_t) | offset_0 (uint32_t)...
-// | size_1 | offset_0, offset_1, ...
-std::string
-DiskFileManagerImpl::CacheOptFieldToDisk(
-    std::shared_ptr<milvus_storage::Space> space, OptFieldT& fields_map) {
-    const uint32_t num_of_fields = fields_map.size();
-    if (0 == num_of_fields) {
-        return "";
-    } else if (num_of_fields > 1) {
-        PanicInfo(
-            ErrorCode::NotImplemented,
-            "vector index build with multiple fields is not supported yet");
-    }
-    if (nullptr == space) {
-        LOG_ERROR("Failed to cache optional field. Space is null");
-        return "";
-    }
-
-    auto segment_id = GetFieldDataMeta().segment_id;
-    auto vec_field_id = GetFieldDataMeta().field_id;
-    auto local_chunk_manager =
-        LocalChunkManagerSingleton::GetInstance().GetChunkManager();
-    auto local_data_path = storage::GenFieldRawDataPathPrefix(
-                               local_chunk_manager, segment_id, vec_field_id) +
-                           std::string(VEC_OPT_FIELDS);
-    local_chunk_manager->CreateFile(local_data_path);
-
-    uint64_t write_offset = 0;
-    WriteOptFieldsIvfMeta(
-        local_chunk_manager, local_data_path, num_of_fields, write_offset);
-
-    std::unordered_set<int64_t> actual_field_ids;
-    auto reader = space->ScanData();
-    for (auto& [field_id, tup] : fields_map) {
-        const auto& field_name = std::get<0>(tup);
-        const auto& field_type = std::get<1>(tup);
-        std::vector<FieldDataPtr> field_datas;
-        for (auto rec : *reader) {
-            if (!rec.ok()) {
-                PanicInfo(IndexBuildError,
-                          fmt::format("failed to read optional field data: {}",
-                                      rec.status().ToString()));
-            }
-            auto data = rec.ValueUnsafe();
-            if (data == nullptr) {
-                break;
-            }
-            auto total_num_rows = data->num_rows();
-            if (0 == total_num_rows) {
-                LOG_WARN("optional field {} has no data", field_name);
-                return "";
-            }
-            auto col_data = data->GetColumnByName(field_name);
-            auto field_data =
-                storage::CreateFieldData(field_type, false, 1, total_num_rows);
-            field_data->FillFieldData(col_data);
-            field_datas.emplace_back(field_data);
-        }
-        if (WriteOptFieldIvfData(field_type,
-                                 field_id,
-                                 local_chunk_manager,
-                                 local_data_path,
-                                 field_datas,
-                                 write_offset)) {
-            actual_field_ids.insert(field_id);
-        }
-    }
-
-    if (actual_field_ids.size() != num_of_fields) {
-        write_offset = 0;
-        WriteOptFieldsIvfMeta(local_chunk_manager,
-                              local_data_path,
-                              actual_field_ids.size(),
-                              write_offset);
-        if (actual_field_ids.empty()) {
-            return "";
-        }
-    }
-    return local_data_path;
-}
-
 std::string
 DiskFileManagerImpl::CacheOptFieldToDisk(OptFieldT& fields_map) {
     const uint32_t num_of_fields = fields_map.size();
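The layout comment deleted above describes the optional-field IVF file as tightly packed binary: a meta block of version (uint8_t) and num_of_fields (uint32_t), followed by one block per field. A sketch of writing just that meta block at a given offset, mirroring how the surviving WriteOptFieldsIvfMeta advances write_offset; the function name and parameters here are assumptions for illustration only.

#include <cstdint>
#include <fstream>
#include <string>

// Append version (uint8_t) and num_of_fields (uint32_t) back to back, with no
// padding, at write_offset. Assumes the file was already created (CreateFile
// in the code above). Returns the offset where per-field blocks should follow.
uint64_t
WriteIvfMetaSketch(const std::string& path,
                   uint8_t version,
                   uint32_t num_of_fields,
                   uint64_t write_offset) {
    std::fstream out(path, std::ios::in | std::ios::out | std::ios::binary);
    out.seekp(static_cast<std::streamoff>(write_offset));
    out.write(reinterpret_cast<const char*>(&version), sizeof(version));
    out.write(reinterpret_cast<const char*>(&num_of_fields),
              sizeof(num_of_fields));
    return write_offset + sizeof(version) + sizeof(num_of_fields);
}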
@@ -904,17 +606,4 @@ DiskFileManagerImpl::CacheRawDataToDisk<bfloat16>(
 template std::string
 DiskFileManagerImpl::CacheRawDataToDisk<bin1>(
     std::vector<std::string> remote_files);
-template std::string
-DiskFileManagerImpl::CacheRawDataToDisk<float>(
-    std::shared_ptr<milvus_storage::Space> space);
-template std::string
-DiskFileManagerImpl::CacheRawDataToDisk<float16>(
-    std::shared_ptr<milvus_storage::Space> space);
-template std::string
-DiskFileManagerImpl::CacheRawDataToDisk<bfloat16>(
-    std::shared_ptr<milvus_storage::Space> space);
-template std::string
-DiskFileManagerImpl::CacheRawDataToDisk<bin1>(
-    std::shared_ptr<milvus_storage::Space> space);
-
 }  // namespace milvus::storage

@@ -25,8 +25,6 @@
 #include "storage/IndexData.h"
 #include "storage/FileManager.h"
 #include "storage/ChunkManager.h"
-#include "storage/space.h"
-
 #include "common/Consts.h"
 
 namespace milvus::storage {

@@ -35,9 +33,6 @@ class DiskFileManagerImpl : public FileManagerImpl {
  public:
     explicit DiskFileManagerImpl(const FileManagerContext& fileManagerContext);
 
-    explicit DiskFileManagerImpl(const FileManagerContext& fileManagerContext,
-                                 std::shared_ptr<milvus_storage::Space> space);
-
     virtual ~DiskFileManagerImpl();
 
     virtual bool

@@ -77,19 +72,6 @@ class DiskFileManagerImpl : public FileManagerImpl {
     void
     CacheIndexToDisk(const std::vector<std::string>& remote_files);
 
-    void
-    CacheIndexToDisk();
-
-    uint64_t
-    CacheBatchIndexFilesToDisk(const std::vector<std::string>& remote_files,
-                               const std::string& local_file_name,
-                               uint64_t local_file_init_offfset);
-
-    uint64_t
-    CacheBatchIndexFilesToDiskV2(const std::vector<std::string>& remote_files,
-                                 const std::string& local_file_name,
-                                 uint64_t local_file_init_offfset);
-
     void
     AddBatchIndexFiles(const std::string& local_file_name,
                        const std::vector<int64_t>& local_file_offsets,

@@ -100,27 +82,12 @@ class DiskFileManagerImpl : public FileManagerImpl {
     std::string
     CacheRawDataToDisk(std::vector<std::string> remote_files);
 
-    template <typename DataType>
-    std::string
-    CacheRawDataToDisk(std::shared_ptr<milvus_storage::Space> space);
-
     std::string
     CacheOptFieldToDisk(OptFieldT& fields_map);
 
-    std::string
-    CacheOptFieldToDisk(std::shared_ptr<milvus_storage::Space> space,
-                        OptFieldT& fields_map);
-
-    virtual bool
-    AddFileUsingSpace(const std::string& local_file_name,
-                      const std::vector<int64_t>& local_file_offsets,
-                      const std::vector<std::string>& remote_files,
-                      const std::vector<int64_t>& remote_file_sizes);
-
     std::string
     GetRemoteIndexPrefix() const {
-        return space_ != nullptr ? GetRemoteIndexObjectPrefixV2()
-                                 : GetRemoteIndexObjectPrefix();
+        return GetRemoteIndexObjectPrefix();
     }
 
  private:

@@ -141,8 +108,6 @@ class DiskFileManagerImpl : public FileManagerImpl {
 
     // remote file path
     std::map<std::string, int64_t> remote_paths_to_size_;
-
-    std::shared_ptr<milvus_storage::Space> space_;
 };
 
 using DiskANNFileManagerImplPtr = std::shared_ptr<DiskFileManagerImpl>;

@@ -25,7 +25,6 @@
 #include "log/Log.h"
 #include "storage/ChunkManager.h"
 #include "storage/Types.h"
-#include "storage/space.h"
 
 namespace milvus::storage {
 

@@ -40,15 +39,6 @@ struct FileManagerContext {
           chunkManagerPtr(chunkManagerPtr) {
     }
 
-    FileManagerContext(const FieldDataMeta& fieldDataMeta,
-                       const IndexMeta& indexMeta,
-                       const ChunkManagerPtr& chunkManagerPtr,
-                       std::shared_ptr<milvus_storage::Space> space)
-        : fieldDataMeta(fieldDataMeta),
-          indexMeta(indexMeta),
-          chunkManagerPtr(chunkManagerPtr),
-          space_(space) {
-    }
     bool
     Valid() const {
         return chunkManagerPtr != nullptr;

@@ -57,7 +47,6 @@ struct FileManagerContext {
     FieldDataMeta fieldDataMeta;
     IndexMeta indexMeta;
     ChunkManagerPtr chunkManagerPtr;
-    std::shared_ptr<milvus_storage::Space> space_;
 };
 
 #define FILEMANAGER_TRY try {

@@ -26,15 +26,6 @@
 
 namespace milvus::storage {
 
-MemFileManagerImpl::MemFileManagerImpl(
-    const FileManagerContext& fileManagerContext,
-    std::shared_ptr<milvus_storage::Space> space)
-    : FileManagerImpl(fileManagerContext.fieldDataMeta,
-                      fileManagerContext.indexMeta),
-      space_(space) {
-    rcm_ = fileManagerContext.chunkManagerPtr;
-}
-
 MemFileManagerImpl::MemFileManagerImpl(
     const FileManagerContext& fileManagerContext)
     : FileManagerImpl(fileManagerContext.fieldDataMeta,

@@ -91,50 +82,6 @@ MemFileManagerImpl::AddFile(const BinarySet& binary_set) {
     return true;
 }
 
-bool
-MemFileManagerImpl::AddFileV2(const BinarySet& binary_set) {
-    std::vector<const uint8_t*> data_slices;
-    std::vector<int64_t> slice_sizes;
-    std::vector<std::string> slice_names;
-
-    auto AddBatchIndexFiles = [&]() {
-        auto res = PutIndexData(space_,
-                                data_slices,
-                                slice_sizes,
-                                slice_names,
-                                field_meta_,
-                                index_meta_);
-        for (auto& [file, size] : res) {
-            remote_paths_to_size_[file] = size;
-        }
-    };
-
-    auto remotePrefix = GetRemoteIndexObjectPrefixV2();
-    int64_t batch_size = 0;
-    for (auto iter = binary_set.binary_map_.begin();
-         iter != binary_set.binary_map_.end();
-         iter++) {
-        if (batch_size >= DEFAULT_FIELD_MAX_MEMORY_LIMIT) {
-            AddBatchIndexFiles();
-            data_slices.clear();
-            slice_sizes.clear();
-            slice_names.clear();
-            batch_size = 0;
-        }
-
-        data_slices.emplace_back(iter->second->data.get());
-        slice_sizes.emplace_back(iter->second->size);
-        slice_names.emplace_back(remotePrefix + "/" + iter->first);
-        batch_size += iter->second->size;
-    }
-
-    if (data_slices.size() > 0) {
-        AddBatchIndexFiles();
-    }
-
-    return true;
-}
-
 bool
 MemFileManagerImpl::LoadFile(const std::string& filename) noexcept {
     return true;

@@ -25,7 +25,6 @@
 #include "storage/IndexData.h"
 #include "storage/FileManager.h"
 #include "storage/ChunkManager.h"
-#include "storage/space.h"
 
 namespace milvus::storage {
 

@@ -33,9 +32,6 @@ class MemFileManagerImpl : public FileManagerImpl {
  public:
     explicit MemFileManagerImpl(const FileManagerContext& fileManagerContext);
 
-    MemFileManagerImpl(const FileManagerContext& fileManagerContext,
-                       std::shared_ptr<milvus_storage::Space> space);
-
     virtual bool
     LoadFile(const std::string& filename) noexcept;
 

@@ -63,14 +59,6 @@ class MemFileManagerImpl : public FileManagerImpl {
     bool
     AddFile(const BinarySet& binary_set);
 
-    bool
-    AddFileV2(const BinarySet& binary_set);
-
-    std::shared_ptr<milvus_storage::Space>
-    space() const {
-        return space_;
-    }
-
     std::map<std::string, int64_t>
     GetRemotePathsToFileSize() const {
         return remote_paths_to_size_;

@@ -79,7 +67,6 @@ class MemFileManagerImpl : public FileManagerImpl {
  private:
     // remote file path
     std::map<std::string, int64_t> remote_paths_to_size_;
-    std::shared_ptr<milvus_storage::Space> space_;
 };
 
 using MemFileManagerImplPtr = std::shared_ptr<MemFileManagerImpl>;
@@ -516,22 +516,6 @@ DownloadAndDecodeRemoteFile(ChunkManager* chunk_manager,
     return DeserializeFileData(buf, fileSize);
 }
 
-std::unique_ptr<DataCodec>
-DownloadAndDecodeRemoteFileV2(std::shared_ptr<milvus_storage::Space> space,
-                              const std::string& file) {
-    auto fileSize = space->GetBlobByteSize(file);
-    if (!fileSize.ok()) {
-        PanicInfo(FileReadFailed, fileSize.status().ToString());
-    }
-    auto buf = std::shared_ptr<uint8_t[]>(new uint8_t[fileSize.value()]);
-    auto status = space->ReadBlob(file, buf.get());
-    if (!status.ok()) {
-        PanicInfo(FileReadFailed, status.ToString());
-    }
-
-    return DeserializeFileData(buf, fileSize.value());
-}
-
 std::pair<std::string, size_t>
 EncodeAndUploadIndexSlice(ChunkManager* chunk_manager,
                           uint8_t* buf,

@@ -551,27 +535,6 @@ EncodeAndUploadIndexSlice(ChunkManager* chunk_manager,
     return std::make_pair(std::move(object_key), serialized_index_size);
 }
 
-std::pair<std::string, size_t>
-EncodeAndUploadIndexSlice2(std::shared_ptr<milvus_storage::Space> space,
-                           uint8_t* buf,
-                           int64_t batch_size,
-                           IndexMeta index_meta,
-                           FieldDataMeta field_meta,
-                           std::string object_key) {
-    // todo: support nullable index
-    auto field_data = CreateFieldData(DataType::INT8, false);
-    field_data->FillFieldData(buf, batch_size);
-    auto indexData = std::make_shared<IndexData>(field_data);
-    indexData->set_index_meta(index_meta);
-    indexData->SetFieldDataMeta(field_meta);
-    auto serialized_index_data = indexData->serialize_to_remote_file();
-    auto serialized_index_size = serialized_index_data.size();
-    auto status = space->WriteBlob(
-        object_key, serialized_index_data.data(), serialized_index_size);
-    AssertInfo(status.ok(), "write to space error: {}", status.ToString());
-    return std::make_pair(std::move(object_key), serialized_index_size);
-}
-
 std::pair<std::string, size_t>
 EncodeAndUploadFieldSlice(ChunkManager* chunk_manager,
                           void* buf,

@@ -609,36 +572,6 @@ GetObjectData(ChunkManager* remote_chunk_manager,
     return futures;
 }
 
-std::vector<FieldDataPtr>
-GetObjectData(std::shared_ptr<milvus_storage::Space> space,
-              const std::vector<std::string>& remote_files) {
-    auto& pool = ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::HIGH);
-    std::vector<std::future<std::unique_ptr<DataCodec>>> futures;
-    for (auto& file : remote_files) {
-        futures.emplace_back(
-            pool.Submit(DownloadAndDecodeRemoteFileV2, space, file));
-    }
-
-    std::vector<FieldDataPtr> datas;
-    std::exception_ptr first_exception = nullptr;
-    for (auto& future : futures) {
-        try {
-            auto res = future.get();
-            datas.emplace_back(res->GetFieldData());
-        } catch (...) {
-            if (!first_exception) {
-                first_exception = std::current_exception();
-            }
-        }
-    }
-    ReleaseArrowUnused();
-    if (first_exception) {
-        std::rethrow_exception(first_exception);
-    }
-
-    return datas;
-}
-
 std::map<std::string, int64_t>
 PutIndexData(ChunkManager* remote_chunk_manager,
              const std::vector<const uint8_t*>& data_slices,
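The removed GetObjectData(space) overload above, like the PutIndexData(space) overload in the next hunk, follows one pattern: submit one task per file to a thread pool, wait on every future, remember only the first exception, and rethrow it after all tasks have settled so no future is abandoned mid-flight. A standard-library sketch of that collect-then-rethrow pattern using std::async; DownloadOne is a hypothetical worker, not a Milvus API.

#include <exception>
#include <future>
#include <string>
#include <vector>

// Fan out one download per file, gather all results, and surface only the
// first failure after every task has completed.
std::vector<std::string>
DownloadAll(const std::vector<std::string>& files,
            std::string (*DownloadOne)(const std::string&)) {
    std::vector<std::future<std::string>> futures;
    futures.reserve(files.size());
    for (const auto& f : files) {
        futures.push_back(std::async(std::launch::async, DownloadOne, f));
    }

    std::vector<std::string> results;
    std::exception_ptr first_exception = nullptr;
    for (auto& fut : futures) {
        try {
            results.push_back(fut.get());
        } catch (...) {
            if (!first_exception) {
                first_exception = std::current_exception();
            }
        }
    }
    if (first_exception) {
        std::rethrow_exception(first_exception);
    }
    return results;
}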
@@ -687,54 +620,6 @@ PutIndexData(ChunkManager* remote_chunk_manager,
     return remote_paths_to_size;
 }
 
-std::map<std::string, int64_t>
-PutIndexData(std::shared_ptr<milvus_storage::Space> space,
-             const std::vector<const uint8_t*>& data_slices,
-             const std::vector<int64_t>& slice_sizes,
-             const std::vector<std::string>& slice_names,
-             FieldDataMeta& field_meta,
-             IndexMeta& index_meta) {
-    auto& pool = ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::MIDDLE);
-    std::vector<std::future<std::pair<std::string, size_t>>> futures;
-    AssertInfo(data_slices.size() == slice_sizes.size(),
-               "inconsistent data slices size {} with slice sizes {}",
-               data_slices.size(),
-               slice_sizes.size());
-    AssertInfo(data_slices.size() == slice_names.size(),
-               "inconsistent data slices size {} with slice names size {}",
-               data_slices.size(),
-               slice_names.size());
-
-    for (int64_t i = 0; i < data_slices.size(); ++i) {
-        futures.push_back(pool.Submit(EncodeAndUploadIndexSlice2,
-                                      space,
-                                      const_cast<uint8_t*>(data_slices[i]),
-                                      slice_sizes[i],
-                                      index_meta,
-                                      field_meta,
-                                      slice_names[i]));
-    }
-
-    std::map<std::string, int64_t> remote_paths_to_size;
-    std::exception_ptr first_exception = nullptr;
-    for (auto& future : futures) {
-        try {
-            auto res = future.get();
-            remote_paths_to_size[res.first] = res.second;
-        } catch (...) {
-            if (!first_exception) {
-                first_exception = std::current_exception();
-            }
-        }
-    }
-    ReleaseArrowUnused();
-    if (first_exception) {
-        std::rethrow_exception(first_exception);
-    }
-
-    return remote_paths_to_size;
-}
-
 int64_t
 GetTotalNumRowsForFieldDatas(const std::vector<FieldDataPtr>& field_datas) {
     int64_t count = 0;

@@ -31,7 +31,6 @@
 #include "storage/ChunkManager.h"
 #include "storage/DataCodec.h"
 #include "storage/Types.h"
-#include "storage/space.h"
 
 namespace milvus::storage {
 

@@ -89,10 +88,6 @@ std::unique_ptr<DataCodec>
 DownloadAndDecodeRemoteFile(ChunkManager* chunk_manager,
                             const std::string& file);
 
-std::unique_ptr<DataCodec>
-DownloadAndDecodeRemoteFileV2(std::shared_ptr<milvus_storage::Space> space,
-                              const std::string& file);
-
 std::pair<std::string, size_t>
 EncodeAndUploadIndexSlice(ChunkManager* chunk_manager,
                           uint8_t* buf,

@@ -102,13 +97,6 @@ EncodeAndUploadIndexSlice(ChunkManager* chunk_manager,
                           std::string object_key);
 
 std::pair<std::string, size_t>
-EncodeAndUploadIndexSlice2(std::shared_ptr<milvus_storage::Space> space,
-                           uint8_t* buf,
-                           int64_t batch_size,
-                           IndexMeta index_meta,
-                           FieldDataMeta field_meta,
-                           std::string object_key);
-std::pair<std::string, size_t>
 EncodeAndUploadFieldSlice(ChunkManager* chunk_manager,
                           void* buf,
                           int64_t element_count,

@@ -120,10 +108,6 @@ std::vector<std::future<std::unique_ptr<DataCodec>>>
 GetObjectData(ChunkManager* remote_chunk_manager,
               const std::vector<std::string>& remote_files);
 
-std::vector<FieldDataPtr>
-GetObjectData(std::shared_ptr<milvus_storage::Space> space,
-              const std::vector<std::string>& remote_files);
-
 std::map<std::string, int64_t>
 PutIndexData(ChunkManager* remote_chunk_manager,
              const std::vector<const uint8_t*>& data_slices,

@@ -132,13 +116,6 @@ PutIndexData(ChunkManager* remote_chunk_manager,
              FieldDataMeta& field_meta,
              IndexMeta& index_meta);
 
-std::map<std::string, int64_t>
-PutIndexData(std::shared_ptr<milvus_storage::Space> space,
-             const std::vector<const uint8_t*>& data_slices,
-             const std::vector<int64_t>& slice_sizes,
-             const std::vector<std::string>& slice_names,
-             FieldDataMeta& field_meta,
-             IndexMeta& index_meta);
 int64_t
 GetTotalNumRowsForFieldDatas(const std::vector<FieldDataPtr>& field_datas);
 
internal/core/thirdparty/CMakeLists.txt (vendored, 2 changes)

@@ -41,8 +41,6 @@ if (USE_OPENDAL)
 endif()
 add_subdirectory(tantivy)
 
-add_subdirectory(milvus-storage)
-
 if (LINUX)
     add_subdirectory(jemalloc)
 endif()

@@ -1,48 +0,0 @@
-#-------------------------------------------------------------------------------
-# Copyright (C) 2019-2020 Zilliz. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software distributed under the License
-# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
-# or implied. See the License for the specific language governing permissions and limitations under the License.
-#-------------------------------------------------------------------------------
-
-set( MILVUS_STORAGE_VERSION 9d1ad9c)
-
-message(STATUS "Building milvus-storage-${MILVUS_STORAGE_VERSION} from source")
-message(STATUS ${CMAKE_BUILD_TYPE})
-
-# message(FATAL_ERROR ${CMAKE_CURRENT_SOURCE_DIR}/milvus-storage.patch)
-# set(milvus-storage-patch git apply --ignore-whitespace ${CMAKE_CURRENT_SOURCE_DIR}/milvus-storage.patch)
-set( CMAKE_PREFIX_PATH ${CONAN_BOOST_ROOT} )
-FetchContent_Declare(
-        milvus-storage
-        GIT_REPOSITORY  "https://github.com/milvus-io/milvus-storage.git"
-        GIT_TAG         ${MILVUS_STORAGE_VERSION}
-        SOURCE_DIR      ${CMAKE_CURRENT_BINARY_DIR}/milvus-storage-src
-        BINARY_DIR      ${CMAKE_CURRENT_BINARY_DIR}/milvus-storage-build
-        SOURCE_SUBDIR   cpp
-        PATCH_COMMAND   ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/milvus-storage_CMakeLists.txt <SOURCE_DIR>/cpp/CMakeLists.txt
-        DOWNLOAD_DIR    ${THIRDPARTY_DOWNLOAD_PATH} )
-
-FetchContent_MakeAvailable(milvus-storage)
-# target_compile_features(milvus-storage PUBLIC cxx_std_20)
-
-# FetchContent_GetProperties( milvus-storage )
-# if ( NOT milvus-storage_POPULATED )
-#     FetchContent_Populate( milvus-storage)
-
-#     # Adding the following target:
-#     add_subdirectory( ${milvus-storage_SOURCE_DIR}/cpp
-#                       ${milvus-storage_BINARY_DIR} )
-# endif()
-
-# message(FATAL_ERROR ${milvus-storage_SOURCE_DIR} ${milvus-storage_BINARY_DIR})
-# get prometheus COMPILE_OPTIONS
-# get_property( var DIRECTORY "${milvus-storage_SOURCE_DIR}" PROPERTY COMPILE_OPTIONS )
-message( STATUS "milvus-storage src compile options: ${var}" )
-# unset(CMAKE_CXX_STANDARD)
@@ -1,34 +0,0 @@
-cmake_minimum_required(VERSION 3.20.0)
-
-project(milvus-storage VERSION 0.1.0)
-
-option(WITH_UT "Build the testing tree." ON)
-option(WITH_ASAN "Build with address sanitizer." OFF)
-option(USE_OPENDAL "Build with opendal." OFF)
-
-if (USE_OPENDAL)
-  add_compile_definitions(MILVUS_OPENDAL)
-endif()
-
-set(CMAKE_CXX_STANDARD 20)
-set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
-
-find_package(Boost REQUIRED)
-find_package(Arrow REQUIRED)
-find_package(Protobuf REQUIRED)
-find_package(glog REQUIRED)
-find_package(AWSSDK REQUIRED)
-
-file(GLOB_RECURSE SRC_FILES src/*.cpp src/*.cc)
-message(STATUS "SRC_FILES: ${SRC_FILES}")
-add_library(milvus-storage ${SRC_FILES})
-target_include_directories(milvus-storage PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/milvus-storage ${CMAKE_CURRENT_SOURCE_DIR}/src)
-target_link_libraries(milvus-storage PUBLIC arrow::arrow Boost::boost protobuf::protobuf AWS::aws-sdk-cpp-core glog::glog)
-if (USE_OPENDAL)
-  target_link_libraries(milvus-storage PUBLIC opendal)
-endif()
-
-if (WITH_UT)
-  enable_testing()
-  add_subdirectory(test)
-endif()
@@ -36,9 +36,6 @@
 #include "storage/InsertData.h"
 #include "storage/ThreadPool.h"
 #include "storage/Types.h"
-#include "storage/options.h"
-#include "storage/schema.h"
-#include "storage/space.h"
 #include "storage/Util.h"
 #include "storage/DiskFileManagerImpl.h"
 #include "storage/LocalChunkManagerSingleton.h"

@@ -285,62 +282,6 @@ PrepareInsertData(const int64_t opt_field_data_range) -> std::string {
     return path;
 }
 
-auto
-PrepareInsertDataSpace(const int64_t opt_field_data_range)
-    -> std::pair<std::string, std::shared_ptr<milvus_storage::Space>> {
-    std::string path = kOptFieldPath + "space/" + std::to_string(kOptFieldId);
-    arrow::FieldVector arrow_fields{
-        arrow::field("pk", arrow::int64()),
-        arrow::field("ts", arrow::int64()),
-        arrow::field(kOptFieldName, arrow::int64()),
-        arrow::field("vec", arrow::fixed_size_binary(1))};
-    auto arrow_schema = std::make_shared<arrow::Schema>(arrow_fields);
-    milvus_storage::SchemaOptions schema_options = {
-        .primary_column = "pk", .version_column = "ts", .vector_column = "vec"};
-    auto schema =
-        std::make_shared<milvus_storage::Schema>(arrow_schema, schema_options);
-    boost::filesystem::remove_all(path);
-    boost::filesystem::create_directories(path);
-    EXPECT_TRUE(schema->Validate().ok());
-    auto opt_space = milvus_storage::Space::Open(
-        "file://" + boost::filesystem::canonical(path).string(),
-        milvus_storage::Options{schema});
-    EXPECT_TRUE(opt_space.has_value());
-    auto space = std::move(opt_space.value());
-    const auto data = PrepareRawFieldData<int64_t>(opt_field_data_range);
-    arrow::Int64Builder pk_builder;
-    arrow::Int64Builder ts_builder;
-    arrow::NumericBuilder<arrow::Int64Type> scalar_builder;
-    arrow::FixedSizeBinaryBuilder vec_builder(arrow::fixed_size_binary(1));
-    const uint8_t kByteZero = 0;
-    for (size_t i = 0; i < kEntityCnt; ++i) {
-        EXPECT_TRUE(pk_builder.Append(i).ok());
-        EXPECT_TRUE(ts_builder.Append(i).ok());
-        EXPECT_TRUE(vec_builder.Append(&kByteZero).ok());
-    }
-    for (size_t i = 0; i < kEntityCnt; ++i) {
-        EXPECT_TRUE(scalar_builder.Append(data[i]).ok());
-    }
-    std::shared_ptr<arrow::Array> pk_array;
-    EXPECT_TRUE(pk_builder.Finish(&pk_array).ok());
-    std::shared_ptr<arrow::Array> ts_array;
-    EXPECT_TRUE(ts_builder.Finish(&ts_array).ok());
-    std::shared_ptr<arrow::Array> scalar_array;
-    EXPECT_TRUE(scalar_builder.Finish(&scalar_array).ok());
-    std::shared_ptr<arrow::Array> vec_array;
-    EXPECT_TRUE(vec_builder.Finish(&vec_array).ok());
-    auto batch =
-        arrow::RecordBatch::Make(arrow_schema,
-                                 kEntityCnt,
-                                 {pk_array, ts_array, scalar_array, vec_array});
-    milvus_storage::WriteOption write_opt = {kEntityCnt};
-    space->Write(*arrow::RecordBatchReader::Make({batch}, arrow_schema)
-                      .ValueOrDie()
-                      .get(),
-                 write_opt);
-    return {path, std::move(space)};
-}
-
 template <DataType DT>
 auto
 PrepareOptionalField(const std::shared_ptr<DiskFileManagerImpl>& file_manager,

@@ -400,47 +341,24 @@ CheckOptFieldCorrectness(
 }
 }  // namespace
 
-TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskFieldEmpty) {
-    auto file_manager = CreateFileManager(cm_);
-    {
-        const auto& [insert_file_space_path, space] =
-            PrepareInsertDataSpace(kOptFieldDataRange);
-        OptFieldT opt_fields;
-        EXPECT_TRUE(file_manager->CacheOptFieldToDisk(opt_fields).empty());
-        EXPECT_TRUE(
-            file_manager->CacheOptFieldToDisk(space, opt_fields).empty());
-    }
-
-    {
-        auto opt_fileds =
-            PrepareOptionalField<DataType::INT64>(file_manager, "");
-        auto res = file_manager->CacheOptFieldToDisk(nullptr, opt_fileds);
-        EXPECT_TRUE(res.empty());
-    }
-}
-
 TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskOptFieldMoreThanOne) {
     auto file_manager = CreateFileManager(cm_);
     const auto insert_file_path =
         PrepareInsertData<DataType::INT64, int64_t>(kOptFieldDataRange);
-    const auto& [insert_file_space_path, space] =
-        PrepareInsertDataSpace(kOptFieldDataRange);
     OptFieldT opt_fields =
         PrepareOptionalField<DataType::INT64>(file_manager, insert_file_path);
     opt_fields[kOptFieldId + 1] = {
-        kOptFieldName + "second", DataType::INT64, {insert_file_space_path}};
+        kOptFieldName + "second", DataType::INT64, {insert_file_path}};
     EXPECT_THROW(file_manager->CacheOptFieldToDisk(opt_fields), SegcoreError);
|
||||||
EXPECT_THROW(file_manager->CacheOptFieldToDisk(space, opt_fields),
|
|
||||||
SegcoreError);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskSpaceCorrect) {
|
TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskSpaceCorrect) {
|
||||||
auto file_manager = CreateFileManager(cm_);
|
auto file_manager = CreateFileManager(cm_);
|
||||||
const auto& [insert_file_path, space] =
|
const auto insert_file_path =
|
||||||
PrepareInsertDataSpace(kOptFieldDataRange);
|
PrepareInsertData<DataType::INT64, int64_t>(kOptFieldDataRange);
|
||||||
auto opt_fileds =
|
auto opt_fileds =
|
||||||
PrepareOptionalField<DataType::INT64>(file_manager, insert_file_path);
|
PrepareOptionalField<DataType::INT64>(file_manager, insert_file_path);
|
||||||
auto res = file_manager->CacheOptFieldToDisk(space, opt_fileds);
|
auto res = file_manager->CacheOptFieldToDisk(opt_fileds);
|
||||||
ASSERT_FALSE(res.empty());
|
ASSERT_FALSE(res.empty());
|
||||||
CheckOptFieldCorrectness(res);
|
CheckOptFieldCorrectness(res);
|
||||||
}
|
}
|
||||||
@ -477,12 +395,4 @@ TEST_F(DiskAnnFileManagerTest, CacheOptFieldToDiskOnlyOneCategory) {
|
|||||||
auto res = file_manager->CacheOptFieldToDisk(opt_fileds);
|
auto res = file_manager->CacheOptFieldToDisk(opt_fileds);
|
||||||
ASSERT_TRUE(res.empty());
|
ASSERT_TRUE(res.empty());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
{
|
|
||||||
const auto& [insert_file_path, space] = PrepareInsertDataSpace(1);
|
|
||||||
auto opt_fileds = PrepareOptionalField<DataType::INT64>(
|
|
||||||
file_manager, insert_file_path);
|
|
||||||
auto res = file_manager->CacheOptFieldToDisk(space, opt_fileds);
|
|
||||||
ASSERT_TRUE(res.empty());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@ -32,7 +32,6 @@
|
|||||||
#include "index/IndexFactory.h"
|
#include "index/IndexFactory.h"
|
||||||
#include "common/QueryResult.h"
|
#include "common/QueryResult.h"
|
||||||
#include "segcore/Types.h"
|
#include "segcore/Types.h"
|
||||||
#include "storage/options.h"
|
|
||||||
#include "test_utils/indexbuilder_test_utils.h"
|
#include "test_utils/indexbuilder_test_utils.h"
|
||||||
#include "test_utils/storage_test_utils.h"
|
#include "test_utils/storage_test_utils.h"
|
||||||
#include "test_utils/DataGen.h"
|
#include "test_utils/DataGen.h"
|
||||||
@ -916,261 +915,4 @@ TEST(Indexing, SearchDiskAnnWithBFloat16) {
|
|||||||
SearchResult result;
|
SearchResult result;
|
||||||
EXPECT_NO_THROW(vec_index->Query(xq_dataset, search_info, nullptr, result));
|
EXPECT_NO_THROW(vec_index->Query(xq_dataset, search_info, nullptr, result));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//class IndexTestV2
|
|
||||||
// : public ::testing::TestWithParam<std::tuple<Param, int64_t, bool>> {
|
|
||||||
// protected:
|
|
||||||
// std::shared_ptr<arrow::Schema>
|
|
||||||
// TestSchema(int vec_size) {
|
|
||||||
// arrow::FieldVector fields;
|
|
||||||
// fields.push_back(arrow::field("pk", arrow::int64()));
|
|
||||||
// fields.push_back(arrow::field("ts", arrow::int64()));
|
|
||||||
// fields.push_back(
|
|
||||||
// arrow::field("vec", arrow::fixed_size_binary(vec_size)));
|
|
||||||
// return std::make_shared<arrow::Schema>(fields);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// std::shared_ptr<arrow::RecordBatchReader>
|
|
||||||
// TestRecords(int vec_size, GeneratedData& dataset) {
|
|
||||||
// arrow::Int64Builder pk_builder;
|
|
||||||
// arrow::Int64Builder ts_builder;
|
|
||||||
// arrow::FixedSizeBinaryBuilder vec_builder(
|
|
||||||
// arrow::fixed_size_binary(vec_size));
|
|
||||||
// if (!is_binary) {
|
|
||||||
// xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
|
||||||
// auto data = reinterpret_cast<char*>(xb_data.data());
|
|
||||||
// for (auto i = 0; i < NB; ++i) {
|
|
||||||
// EXPECT_TRUE(pk_builder.Append(i).ok());
|
|
||||||
// EXPECT_TRUE(ts_builder.Append(i).ok());
|
|
||||||
// EXPECT_TRUE(vec_builder.Append(data + i * vec_size).ok());
|
|
||||||
// }
|
|
||||||
// } else {
|
|
||||||
// xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
|
||||||
// for (auto i = 0; i < NB; ++i) {
|
|
||||||
// EXPECT_TRUE(pk_builder.Append(i).ok());
|
|
||||||
// EXPECT_TRUE(ts_builder.Append(i).ok());
|
|
||||||
// EXPECT_TRUE(
|
|
||||||
// vec_builder.Append(xb_bin_data.data() + i * vec_size).ok());
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// std::shared_ptr<arrow::Array> pk_array;
|
|
||||||
// EXPECT_TRUE(pk_builder.Finish(&pk_array).ok());
|
|
||||||
// std::shared_ptr<arrow::Array> ts_array;
|
|
||||||
// EXPECT_TRUE(ts_builder.Finish(&ts_array).ok());
|
|
||||||
// std::shared_ptr<arrow::Array> vec_array;
|
|
||||||
// EXPECT_TRUE(vec_builder.Finish(&vec_array).ok());
|
|
||||||
// auto schema = TestSchema(vec_size);
|
|
||||||
// auto rec_batch = arrow::RecordBatch::Make(
|
|
||||||
// schema, NB, {pk_array, ts_array, vec_array});
|
|
||||||
// auto reader =
|
|
||||||
// arrow::RecordBatchReader::Make({rec_batch}, schema).ValueOrDie();
|
|
||||||
// return reader;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// std::shared_ptr<milvus_storage::Space>
|
|
||||||
// TestSpace(int vec_size, GeneratedData& dataset) {
|
|
||||||
// auto arrow_schema = TestSchema(vec_size);
|
|
||||||
// auto schema_options = std::make_shared<milvus_storage::SchemaOptions>();
|
|
||||||
// schema_options->primary_column = "pk";
|
|
||||||
// schema_options->version_column = "ts";
|
|
||||||
// schema_options->vector_column = "vec";
|
|
||||||
// auto schema = std::make_shared<milvus_storage::Schema>(arrow_schema,
|
|
||||||
// schema_options);
|
|
||||||
// EXPECT_TRUE(schema->Validate().ok());
|
|
||||||
//
|
|
||||||
// auto space_res = milvus_storage::Space::Open(
|
|
||||||
// "file://" + boost::filesystem::canonical(temp_path).string(),
|
|
||||||
// milvus_storage::Options{schema});
|
|
||||||
// EXPECT_TRUE(space_res.has_value());
|
|
||||||
//
|
|
||||||
// auto space = std::move(space_res.value());
|
|
||||||
// auto rec = TestRecords(vec_size, dataset);
|
|
||||||
// auto write_opt = milvus_storage::WriteOption{NB};
|
|
||||||
// space->Write(rec.get(), &write_opt);
|
|
||||||
// return std::move(space);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// void
|
|
||||||
// SetUp() override {
|
|
||||||
// temp_path = boost::filesystem::temp_directory_path() /
|
|
||||||
// boost::filesystem::unique_path();
|
|
||||||
// boost::filesystem::create_directory(temp_path);
|
|
||||||
// storage_config_ = get_default_local_storage_config();
|
|
||||||
//
|
|
||||||
// auto param = GetParam();
|
|
||||||
// index_type = std::get<0>(param).first;
|
|
||||||
// metric_type = std::get<0>(param).second;
|
|
||||||
// file_slice_size = std::get<1>(param);
|
|
||||||
// enable_mmap = index_type != knowhere::IndexEnum::INDEX_DISKANN &&
|
|
||||||
// std::get<2>(param);
|
|
||||||
// if (enable_mmap) {
|
|
||||||
// mmap_file_path = boost::filesystem::temp_directory_path() /
|
|
||||||
// boost::filesystem::unique_path();
|
|
||||||
// }
|
|
||||||
// NB = 3000;
|
|
||||||
//
|
|
||||||
// // try to reduce the test time,
|
|
||||||
// // but the large dataset is needed for the case below.
|
|
||||||
// auto test_name = std::string(
|
|
||||||
// testing::UnitTest::GetInstance()->current_test_info()->name());
|
|
||||||
// if (test_name == "Mmap" &&
|
|
||||||
// index_type == knowhere::IndexEnum::INDEX_HNSW) {
|
|
||||||
// NB = 270000;
|
|
||||||
// }
|
|
||||||
// build_conf = generate_build_conf(index_type, metric_type);
|
|
||||||
// load_conf = generate_load_conf(index_type, metric_type, NB);
|
|
||||||
// search_conf = generate_search_conf(index_type, metric_type);
|
|
||||||
// range_search_conf = generate_range_search_conf(index_type, metric_type);
|
|
||||||
//
|
|
||||||
// std::map<knowhere::MetricType, bool> is_binary_map = {
|
|
||||||
// {knowhere::IndexEnum::INDEX_FAISS_IDMAP, false},
|
|
||||||
// {knowhere::IndexEnum::INDEX_FAISS_IVFPQ, false},
|
|
||||||
// {knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, false},
|
|
||||||
// {knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, false},
|
|
||||||
// {knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, true},
|
|
||||||
// {knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, true},
|
|
||||||
// {knowhere::IndexEnum::INDEX_HNSW, false},
|
|
||||||
// {knowhere::IndexEnum::INDEX_DISKANN, false},
|
|
||||||
// };
|
|
||||||
//
|
|
||||||
// is_binary = is_binary_map[index_type];
|
|
||||||
// int vec_size;
|
|
||||||
// if (is_binary) {
|
|
||||||
// vec_size = DIM / 8;
|
|
||||||
// vec_field_data_type = milvus::DataType::VECTOR_BINARY;
|
|
||||||
// } else {
|
|
||||||
// vec_size = DIM * 4;
|
|
||||||
// vec_field_data_type = milvus::DataType::VECTOR_FLOAT;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// auto dataset = GenDataset(NB, metric_type, is_binary);
|
|
||||||
// space = TestSpace(vec_size, dataset);
|
|
||||||
//
|
|
||||||
// if (!is_binary) {
|
|
||||||
// xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
|
||||||
// xq_dataset = knowhere::GenDataSet(
|
|
||||||
// NQ, DIM, xb_data.data() + DIM * query_offset);
|
|
||||||
// } else {
|
|
||||||
// xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
|
||||||
// xq_dataset = knowhere::GenDataSet(
|
|
||||||
// NQ, DIM, xb_bin_data.data() + DIM * query_offset);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// void
|
|
||||||
// TearDown() override {
|
|
||||||
// boost::filesystem::remove_all(temp_path);
|
|
||||||
// if (enable_mmap) {
|
|
||||||
// boost::filesystem::remove_all(mmap_file_path);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// protected:
|
|
||||||
// std::string index_type, metric_type;
|
|
||||||
// bool is_binary;
|
|
||||||
// milvus::Config build_conf;
|
|
||||||
// milvus::Config load_conf;
|
|
||||||
// milvus::Config search_conf;
|
|
||||||
// milvus::Config range_search_conf;
|
|
||||||
// milvus::DataType vec_field_data_type;
|
|
||||||
// knowhere::DataSetPtr xb_dataset;
|
|
||||||
// FixedVector<float> xb_data;
|
|
||||||
// FixedVector<uint8_t> xb_bin_data;
|
|
||||||
// knowhere::DataSetPtr xq_dataset;
|
|
||||||
// int64_t query_offset = 100;
|
|
||||||
// int64_t NB = 3000;
|
|
||||||
// StorageConfig storage_config_;
|
|
||||||
//
|
|
||||||
// boost::filesystem::path temp_path;
|
|
||||||
// std::shared_ptr<milvus_storage::Space> space;
|
|
||||||
// int64_t file_slice_size = DEFAULT_INDEX_FILE_SLICE_SIZE;
|
|
||||||
// bool enable_mmap;
|
|
||||||
// boost::filesystem::path mmap_file_path;
|
|
||||||
//};
|
|
||||||
//
|
|
||||||
//INSTANTIATE_TEST_SUITE_P(
|
|
||||||
// IndexTypeParameters,
|
|
||||||
// IndexTestV2,
|
|
||||||
// testing::Combine(
|
|
||||||
// ::testing::Values(
|
|
||||||
// std::pair(knowhere::IndexEnum::INDEX_FAISS_IDMAP,
|
|
||||||
// knowhere::metric::L2),
|
|
||||||
// std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFPQ,
|
|
||||||
// knowhere::metric::L2),
|
|
||||||
// std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT,
|
|
||||||
// knowhere::metric::L2),
|
|
||||||
// std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8,
|
|
||||||
// knowhere::metric::L2),
|
|
||||||
// std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
|
|
||||||
// knowhere::metric::JACCARD),
|
|
||||||
// std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP,
|
|
||||||
// knowhere::metric::JACCARD),
|
|
||||||
//#ifdef BUILD_DISK_ANN
|
|
||||||
// std::pair(knowhere::IndexEnum::INDEX_DISKANN, knowhere::metric::L2),
|
|
||||||
//#endif
|
|
||||||
// std::pair(knowhere::IndexEnum::INDEX_HNSW, knowhere::metric::L2)),
|
|
||||||
// testing::Values(DEFAULT_INDEX_FILE_SLICE_SIZE, 5000L),
|
|
||||||
// testing::Bool()));
|
|
||||||
//
|
|
||||||
//TEST_P(IndexTestV2, BuildAndQuery) {
|
|
||||||
// FILE_SLICE_SIZE = file_slice_size;
|
|
||||||
// milvus::index::CreateIndexInfo create_index_info;
|
|
||||||
// create_index_info.index_type = index_type;
|
|
||||||
// create_index_info.metric_type = metric_type;
|
|
||||||
// create_index_info.field_type = vec_field_data_type;
|
|
||||||
// create_index_info.field_name = "vec";
|
|
||||||
// create_index_info.dim = DIM;
|
|
||||||
// create_index_info.index_engine_version =
|
|
||||||
// knowhere::Version::GetCurrentVersion().VersionNumber();
|
|
||||||
// index::IndexBasePtr index;
|
|
||||||
//
|
|
||||||
// milvus::storage::FieldDataMeta field_data_meta{1, 2, 3, 100};
|
|
||||||
// milvus::storage::IndexMeta index_meta{.segment_id = 3,
|
|
||||||
// .field_id = 100,
|
|
||||||
// .build_id = 1000,
|
|
||||||
// .index_version = 1,
|
|
||||||
// .field_name = "vec",
|
|
||||||
// .field_type = vec_field_data_type,
|
|
||||||
// .dim = DIM};
|
|
||||||
// auto chunk_manager = milvus::storage::CreateChunkManager(storage_config_);
|
|
||||||
// milvus::storage::FileManagerContext file_manager_context(
|
|
||||||
// field_data_meta, index_meta, chunk_manager, space);
|
|
||||||
// index = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
|
||||||
// create_index_info, file_manager_context, space);
|
|
||||||
//
|
|
||||||
// auto build_conf = generate_build_conf(index_type, metric_type);
|
|
||||||
// index->BuildV2(build_conf);
|
|
||||||
// milvus::index::IndexBasePtr new_index;
|
|
||||||
// milvus::index::VectorIndex* vec_index = nullptr;
|
|
||||||
//
|
|
||||||
// auto binary_set = index->UploadV2();
|
|
||||||
// index.reset();
|
|
||||||
//
|
|
||||||
// new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
|
||||||
// create_index_info, file_manager_context, space);
|
|
||||||
// vec_index = dynamic_cast<milvus::index::VectorIndex*>(new_index.get());
|
|
||||||
//
|
|
||||||
// load_conf = generate_load_conf(index_type, metric_type, 0);
|
|
||||||
// if (enable_mmap) {
|
|
||||||
// load_conf[kMmapFilepath] = mmap_file_path.string();
|
|
||||||
// }
|
|
||||||
// ASSERT_NO_THROW(vec_index->LoadV2(load_conf));
|
|
||||||
// EXPECT_EQ(vec_index->Count(), NB);
|
|
||||||
// EXPECT_EQ(vec_index->GetDim(), DIM);
|
|
||||||
//
|
|
||||||
// milvus::SearchInfo search_info;
|
|
||||||
// search_info.topk_ = K;
|
|
||||||
// search_info.metric_type_ = metric_type;
|
|
||||||
// search_info.search_params_ = search_conf;
|
|
||||||
// auto result = vec_index->Query(xq_dataset, search_info, nullptr);
|
|
||||||
// EXPECT_EQ(result->total_nq_, NQ);
|
|
||||||
// EXPECT_EQ(result->unity_topK_, K);
|
|
||||||
// EXPECT_EQ(result->distances_.size(), NQ * K);
|
|
||||||
// EXPECT_EQ(result->seg_offsets_.size(), NQ * K);
|
|
||||||
// if (!is_binary) {
|
|
||||||
// EXPECT_EQ(result->seg_offsets_[0], query_offset);
|
|
||||||
// }
|
|
||||||
// search_info.search_params_ = range_search_conf;
|
|
||||||
// vec_index->Query(xq_dataset, search_info, nullptr);
|
|
||||||
//}
|
|
||||||
@ -301,31 +301,6 @@ TestRecords(int vec_size, GeneratedData& dataset, std::vector<T>& scalars) {
|
|||||||
return reader;
|
return reader;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
std::shared_ptr<milvus_storage::Space>
|
|
||||||
TestSpace(boost::filesystem::path& temp_path,
|
|
||||||
int vec_size,
|
|
||||||
GeneratedData& dataset,
|
|
||||||
std::vector<T>& scalars) {
|
|
||||||
auto arrow_schema = TestSchema<T>(vec_size);
|
|
||||||
milvus_storage::SchemaOptions schema_options{
|
|
||||||
.primary_column = "pk", .version_column = "ts", .vector_column = "vec"};
|
|
||||||
auto schema =
|
|
||||||
std::make_shared<milvus_storage::Schema>(arrow_schema, schema_options);
|
|
||||||
EXPECT_TRUE(schema->Validate().ok());
|
|
||||||
|
|
||||||
auto space_res = milvus_storage::Space::Open(
|
|
||||||
"file://" + boost::filesystem::canonical(temp_path).string(),
|
|
||||||
milvus_storage::Options{schema});
|
|
||||||
EXPECT_TRUE(space_res.has_value());
|
|
||||||
|
|
||||||
auto space = std::move(space_res.value());
|
|
||||||
auto rec = TestRecords<T>(vec_size, dataset, scalars);
|
|
||||||
auto write_opt = milvus_storage::WriteOption{nb};
|
|
||||||
space->Write(*rec, write_opt);
|
|
||||||
return std::move(space);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct TypedScalarIndexTestV2<int8_t>::Helper {
|
struct TypedScalarIndexTestV2<int8_t>::Helper {
|
||||||
using C = arrow::Int8Type;
|
using C = arrow::Int8Type;
|
||||||
|
|||||||
@ -349,116 +349,5 @@ TEST_F(StringIndexMarisaTest, BaseIndexCodec) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
using milvus::segcore::GeneratedData;
|
|
||||||
class StringIndexMarisaTestV2 : public StringIndexBaseTest {
|
|
||||||
std::shared_ptr<arrow::Schema>
|
|
||||||
TestSchema(int vec_size) {
|
|
||||||
arrow::FieldVector fields;
|
|
||||||
fields.push_back(arrow::field("pk", arrow::int64()));
|
|
||||||
fields.push_back(arrow::field("ts", arrow::int64()));
|
|
||||||
fields.push_back(arrow::field("scalar", arrow::utf8()));
|
|
||||||
fields.push_back(
|
|
||||||
arrow::field("vec", arrow::fixed_size_binary(vec_size)));
|
|
||||||
return std::make_shared<arrow::Schema>(fields);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::shared_ptr<arrow::RecordBatchReader>
|
|
||||||
TestRecords(int vec_size,
|
|
||||||
GeneratedData& dataset,
|
|
||||||
std::vector<std::string>& scalars) {
|
|
||||||
arrow::Int64Builder pk_builder;
|
|
||||||
arrow::Int64Builder ts_builder;
|
|
||||||
arrow::StringBuilder scalar_builder;
|
|
||||||
arrow::FixedSizeBinaryBuilder vec_builder(
|
|
||||||
arrow::fixed_size_binary(vec_size));
|
|
||||||
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
|
|
||||||
auto data = reinterpret_cast<char*>(xb_data.data());
|
|
||||||
for (auto i = 0; i < nb; ++i) {
|
|
||||||
EXPECT_TRUE(pk_builder.Append(i).ok());
|
|
||||||
EXPECT_TRUE(ts_builder.Append(i).ok());
|
|
||||||
EXPECT_TRUE(vec_builder.Append(data + i * vec_size).ok());
|
|
||||||
}
|
|
||||||
for (auto& v : scalars) {
|
|
||||||
EXPECT_TRUE(scalar_builder.Append(v).ok());
|
|
||||||
}
|
|
||||||
std::shared_ptr<arrow::Array> pk_array;
|
|
||||||
EXPECT_TRUE(pk_builder.Finish(&pk_array).ok());
|
|
||||||
std::shared_ptr<arrow::Array> ts_array;
|
|
||||||
EXPECT_TRUE(ts_builder.Finish(&ts_array).ok());
|
|
||||||
std::shared_ptr<arrow::Array> scalar_array;
|
|
||||||
EXPECT_TRUE(scalar_builder.Finish(&scalar_array).ok());
|
|
||||||
std::shared_ptr<arrow::Array> vec_array;
|
|
||||||
EXPECT_TRUE(vec_builder.Finish(&vec_array).ok());
|
|
||||||
auto schema = TestSchema(vec_size);
|
|
||||||
auto rec_batch = arrow::RecordBatch::Make(
|
|
||||||
schema, nb, {pk_array, ts_array, scalar_array, vec_array});
|
|
||||||
auto reader =
|
|
||||||
arrow::RecordBatchReader::Make({rec_batch}, schema).ValueOrDie();
|
|
||||||
return reader;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::shared_ptr<milvus_storage::Space>
|
|
||||||
TestSpace(int vec_size,
|
|
||||||
GeneratedData& dataset,
|
|
||||||
std::vector<std::string>& scalars) {
|
|
||||||
auto arrow_schema = TestSchema(vec_size);
|
|
||||||
milvus_storage::SchemaOptions schema_options{.primary_column = "pk",
|
|
||||||
.version_column = "ts",
|
|
||||||
.vector_column = "vec"};
|
|
||||||
auto schema = std::make_shared<milvus_storage::Schema>(arrow_schema,
|
|
||||||
schema_options);
|
|
||||||
EXPECT_TRUE(schema->Validate().ok());
|
|
||||||
|
|
||||||
auto space_res = milvus_storage::Space::Open(
|
|
||||||
"file://" + boost::filesystem::canonical(temp_path).string(),
|
|
||||||
milvus_storage::Options{schema});
|
|
||||||
EXPECT_TRUE(space_res.has_value());
|
|
||||||
|
|
||||||
auto space = std::move(space_res.value());
|
|
||||||
auto rec = TestRecords(vec_size, dataset, scalars);
|
|
||||||
auto write_opt = milvus_storage::WriteOption{nb};
|
|
||||||
space->Write(*rec, write_opt);
|
|
||||||
return std::move(space);
|
|
||||||
}
|
|
||||||
void
|
|
||||||
SetUp() override {
|
|
||||||
StringIndexBaseTest::SetUp();
|
|
||||||
temp_path = boost::filesystem::temp_directory_path() /
|
|
||||||
boost::filesystem::unique_path();
|
|
||||||
boost::filesystem::create_directory(temp_path);
|
|
||||||
|
|
||||||
auto vec_size = DIM * 4;
|
|
||||||
auto vec_field_data_type = milvus::DataType::VECTOR_FLOAT;
|
|
||||||
auto dataset = ::GenDataset(nb, knowhere::metric::L2, false);
|
|
||||||
|
|
||||||
space = TestSpace(vec_size, dataset, strs);
|
|
||||||
}
|
|
||||||
void
|
|
||||||
TearDown() override {
|
|
||||||
boost::filesystem::remove_all(temp_path);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
|
||||||
boost::filesystem::path temp_path;
|
|
||||||
std::shared_ptr<milvus_storage::Space> space;
|
|
||||||
};
|
|
||||||
|
|
||||||
TEST_F(StringIndexMarisaTestV2, Base) {
|
|
||||||
auto storage_config = get_default_local_storage_config();
|
|
||||||
auto chunk_manager = milvus::storage::CreateChunkManager(storage_config);
|
|
||||||
milvus::storage::FileManagerContext file_manager_context(
|
|
||||||
{}, {.field_name = "scalar"}, chunk_manager, space);
|
|
||||||
auto index =
|
|
||||||
milvus::index::CreateStringIndexMarisa(file_manager_context, space);
|
|
||||||
index->BuildV2();
|
|
||||||
index->UploadV2();
|
|
||||||
|
|
||||||
auto new_index =
|
|
||||||
milvus::index::CreateStringIndexMarisa(file_manager_context, space);
|
|
||||||
new_index->LoadV2();
|
|
||||||
ASSERT_EQ(strs.size(), index->Count());
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace index
|
} // namespace index
|
||||||
} // namespace milvus
|
} // namespace milvus
|
||||||
|
|||||||
@ -543,10 +543,6 @@ func (s *Server) SaveBinlogPaths(ctx context.Context, req *datapb.SaveBinlogPath
|
|||||||
UpdateCheckPointOperator(req.GetSegmentID(), req.GetCheckPoints()),
|
UpdateCheckPointOperator(req.GetSegmentID(), req.GetCheckPoints()),
|
||||||
)
|
)
|
||||||
|
|
||||||
if Params.CommonCfg.EnableStorageV2.GetAsBool() {
|
|
||||||
operators = append(operators, UpdateStorageVersionOperator(req.GetSegmentID(), req.GetStorageVersion()))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update segment info in memory and meta.
|
// Update segment info in memory and meta.
|
||||||
if err := s.meta.UpdateSegmentsInfo(operators...); err != nil {
|
if err := s.meta.UpdateSegmentsInfo(operators...); err != nil {
|
||||||
log.Error("save binlog and checkpoints failed", zap.Error(err))
|
log.Error("save binlog and checkpoints failed", zap.Error(err))
|
||||||
@ -882,18 +878,6 @@ func (s *Server) GetRecoveryInfoV2(ctx context.Context, req *datapb.GetRecoveryI
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if Params.CommonCfg.EnableStorageV2.GetAsBool() {
|
|
||||||
segmentInfos = append(segmentInfos, &datapb.SegmentInfo{
|
|
||||||
ID: segment.ID,
|
|
||||||
PartitionID: segment.PartitionID,
|
|
||||||
CollectionID: segment.CollectionID,
|
|
||||||
InsertChannel: segment.InsertChannel,
|
|
||||||
NumOfRows: segment.NumOfRows,
|
|
||||||
Level: segment.GetLevel(),
|
|
||||||
})
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
binlogs := segment.GetBinlogs()
|
binlogs := segment.GetBinlogs()
|
||||||
if len(binlogs) == 0 && segment.GetLevel() != datapb.SegmentLevel_L0 {
|
if len(binlogs) == 0 && segment.GetLevel() != datapb.SegmentLevel_L0 {
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -25,10 +25,8 @@ import (
|
|||||||
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
"github.com/milvus-io/milvus/internal/proto/indexpb"
|
"github.com/milvus-io/milvus/internal/proto/indexpb"
|
||||||
"github.com/milvus-io/milvus/internal/querycoordv2/params"
|
|
||||||
"github.com/milvus-io/milvus/internal/storage"
|
"github.com/milvus-io/milvus/internal/storage"
|
||||||
"github.com/milvus-io/milvus/internal/types"
|
"github.com/milvus-io/milvus/internal/types"
|
||||||
itypeutil "github.com/milvus-io/milvus/internal/util/typeutil"
|
|
||||||
"github.com/milvus-io/milvus/pkg/common"
|
"github.com/milvus-io/milvus/pkg/common"
|
||||||
"github.com/milvus-io/milvus/pkg/log"
|
"github.com/milvus-io/milvus/pkg/log"
|
||||||
"github.com/milvus-io/milvus/pkg/util/indexparams"
|
"github.com/milvus-io/milvus/pkg/util/indexparams"
|
||||||
@ -201,68 +199,27 @@ func (it *indexBuildTask) PreCheck(ctx context.Context, dependency *taskSchedule
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if Params.CommonCfg.EnableStorageV2.GetAsBool() {
|
it.req = &indexpb.CreateJobRequest{
|
||||||
storePath, err := itypeutil.GetStorageURI(params.Params.CommonCfg.StorageScheme.GetValue(), params.Params.CommonCfg.StoragePathPrefix.GetValue(), segment.GetID())
|
ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(),
|
||||||
if err != nil {
|
IndexFilePrefix: path.Join(dependency.chunkManager.RootPath(), common.SegmentIndexPath),
|
||||||
log.Ctx(ctx).Warn("failed to get storage uri", zap.Error(err))
|
BuildID: it.taskID,
|
||||||
it.SetState(indexpb.JobState_JobStateInit, err.Error())
|
IndexVersion: segIndex.IndexVersion + 1,
|
||||||
return true
|
StorageConfig: storageConfig,
|
||||||
}
|
IndexParams: indexParams,
|
||||||
indexStorePath, err := itypeutil.GetStorageURI(params.Params.CommonCfg.StorageScheme.GetValue(), params.Params.CommonCfg.StoragePathPrefix.GetValue()+"/index", segment.GetID())
|
TypeParams: typeParams,
|
||||||
if err != nil {
|
NumRows: segIndex.NumRows,
|
||||||
log.Ctx(ctx).Warn("failed to get storage uri", zap.Error(err))
|
CurrentIndexVersion: dependency.indexEngineVersionManager.GetCurrentIndexEngineVersion(),
|
||||||
it.SetState(indexpb.JobState_JobStateInit, err.Error())
|
CollectionID: segment.GetCollectionID(),
|
||||||
return true
|
PartitionID: segment.GetPartitionID(),
|
||||||
}
|
SegmentID: segment.GetID(),
|
||||||
|
FieldID: fieldID,
|
||||||
it.req = &indexpb.CreateJobRequest{
|
FieldName: field.GetName(),
|
||||||
ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(),
|
FieldType: field.GetDataType(),
|
||||||
IndexFilePrefix: path.Join(dependency.chunkManager.RootPath(), common.SegmentIndexPath),
|
Dim: int64(dim),
|
||||||
BuildID: it.taskID,
|
DataIds: binlogIDs,
|
||||||
IndexVersion: segIndex.IndexVersion + 1,
|
OptionalScalarFields: optionalFields,
|
||||||
StorageConfig: storageConfig,
|
Field: field,
|
||||||
IndexParams: indexParams,
|
PartitionKeyIsolation: partitionKeyIsolation,
|
||||||
TypeParams: typeParams,
|
|
||||||
NumRows: segIndex.NumRows,
|
|
||||||
CurrentIndexVersion: dependency.indexEngineVersionManager.GetCurrentIndexEngineVersion(),
|
|
||||||
CollectionID: segment.GetCollectionID(),
|
|
||||||
PartitionID: segment.GetPartitionID(),
|
|
||||||
SegmentID: segment.GetID(),
|
|
||||||
FieldID: fieldID,
|
|
||||||
FieldName: field.GetName(),
|
|
||||||
FieldType: field.GetDataType(),
|
|
||||||
StorePath: storePath,
|
|
||||||
StoreVersion: segment.GetStorageVersion(),
|
|
||||||
IndexStorePath: indexStorePath,
|
|
||||||
Dim: int64(dim),
|
|
||||||
DataIds: binlogIDs,
|
|
||||||
OptionalScalarFields: optionalFields,
|
|
||||||
Field: field,
|
|
||||||
PartitionKeyIsolation: partitionKeyIsolation,
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
it.req = &indexpb.CreateJobRequest{
|
|
||||||
ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(),
|
|
||||||
IndexFilePrefix: path.Join(dependency.chunkManager.RootPath(), common.SegmentIndexPath),
|
|
||||||
BuildID: it.taskID,
|
|
||||||
IndexVersion: segIndex.IndexVersion + 1,
|
|
||||||
StorageConfig: storageConfig,
|
|
||||||
IndexParams: indexParams,
|
|
||||||
TypeParams: typeParams,
|
|
||||||
NumRows: segIndex.NumRows,
|
|
||||||
CurrentIndexVersion: dependency.indexEngineVersionManager.GetCurrentIndexEngineVersion(),
|
|
||||||
CollectionID: segment.GetCollectionID(),
|
|
||||||
PartitionID: segment.GetPartitionID(),
|
|
||||||
SegmentID: segment.GetID(),
|
|
||||||
FieldID: fieldID,
|
|
||||||
FieldName: field.GetName(),
|
|
||||||
FieldType: field.GetDataType(),
|
|
||||||
Dim: int64(dim),
|
|
||||||
DataIds: binlogIDs,
|
|
||||||
OptionalScalarFields: optionalFields,
|
|
||||||
Field: field,
|
|
||||||
PartitionKeyIsolation: partitionKeyIsolation,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Ctx(ctx).Info("index task pre check successfully", zap.Int64("taskID", it.GetTaskID()))
|
log.Ctx(ctx).Info("index task pre check successfully", zap.Int64("taskID", it.GetTaskID()))
|
||||||
|
|||||||
@ -911,15 +911,6 @@ func (s *taskSchedulerSuite) Test_scheduler() {
|
|||||||
defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false")
|
defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false")
|
||||||
s.scheduler(handler)
|
s.scheduler(handler)
|
||||||
})
|
})
|
||||||
|
|
||||||
s.Run("test scheduler with indexBuilderV2", func() {
|
|
||||||
paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("true")
|
|
||||||
defer paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("false")
|
|
||||||
paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("true")
|
|
||||||
defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false")
|
|
||||||
|
|
||||||
s.scheduler(handler)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *taskSchedulerSuite) Test_analyzeTaskFailCase() {
|
func (s *taskSchedulerSuite) Test_analyzeTaskFailCase() {
|
||||||
@ -1289,26 +1280,11 @@ func (s *taskSchedulerSuite) Test_indexTaskFailCase() {
|
|||||||
|
|
||||||
paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("True")
|
paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("True")
|
||||||
defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("False")
|
defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("False")
|
||||||
err := Params.Save("common.storage.scheme", "fake")
|
|
||||||
defer Params.Reset("common.storage.scheme")
|
|
||||||
Params.CommonCfg.EnableStorageV2.SwapTempValue("True")
|
|
||||||
defer Params.CommonCfg.EnableStorageV2.SwapTempValue("False")
|
|
||||||
scheduler.Start()
|
scheduler.Start()
|
||||||
|
|
||||||
// get collection info failed --> init
|
// get collection info failed --> init
|
||||||
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(nil, errors.New("mock error")).Once()
|
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(nil, errors.New("mock error")).Once()
|
||||||
|
|
||||||
// partition key field is nil, get collection info failed --> init
|
|
||||||
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{
|
|
||||||
ID: collID,
|
|
||||||
Schema: &schemapb.CollectionSchema{
|
|
||||||
Fields: []*schemapb.FieldSchema{
|
|
||||||
{FieldID: s.fieldID, Name: "vec", TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "10"}}},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}, nil).Once()
|
|
||||||
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(nil, errors.New("mock error")).Once()
|
|
||||||
|
|
||||||
// get collection info success, get dim failed --> init
|
// get collection info success, get dim failed --> init
|
||||||
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{
|
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{
|
||||||
ID: collID,
|
ID: collID,
|
||||||
@ -1318,38 +1294,11 @@ func (s *taskSchedulerSuite) Test_indexTaskFailCase() {
|
|||||||
{FieldID: s.fieldID, Name: "vec"},
|
{FieldID: s.fieldID, Name: "vec"},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}, nil).Twice()
|
}, nil).Once()
|
||||||
|
|
||||||
// peek client success, update version success, get collection info success, get dim success, get storage uri failed --> init
|
|
||||||
s.NoError(err)
|
|
||||||
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, i int64) (*collectionInfo, error) {
|
|
||||||
return &collectionInfo{
|
|
||||||
ID: collID,
|
|
||||||
Schema: &schemapb.CollectionSchema{
|
|
||||||
Fields: []*schemapb.FieldSchema{
|
|
||||||
{FieldID: 100, Name: "pk", IsPrimaryKey: true, IsPartitionKey: true, DataType: schemapb.DataType_Int64},
|
|
||||||
{FieldID: s.fieldID, Name: "vec", TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "10"}}},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}, nil
|
|
||||||
}).Twice()
|
|
||||||
s.NoError(err)
|
|
||||||
|
|
||||||
// assign failed --> retry
|
// assign failed --> retry
|
||||||
workerManager.EXPECT().PickClient().Return(s.nodeID, in).Once()
|
workerManager.EXPECT().PickClient().Return(s.nodeID, in).Once()
|
||||||
catalog.EXPECT().AlterSegmentIndexes(mock.Anything, mock.Anything).Return(nil).Once()
|
catalog.EXPECT().AlterSegmentIndexes(mock.Anything, mock.Anything).Return(nil).Once()
|
||||||
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, i int64) (*collectionInfo, error) {
|
|
||||||
Params.Reset("common.storage.scheme")
|
|
||||||
return &collectionInfo{
|
|
||||||
ID: collID,
|
|
||||||
Schema: &schemapb.CollectionSchema{
|
|
||||||
Fields: []*schemapb.FieldSchema{
|
|
||||||
{FieldID: 100, Name: "pk", IsPrimaryKey: true, IsPartitionKey: true, DataType: schemapb.DataType_Int64},
|
|
||||||
{FieldID: s.fieldID, Name: "vec", TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "10"}}},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}, nil
|
|
||||||
}).Once()
|
|
||||||
in.EXPECT().CreateJobV2(mock.Anything, mock.Anything).Return(nil, errors.New("mock error")).Once()
|
in.EXPECT().CreateJobV2(mock.Anything, mock.Anything).Return(nil, errors.New("mock error")).Once()
|
||||||
|
|
||||||
// retry --> init
|
// retry --> init
|
||||||
|
|||||||
@ -30,12 +30,12 @@ import (
|
|||||||
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
"github.com/milvus-io/milvus-storage/go/common/log"
|
|
||||||
"github.com/milvus-io/milvus/internal/datanode/allocator"
|
"github.com/milvus-io/milvus/internal/datanode/allocator"
|
||||||
"github.com/milvus-io/milvus/internal/datanode/io"
|
"github.com/milvus-io/milvus/internal/datanode/io"
|
||||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
"github.com/milvus-io/milvus/internal/proto/datapb"
|
||||||
"github.com/milvus-io/milvus/internal/storage"
|
"github.com/milvus-io/milvus/internal/storage"
|
||||||
"github.com/milvus-io/milvus/pkg/common"
|
"github.com/milvus-io/milvus/pkg/common"
|
||||||
|
"github.com/milvus-io/milvus/pkg/log"
|
||||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||||
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
||||||
"github.com/milvus-io/milvus/pkg/util/tsoutil"
|
"github.com/milvus-io/milvus/pkg/util/tsoutil"
|
||||||
|
|||||||
@ -24,8 +24,6 @@ import (
|
|||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
|
|
||||||
"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
|
|
||||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
"github.com/milvus-io/milvus/internal/proto/datapb"
|
||||||
"github.com/milvus-io/milvus/internal/storage"
|
"github.com/milvus-io/milvus/internal/storage"
|
||||||
"github.com/milvus-io/milvus/pkg/log"
|
"github.com/milvus-io/milvus/pkg/log"
|
||||||
@ -113,54 +111,3 @@ func LoadStats(ctx context.Context, chunkManager storage.ChunkManager, schema *s
|
|||||||
log.Info("Successfully load pk stats", zap.Any("time", time.Since(startTs)), zap.Uint("size", size))
|
log.Info("Successfully load pk stats", zap.Any("time", time.Since(startTs)), zap.Uint("size", size))
|
||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func LoadStatsV2(storageCache *metacache.StorageV2Cache, segment *datapb.SegmentInfo, schema *schemapb.CollectionSchema) ([]*storage.PkStatistics, error) {
|
|
||||||
space, err := storageCache.GetOrCreateSpace(segment.ID, syncmgr.SpaceCreatorFunc(segment.ID, schema, storageCache.ArrowSchema()))
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
getResult := func(stats []*storage.PrimaryKeyStats) []*storage.PkStatistics {
|
|
||||||
result := make([]*storage.PkStatistics, 0, len(stats))
|
|
||||||
for _, stat := range stats {
|
|
||||||
pkStat := &storage.PkStatistics{
|
|
||||||
PkFilter: stat.BF,
|
|
||||||
MinPK: stat.MinPk,
|
|
||||||
MaxPK: stat.MaxPk,
|
|
||||||
}
|
|
||||||
result = append(result, pkStat)
|
|
||||||
}
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
||||||
blobs := space.StatisticsBlobs()
|
|
||||||
deserBlobs := make([]*storage.Blob, 0)
|
|
||||||
for _, b := range blobs {
|
|
||||||
if b.Name == storage.CompoundStatsType.LogIdx() {
|
|
||||||
blobData := make([]byte, b.Size)
|
|
||||||
_, err = space.ReadBlob(b.Name, blobData)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
stats, err := storage.DeserializeStatsList(&storage.Blob{Value: blobData})
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return getResult(stats), nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, b := range blobs {
|
|
||||||
blobData := make([]byte, b.Size)
|
|
||||||
_, err = space.ReadBlob(b.Name, blobData)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
deserBlobs = append(deserBlobs, &storage.Blob{Value: blobData})
|
|
||||||
}
|
|
||||||
stats, err := storage.DeserializeStats(deserBlobs)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return getResult(stats), nil
|
|
||||||
}
|
|
||||||
|
|||||||
@ -32,7 +32,6 @@ import (
|
|||||||
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
|
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
|
||||||
"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
|
"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
|
||||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
"github.com/milvus-io/milvus/internal/proto/datapb"
|
||||||
"github.com/milvus-io/milvus/internal/querycoordv2/params"
|
|
||||||
"github.com/milvus-io/milvus/internal/storage"
|
"github.com/milvus-io/milvus/internal/storage"
|
||||||
"github.com/milvus-io/milvus/pkg/common"
|
"github.com/milvus-io/milvus/pkg/common"
|
||||||
"github.com/milvus-io/milvus/pkg/log"
|
"github.com/milvus-io/milvus/pkg/log"
|
||||||
@ -52,10 +51,6 @@ func NewSyncTask(ctx context.Context,
|
|||||||
insertData *storage.InsertData,
|
insertData *storage.InsertData,
|
||||||
deleteData *storage.DeleteData,
|
deleteData *storage.DeleteData,
|
||||||
) (syncmgr.Task, error) {
|
) (syncmgr.Task, error) {
|
||||||
if params.Params.CommonCfg.EnableStorageV2.GetAsBool() {
|
|
||||||
return nil, merr.WrapErrImportFailed("storage v2 is not supported") // TODO: dyh, resolve storage v2
|
|
||||||
}
|
|
||||||
|
|
||||||
metaCache := metaCaches[vchannel]
|
metaCache := metaCaches[vchannel]
|
||||||
if _, ok := metaCache.GetSegmentByID(segmentID); !ok {
|
if _, ok := metaCache.GetSegmentByID(segmentID); !ok {
|
||||||
metaCache.AddSegment(&datapb.SegmentInfo{
|
metaCache.AddSegment(&datapb.SegmentInfo{
|
||||||
|
|||||||
@ -1,70 +0,0 @@
|
|||||||
// Licensed to the LF AI & Data foundation under one
|
|
||||||
// or more contributor license agreements. See the NOTICE file
|
|
||||||
// distributed with this work for additional information
|
|
||||||
// regarding copyright ownership. The ASF licenses this file
|
|
||||||
// to you under the Apache License, Version 2.0 (the
|
|
||||||
// "License"); you may not use this file except in compliance
|
|
||||||
// with the License. You may obtain a copy of the License at
|
|
||||||
//
|
|
||||||
// http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
//
|
|
||||||
// Unless required by applicable law or agreed to in writing, software
|
|
||||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
// See the License for the specific language governing permissions and
|
|
||||||
// limitations under the License.
|
|
||||||
|
|
||||||
package metacache
|
|
||||||
|
|
||||||
import (
|
|
||||||
"sync"
|
|
||||||
|
|
||||||
"github.com/apache/arrow/go/v12/arrow"
|
|
||||||
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
||||||
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
|
|
||||||
"github.com/milvus-io/milvus/internal/util/typeutil"
|
|
||||||
)
|
|
||||||
|
|
||||||
type StorageV2Cache struct {
|
|
||||||
arrowSchema *arrow.Schema
|
|
||||||
spaceMu sync.Mutex
|
|
||||||
spaces map[int64]*milvus_storage.Space
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StorageV2Cache) ArrowSchema() *arrow.Schema {
|
|
||||||
return s.arrowSchema
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StorageV2Cache) GetOrCreateSpace(segmentID int64, creator func() (*milvus_storage.Space, error)) (*milvus_storage.Space, error) {
|
|
||||||
s.spaceMu.Lock()
|
|
||||||
defer s.spaceMu.Unlock()
|
|
||||||
space, ok := s.spaces[segmentID]
|
|
||||||
if ok {
|
|
||||||
return space, nil
|
|
||||||
}
|
|
||||||
space, err := creator()
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
s.spaces[segmentID] = space
|
|
||||||
return space, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// only for unit test
|
|
||||||
func (s *StorageV2Cache) SetSpace(segmentID int64, space *milvus_storage.Space) {
|
|
||||||
s.spaceMu.Lock()
|
|
||||||
defer s.spaceMu.Unlock()
|
|
||||||
s.spaces[segmentID] = space
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewStorageV2Cache(schema *schemapb.CollectionSchema) (*StorageV2Cache, error) {
|
|
||||||
arrowSchema, err := typeutil.ConvertToArrowSchema(schema.Fields)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return &StorageV2Cache{
|
|
||||||
arrowSchema: arrowSchema,
|
|
||||||
spaces: make(map[int64]*milvus_storage.Space),
|
|
||||||
}, nil
|
|
||||||
}
|
|
||||||
@ -30,7 +30,6 @@ import (
|
|||||||
"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
|
"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
|
||||||
"github.com/milvus-io/milvus/internal/flushcommon/writebuffer"
|
"github.com/milvus-io/milvus/internal/flushcommon/writebuffer"
|
||||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
"github.com/milvus-io/milvus/internal/proto/datapb"
|
||||||
"github.com/milvus-io/milvus/internal/querycoordv2/params"
|
|
||||||
"github.com/milvus-io/milvus/internal/storage"
|
"github.com/milvus-io/milvus/internal/storage"
|
||||||
"github.com/milvus-io/milvus/internal/util/flowgraph"
|
"github.com/milvus-io/milvus/internal/util/flowgraph"
|
||||||
"github.com/milvus-io/milvus/pkg/log"
|
"github.com/milvus-io/milvus/pkg/log"
|
||||||
@ -129,12 +128,12 @@ func (dsService *DataSyncService) GetMetaCache() metacache.MetaCache {
|
|||||||
return dsService.metacache
|
return dsService.metacache
|
||||||
}
|
}
|
||||||
|
|
||||||
func getMetaCacheWithTickler(initCtx context.Context, params *util.PipelineParams, info *datapb.ChannelWatchInfo, tickler *util.Tickler, unflushed, flushed []*datapb.SegmentInfo, storageV2Cache *metacache.StorageV2Cache) (metacache.MetaCache, error) {
|
func getMetaCacheWithTickler(initCtx context.Context, params *util.PipelineParams, info *datapb.ChannelWatchInfo, tickler *util.Tickler, unflushed, flushed []*datapb.SegmentInfo) (metacache.MetaCache, error) {
|
||||||
tickler.SetTotal(int32(len(unflushed) + len(flushed)))
|
tickler.SetTotal(int32(len(unflushed) + len(flushed)))
|
||||||
return initMetaCache(initCtx, storageV2Cache, params.ChunkManager, info, tickler, unflushed, flushed)
|
return initMetaCache(initCtx, params.ChunkManager, info, tickler, unflushed, flushed)
|
||||||
}
|
}
|
||||||
|
|
||||||
func initMetaCache(initCtx context.Context, storageV2Cache *metacache.StorageV2Cache, chunkManager storage.ChunkManager, info *datapb.ChannelWatchInfo, tickler interface{ Inc() }, unflushed, flushed []*datapb.SegmentInfo) (metacache.MetaCache, error) {
|
func initMetaCache(initCtx context.Context, chunkManager storage.ChunkManager, info *datapb.ChannelWatchInfo, tickler interface{ Inc() }, unflushed, flushed []*datapb.SegmentInfo) (metacache.MetaCache, error) {
|
||||||
// tickler will update addSegment progress to watchInfo
|
// tickler will update addSegment progress to watchInfo
|
||||||
futures := make([]*conc.Future[any], 0, len(unflushed)+len(flushed))
|
futures := make([]*conc.Future[any], 0, len(unflushed)+len(flushed))
|
||||||
segmentPks := typeutil.NewConcurrentMap[int64, []*storage.PkStatistics]()
|
segmentPks := typeutil.NewConcurrentMap[int64, []*storage.PkStatistics]()
|
||||||
@ -152,11 +151,7 @@ func initMetaCache(initCtx context.Context, storageV2Cache *metacache.StorageV2C
|
|||||||
future := io.GetOrCreateStatsPool().Submit(func() (any, error) {
|
future := io.GetOrCreateStatsPool().Submit(func() (any, error) {
|
||||||
var stats []*storage.PkStatistics
|
var stats []*storage.PkStatistics
|
||||||
var err error
|
var err error
|
||||||
if params.Params.CommonCfg.EnableStorageV2.GetAsBool() {
|
stats, err = compaction.LoadStats(initCtx, chunkManager, info.GetSchema(), segment.GetID(), segment.GetStatslogs())
|
||||||
stats, err = compaction.LoadStatsV2(storageV2Cache, segment, info.GetSchema())
|
|
||||||
} else {
|
|
||||||
stats, err = compaction.LoadStats(initCtx, chunkManager, info.GetSchema(), segment.GetID(), segment.GetStatslogs())
|
|
||||||
}
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -190,7 +185,7 @@ func initMetaCache(initCtx context.Context, storageV2Cache *metacache.StorageV2C
|
|||||||
return metacache, nil
|
return metacache, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getServiceWithChannel(initCtx context.Context, params *util.PipelineParams, info *datapb.ChannelWatchInfo, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, unflushed, flushed []*datapb.SegmentInfo) (*DataSyncService, error) {
|
func getServiceWithChannel(initCtx context.Context, params *util.PipelineParams, info *datapb.ChannelWatchInfo, metacache metacache.MetaCache, unflushed, flushed []*datapb.SegmentInfo) (*DataSyncService, error) {
|
||||||
var (
|
var (
|
||||||
channelName = info.GetVchan().GetChannelName()
|
channelName = info.GetVchan().GetChannelName()
|
||||||
collectionID = info.GetVchan().GetCollectionID()
|
collectionID = info.GetVchan().GetCollectionID()
|
||||||
@ -204,7 +199,7 @@ func getServiceWithChannel(initCtx context.Context, params *util.PipelineParams,
|
|||||||
serverID: params.Session.ServerID,
|
serverID: params.Session.ServerID,
|
||||||
}
|
}
|
||||||
|
|
||||||
err := params.WriteBufferManager.Register(channelName, metacache, storageV2Cache,
|
err := params.WriteBufferManager.Register(channelName, metacache,
|
||||||
writebuffer.WithMetaWriter(syncmgr.BrokerMetaWriter(params.Broker, config.serverID)),
|
writebuffer.WithMetaWriter(syncmgr.BrokerMetaWriter(params.Broker, config.serverID)),
|
||||||
writebuffer.WithIDAllocator(params.Allocator))
|
writebuffer.WithIDAllocator(params.Allocator))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -287,21 +282,13 @@ func NewDataSyncService(initCtx context.Context, pipelineParams *util.PipelinePa
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
var storageCache *metacache.StorageV2Cache
|
|
||||||
if params.Params.CommonCfg.EnableStorageV2.GetAsBool() {
|
|
||||||
storageCache, err = metacache.NewStorageV2Cache(info.Schema)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// init metaCache meta
|
// init metaCache meta
|
||||||
metaCache, err := getMetaCacheWithTickler(initCtx, pipelineParams, info, tickler, unflushedSegmentInfos, flushedSegmentInfos, storageCache)
|
metaCache, err := getMetaCacheWithTickler(initCtx, pipelineParams, info, tickler, unflushedSegmentInfos, flushedSegmentInfos)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return getServiceWithChannel(initCtx, pipelineParams, info, metaCache, storageCache, unflushedSegmentInfos, flushedSegmentInfos)
|
return getServiceWithChannel(initCtx, pipelineParams, info, metaCache, unflushedSegmentInfos, flushedSegmentInfos)
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewDataSyncServiceWithMetaCache(metaCache metacache.MetaCache) *DataSyncService {
|
func NewDataSyncServiceWithMetaCache(metaCache metacache.MetaCache) *DataSyncService {
|
||||||
|
|||||||
@ -289,7 +289,7 @@ func TestGetChannelWithTickler(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
metaCache, err := getMetaCacheWithTickler(context.TODO(), pipelineParams, info, util.NewTickler(), unflushed, flushed, nil)
|
metaCache, err := getMetaCacheWithTickler(context.TODO(), pipelineParams, info, util.NewTickler(), unflushed, flushed)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.NotNil(t, metaCache)
|
assert.NotNil(t, metaCache)
|
||||||
assert.Equal(t, int64(1), metaCache.Collection())
|
assert.Equal(t, int64(1), metaCache.Collection())
|
||||||
|
|||||||
@ -20,7 +20,6 @@ import (
|
|||||||
// MetaWriter is the interface for SyncManager to write segment sync meta.
|
// MetaWriter is the interface for SyncManager to write segment sync meta.
|
||||||
type MetaWriter interface {
|
type MetaWriter interface {
|
||||||
UpdateSync(context.Context, *SyncTask) error
|
UpdateSync(context.Context, *SyncTask) error
|
||||||
UpdateSyncV2(*SyncTaskV2) error
|
|
||||||
DropChannel(context.Context, string) error
|
DropChannel(context.Context, string) error
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -138,82 +137,6 @@ func (b *brokerMetaWriter) UpdateSync(ctx context.Context, pack *SyncTask) error
 	return nil
 }
 
-func (b *brokerMetaWriter) UpdateSyncV2(pack *SyncTaskV2) error {
-	checkPoints := []*datapb.CheckPoint{}
-
-	// only current segment checkpoint info,
-	segment, ok := pack.metacache.GetSegmentByID(pack.segmentID)
-	if !ok {
-		return merr.WrapErrSegmentNotFound(pack.segmentID)
-	}
-	checkPoints = append(checkPoints, &datapb.CheckPoint{
-		SegmentID: pack.segmentID,
-		NumOfRows: segment.FlushedRows() + pack.batchSize,
-		Position:  pack.checkpoint,
-	})
-
-	startPos := lo.Map(pack.metacache.GetSegmentsBy(metacache.WithSegmentState(commonpb.SegmentState_Growing, commonpb.SegmentState_Flushing),
-		metacache.WithStartPosNotRecorded()), func(info *metacache.SegmentInfo, _ int) *datapb.SegmentStartPosition {
-		return &datapb.SegmentStartPosition{
-			SegmentID:     info.SegmentID(),
-			StartPosition: info.StartPosition(),
-		}
-	})
-	log.Info("SaveBinlogPath",
-		zap.Int64("SegmentID", pack.segmentID),
-		zap.Int64("CollectionID", pack.collectionID),
-		zap.Any("startPos", startPos),
-		zap.Any("checkPoints", checkPoints),
-		zap.String("vChannelName", pack.channelName),
-	)
-
-	req := &datapb.SaveBinlogPathsRequest{
-		Base: commonpbutil.NewMsgBase(
-			commonpbutil.WithSourceID(b.serverID),
-		),
-		SegmentID:    pack.segmentID,
-		CollectionID: pack.collectionID,
-
-		CheckPoints:    checkPoints,
-		StorageVersion: pack.storageVersion,
-
-		StartPositions: startPos,
-		Flushed:        pack.isFlush,
-		Dropped:        pack.isDrop,
-		Channel:        pack.channelName,
-	}
-	err := retry.Do(context.Background(), func() error {
-		err := b.broker.SaveBinlogPaths(context.Background(), req)
-		// Segment not found during stale segment flush. Segment might get compacted already.
-		// Stop retry and still proceed to the end, ignoring this error.
-		if !pack.isFlush && errors.Is(err, merr.ErrSegmentNotFound) {
-			log.Warn("stale segment not found, could be compacted",
-				zap.Int64("segmentID", pack.segmentID))
-			log.Warn("failed to SaveBinlogPaths",
-				zap.Int64("segmentID", pack.segmentID),
-				zap.Error(err))
-			return nil
-		}
-		// meta error, datanode handles a virtual channel does not belong here
-		if errors.IsAny(err, merr.ErrSegmentNotFound, merr.ErrChannelNotFound) {
-			log.Warn("meta error found, skip sync and start to drop virtual channel", zap.String("channel", pack.channelName))
-			return nil
-		}
-
-		if err != nil {
-			return err
-		}
-
-		return nil
-	}, b.opts...)
-	if err != nil {
-		log.Warn("failed to SaveBinlogPaths",
-			zap.Int64("segmentID", pack.segmentID),
-			zap.Error(err))
-	}
-	return err
-}
-
 func (b *brokerMetaWriter) DropChannel(ctx context.Context, channelName string) error {
 	err := retry.Handle(ctx, func() (bool, error) {
 		status, err := b.broker.DropVirtualChannel(context.Background(), &datapb.DropVirtualChannelRequest{
@@ -67,34 +67,6 @@ func (s *MetaWriterSuite) TestReturnError() {
 	s.Error(err)
 }
 
-func (s *MetaWriterSuite) TestNormalSaveV2() {
-	s.broker.EXPECT().SaveBinlogPaths(mock.Anything, mock.Anything).Return(nil)
-
-	bfs := metacache.NewBloomFilterSet()
-	seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs)
-	metacache.UpdateNumOfRows(1000)(seg)
-	s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true)
-	s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg})
-	task := NewSyncTaskV2()
-	task.WithMetaCache(s.metacache)
-	err := s.writer.UpdateSyncV2(task)
-	s.NoError(err)
-}
-
-func (s *MetaWriterSuite) TestReturnErrorV2() {
-	s.broker.EXPECT().SaveBinlogPaths(mock.Anything, mock.Anything).Return(errors.New("mocked"))
-
-	bfs := metacache.NewBloomFilterSet()
-	seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs)
-	metacache.UpdateNumOfRows(1000)(seg)
-	s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true)
-	s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg})
-	task := NewSyncTaskV2()
-	task.WithMetaCache(s.metacache)
-	err := s.writer.UpdateSyncV2(task)
-	s.Error(err)
-}
-
 func TestMetaWriter(t *testing.T) {
 	suite.Run(t, new(MetaWriterSuite))
 }
@@ -107,48 +107,6 @@ func (_c *MockMetaWriter_UpdateSync_Call) RunAndReturn(run func(context.Context,
 	return _c
 }
 
-// UpdateSyncV2 provides a mock function with given fields: _a0
-func (_m *MockMetaWriter) UpdateSyncV2(_a0 *SyncTaskV2) error {
-	ret := _m.Called(_a0)
-
-	var r0 error
-	if rf, ok := ret.Get(0).(func(*SyncTaskV2) error); ok {
-		r0 = rf(_a0)
-	} else {
-		r0 = ret.Error(0)
-	}
-
-	return r0
-}
-
-// MockMetaWriter_UpdateSyncV2_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'UpdateSyncV2'
-type MockMetaWriter_UpdateSyncV2_Call struct {
-	*mock.Call
-}
-
-// UpdateSyncV2 is a helper method to define mock.On call
-//   - _a0 *SyncTaskV2
-func (_e *MockMetaWriter_Expecter) UpdateSyncV2(_a0 interface{}) *MockMetaWriter_UpdateSyncV2_Call {
-	return &MockMetaWriter_UpdateSyncV2_Call{Call: _e.mock.On("UpdateSyncV2", _a0)}
-}
-
-func (_c *MockMetaWriter_UpdateSyncV2_Call) Run(run func(_a0 *SyncTaskV2)) *MockMetaWriter_UpdateSyncV2_Call {
-	_c.Call.Run(func(args mock.Arguments) {
-		run(args[0].(*SyncTaskV2))
-	})
-	return _c
-}
-
-func (_c *MockMetaWriter_UpdateSyncV2_Call) Return(_a0 error) *MockMetaWriter_UpdateSyncV2_Call {
-	_c.Call.Return(_a0)
-	return _c
-}
-
-func (_c *MockMetaWriter_UpdateSyncV2_Call) RunAndReturn(run func(*SyncTaskV2) error) *MockMetaWriter_UpdateSyncV2_Call {
-	_c.Call.Return(run)
-	return _c
-}
-
 // NewMockMetaWriter creates a new instance of MockMetaWriter. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations.
 // The first argument is typically a *testing.T value.
 func NewMockMetaWriter(t interface {
@@ -1,4 +1,4 @@
-// Code generated by mockery v2.30.1. DO NOT EDIT.
+// Code generated by mockery v2.32.4. DO NOT EDIT.
 
 package syncmgr
 
@@ -1,256 +0,0 @@
-// Licensed to the LF AI & Data foundation under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package syncmgr
-
-import (
-	"context"
-	"fmt"
-
-	"github.com/apache/arrow/go/v12/arrow"
-	"github.com/apache/arrow/go/v12/arrow/array"
-	"github.com/apache/arrow/go/v12/arrow/memory"
-	"go.uber.org/zap"
-
-	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
-	milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
-	"github.com/milvus-io/milvus-storage/go/storage/options"
-	"github.com/milvus-io/milvus-storage/go/storage/schema"
-	"github.com/milvus-io/milvus/internal/allocator"
-	"github.com/milvus-io/milvus/internal/flushcommon/metacache"
-	"github.com/milvus-io/milvus/internal/proto/datapb"
-	"github.com/milvus-io/milvus/internal/querycoordv2/params"
-	"github.com/milvus-io/milvus/internal/storage"
-	iTypeutil "github.com/milvus-io/milvus/internal/util/typeutil"
-	"github.com/milvus-io/milvus/pkg/common"
-	"github.com/milvus-io/milvus/pkg/log"
-	"github.com/milvus-io/milvus/pkg/metrics"
-	"github.com/milvus-io/milvus/pkg/util/merr"
-	"github.com/milvus-io/milvus/pkg/util/paramtable"
-	"github.com/milvus-io/milvus/pkg/util/timerecord"
-	"github.com/milvus-io/milvus/pkg/util/typeutil"
-)
-
-type storageV2Serializer struct {
-	*storageV1Serializer
-
-	arrowSchema    *arrow.Schema
-	storageV2Cache *metacache.StorageV2Cache
-	inCodec        *storage.InsertCodec
-	metacache      metacache.MetaCache
-}
-
-func NewStorageV2Serializer(
-	storageV2Cache *metacache.StorageV2Cache,
-	allocator allocator.Interface,
-	metacache metacache.MetaCache,
-	metaWriter MetaWriter,
-) (*storageV2Serializer, error) {
-	v1Serializer, err := NewStorageSerializer(allocator, metacache, metaWriter)
-	if err != nil {
-		return nil, err
-	}
-
-	return &storageV2Serializer{
-		storageV1Serializer: v1Serializer,
-		storageV2Cache:      storageV2Cache,
-		arrowSchema:         storageV2Cache.ArrowSchema(),
-		metacache:           metacache,
-	}, nil
-}
-
-func (s *storageV2Serializer) EncodeBuffer(ctx context.Context, pack *SyncPack) (Task, error) {
-	task := NewSyncTaskV2()
-	tr := timerecord.NewTimeRecorder("storage_serializer_v2")
-	metricSegLevel := pack.level.String()
-
-	space, err := s.storageV2Cache.GetOrCreateSpace(pack.segmentID, SpaceCreatorFunc(pack.segmentID, s.schema, s.arrowSchema))
-	if err != nil {
-		log.Warn("failed to get or create space", zap.Error(err))
-		return nil, err
-	}
-
-	task.space = space
-	if len(pack.insertData) > 0 {
-		insertReader, err := s.serializeInsertData(pack)
-		if err != nil {
-			log.Warn("failed to serialize insert data with storagev2", zap.Error(err))
-			return nil, err
-		}
-
-		task.reader = insertReader
-
-		singlePKStats, batchStatsBlob, err := s.serializeStatslog(pack)
-		if err != nil {
-			log.Warn("failed to serialized statslog", zap.Error(err))
-			return nil, err
-		}
-
-		task.statsBlob = batchStatsBlob
-		s.metacache.UpdateSegments(metacache.RollStats(singlePKStats), metacache.WithSegmentIDs(pack.segmentID))
-	}
-
-	if pack.isFlush {
-		if pack.level != datapb.SegmentLevel_L0 {
-			mergedStatsBlob, err := s.serializeMergedPkStats(pack)
-			if err != nil {
-				log.Warn("failed to serialize merged stats log", zap.Error(err))
-				return nil, err
-			}
-
-			task.mergedStatsBlob = mergedStatsBlob
-		}
-		task.WithFlush()
-	}
-
-	if pack.deltaData != nil {
-		deltaReader, err := s.serializeDeltaData(pack)
-		if err != nil {
-			log.Warn("failed to serialize delta data", zap.Error(err))
-			return nil, err
-		}
-		task.deleteReader = deltaReader
-	}
-
-	if pack.isDrop {
-		task.WithDrop()
-	}
-
-	s.setTaskMeta(task, pack)
-	metrics.DataNodeEncodeBufferLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), metricSegLevel).Observe(float64(tr.RecordSpan().Milliseconds()))
-	return task, nil
-}
-
-func (s *storageV2Serializer) setTaskMeta(task *SyncTaskV2, pack *SyncPack) {
-	task.WithCollectionID(pack.collectionID).
-		WithPartitionID(pack.partitionID).
-		WithChannelName(pack.channelName).
-		WithSegmentID(pack.segmentID).
-		WithBatchSize(pack.batchSize).
-		WithSchema(s.metacache.Schema()).
-		WithStartPosition(pack.startPosition).
-		WithCheckpoint(pack.checkpoint).
-		WithLevel(pack.level).
-		WithTimeRange(pack.tsFrom, pack.tsTo).
-		WithMetaCache(s.metacache).
-		WithMetaWriter(s.metaWriter).
-		WithFailureCallback(func(err error) {
-			// TODO could change to unsub channel in the future
-			panic(err)
-		})
-}
-
-func (s *storageV2Serializer) serializeInsertData(pack *SyncPack) (array.RecordReader, error) {
-	builder := array.NewRecordBuilder(memory.DefaultAllocator, s.arrowSchema)
-	defer builder.Release()
-
-	for _, chunk := range pack.insertData {
-		if err := iTypeutil.BuildRecord(builder, chunk, s.schema.GetFields()); err != nil {
-			return nil, err
-		}
-	}
-
-	rec := builder.NewRecord()
-	defer rec.Release()
-
-	itr, err := array.NewRecordReader(s.arrowSchema, []arrow.Record{rec})
-	if err != nil {
-		return nil, err
-	}
-	itr.Retain()
-
-	return itr, nil
-}
-
-func (s *storageV2Serializer) serializeDeltaData(pack *SyncPack) (array.RecordReader, error) {
-	fields := make([]*schemapb.FieldSchema, 0, 2)
-	tsField := &schemapb.FieldSchema{
-		FieldID:  common.TimeStampField,
-		Name:     common.TimeStampFieldName,
-		DataType: schemapb.DataType_Int64,
-	}
-	fields = append(fields, s.pkField, tsField)
-
-	deltaArrowSchema, err := iTypeutil.ConvertToArrowSchema(fields)
-	if err != nil {
-		return nil, err
-	}
-
-	builder := array.NewRecordBuilder(memory.DefaultAllocator, deltaArrowSchema)
-	defer builder.Release()
-
-	switch s.pkField.GetDataType() {
-	case schemapb.DataType_Int64:
-		pb := builder.Field(0).(*array.Int64Builder)
-		for _, pk := range pack.deltaData.Pks {
-			pb.Append(pk.GetValue().(int64))
-		}
-	case schemapb.DataType_VarChar:
-		pb := builder.Field(0).(*array.StringBuilder)
-		for _, pk := range pack.deltaData.Pks {
-			pb.Append(pk.GetValue().(string))
-		}
-	default:
-		return nil, merr.WrapErrParameterInvalidMsg("unexpected pk type %v", s.pkField.GetDataType())
-	}
-
-	for _, ts := range pack.deltaData.Tss {
-		builder.Field(1).(*array.Int64Builder).Append(int64(ts))
-	}
-
-	rec := builder.NewRecord()
-	defer rec.Release()
-
-	reader, err := array.NewRecordReader(deltaArrowSchema, []arrow.Record{rec})
-	if err != nil {
-		return nil, err
-	}
-	reader.Retain()
-
-	return reader, nil
-}
-
-func SpaceCreatorFunc(segmentID int64, collSchema *schemapb.CollectionSchema, arrowSchema *arrow.Schema) func() (*milvus_storage.Space, error) {
-	return func() (*milvus_storage.Space, error) {
-		url, err := iTypeutil.GetStorageURI(params.Params.CommonCfg.StorageScheme.GetValue(), params.Params.CommonCfg.StoragePathPrefix.GetValue(), segmentID)
-		if err != nil {
-			return nil, err
-		}
-
-		pkSchema, err := typeutil.GetPrimaryFieldSchema(collSchema)
-		if err != nil {
-			return nil, err
-		}
-		vecSchema, err := typeutil.GetVectorFieldSchema(collSchema)
-		if err != nil {
-			return nil, err
-		}
-		space, err := milvus_storage.Open(
-			url,
-			options.NewSpaceOptionBuilder().
-				SetSchema(schema.NewSchema(
-					arrowSchema,
-					&schema.SchemaOptions{
-						PrimaryColumn: pkSchema.Name,
-						VectorColumn:  vecSchema.Name,
-						VersionColumn: common.TimeStampFieldName,
-					},
-				)).
-				Build(),
-		)
-		return space, err
-	}
-}
@ -1,366 +0,0 @@
|
|||||||
// Licensed to the LF AI & Data foundation under one
|
|
||||||
// or more contributor license agreements. See the NOTICE file
|
|
||||||
// distributed with this work for additional information
|
|
||||||
// regarding copyright ownership. The ASF licenses this file
|
|
||||||
// to you under the Apache License, Version 2.0 (the
|
|
||||||
// "License"); you may not use this file except in compliance
|
|
||||||
// with the License. You may obtain a copy of the License at
|
|
||||||
//
|
|
||||||
// http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
//
|
|
||||||
// Unless required by applicable law or agreed to in writing, software
|
|
||||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
// See the License for the specific language governing permissions and
|
|
||||||
// limitations under the License.
|
|
||||||
|
|
||||||
package syncmgr
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"fmt"
|
|
||||||
"math/rand"
|
|
||||||
"testing"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/samber/lo"
|
|
||||||
"github.com/stretchr/testify/mock"
|
|
||||||
"github.com/stretchr/testify/suite"
|
|
||||||
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
||||||
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
|
|
||||||
"github.com/milvus-io/milvus-storage/go/storage/options"
|
|
||||||
"github.com/milvus-io/milvus-storage/go/storage/schema"
|
|
||||||
"github.com/milvus-io/milvus/internal/allocator"
|
|
||||||
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
|
|
||||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
|
||||||
"github.com/milvus-io/milvus/internal/storage"
|
|
||||||
"github.com/milvus-io/milvus/pkg/common"
|
|
||||||
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
|
||||||
"github.com/milvus-io/milvus/pkg/util/tsoutil"
|
|
||||||
)
|
|
||||||
|
|
||||||
type StorageV2SerializerSuite struct {
|
|
||||||
suite.Suite
|
|
||||||
|
|
||||||
collectionID int64
|
|
||||||
partitionID int64
|
|
||||||
segmentID int64
|
|
||||||
channelName string
|
|
||||||
|
|
||||||
schema *schemapb.CollectionSchema
|
|
||||||
storageCache *metacache.StorageV2Cache
|
|
||||||
mockAllocator *allocator.MockAllocator
|
|
||||||
mockCache *metacache.MockMetaCache
|
|
||||||
mockMetaWriter *MockMetaWriter
|
|
||||||
|
|
||||||
serializer *storageV2Serializer
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StorageV2SerializerSuite) SetupSuite() {
|
|
||||||
paramtable.Get().Init(paramtable.NewBaseTable())
|
|
||||||
|
|
||||||
s.collectionID = rand.Int63n(100) + 1000
|
|
||||||
s.partitionID = rand.Int63n(100) + 2000
|
|
||||||
s.segmentID = rand.Int63n(1000) + 10000
|
|
||||||
s.channelName = fmt.Sprintf("by-dev-rootcoord-dml0_%d_v1", s.collectionID)
|
|
||||||
s.schema = &schemapb.CollectionSchema{
|
|
||||||
Name: "sync_task_test_col",
|
|
||||||
Fields: []*schemapb.FieldSchema{
|
|
||||||
{FieldID: common.RowIDField, DataType: schemapb.DataType_Int64, Name: common.RowIDFieldName},
|
|
||||||
{FieldID: common.TimeStampField, DataType: schemapb.DataType_Int64, Name: common.TimeStampFieldName},
|
|
||||||
{
|
|
||||||
FieldID: 100,
|
|
||||||
Name: "pk",
|
|
||||||
DataType: schemapb.DataType_Int64,
|
|
||||||
IsPrimaryKey: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
FieldID: 101,
|
|
||||||
Name: "vector",
|
|
||||||
DataType: schemapb.DataType_FloatVector,
|
|
||||||
TypeParams: []*commonpb.KeyValuePair{
|
|
||||||
{Key: common.DimKey, Value: "128"},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
s.mockAllocator = allocator.NewMockAllocator(s.T())
|
|
||||||
s.mockCache = metacache.NewMockMetaCache(s.T())
|
|
||||||
s.mockMetaWriter = NewMockMetaWriter(s.T())
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StorageV2SerializerSuite) SetupTest() {
|
|
||||||
storageCache, err := metacache.NewStorageV2Cache(s.schema)
|
|
||||||
s.Require().NoError(err)
|
|
||||||
s.storageCache = storageCache
|
|
||||||
|
|
||||||
s.mockCache.EXPECT().Collection().Return(s.collectionID)
|
|
||||||
s.mockCache.EXPECT().Schema().Return(s.schema)
|
|
||||||
|
|
||||||
s.serializer, err = NewStorageV2Serializer(storageCache, s.mockAllocator, s.mockCache, s.mockMetaWriter)
|
|
||||||
s.Require().NoError(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StorageV2SerializerSuite) getSpace() *milvus_storage.Space {
|
|
||||||
tmpDir := s.T().TempDir()
|
|
||||||
space, err := milvus_storage.Open(fmt.Sprintf("file:///%s", tmpDir), options.NewSpaceOptionBuilder().
|
|
||||||
SetSchema(schema.NewSchema(s.storageCache.ArrowSchema(), &schema.SchemaOptions{
|
|
||||||
PrimaryColumn: "pk", VectorColumn: "vector", VersionColumn: common.TimeStampFieldName,
|
|
||||||
})).Build())
|
|
||||||
s.Require().NoError(err)
|
|
||||||
return space
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StorageV2SerializerSuite) getBasicPack() *SyncPack {
|
|
||||||
pack := &SyncPack{}
|
|
||||||
|
|
||||||
pack.WithCollectionID(s.collectionID).
|
|
||||||
WithPartitionID(s.partitionID).
|
|
||||||
WithSegmentID(s.segmentID).
|
|
||||||
WithChannelName(s.channelName).
|
|
||||||
WithCheckpoint(&msgpb.MsgPosition{
|
|
||||||
Timestamp: 1000,
|
|
||||||
ChannelName: s.channelName,
|
|
||||||
})
|
|
||||||
|
|
||||||
return pack
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StorageV2SerializerSuite) getEmptyInsertBuffer() *storage.InsertData {
|
|
||||||
buf, err := storage.NewInsertData(s.schema)
|
|
||||||
s.Require().NoError(err)
|
|
||||||
|
|
||||||
return buf
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StorageV2SerializerSuite) getInsertBuffer() *storage.InsertData {
|
|
||||||
buf := s.getEmptyInsertBuffer()
|
|
||||||
|
|
||||||
// generate data
|
|
||||||
for i := 0; i < 10; i++ {
|
|
||||||
data := make(map[storage.FieldID]any)
|
|
||||||
data[common.RowIDField] = int64(i + 1)
|
|
||||||
data[common.TimeStampField] = int64(i + 1)
|
|
||||||
data[100] = int64(i + 1)
|
|
||||||
vector := lo.RepeatBy(128, func(_ int) float32 {
|
|
||||||
return rand.Float32()
|
|
||||||
})
|
|
||||||
data[101] = vector
|
|
||||||
err := buf.Append(data)
|
|
||||||
s.Require().NoError(err)
|
|
||||||
}
|
|
||||||
return buf
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StorageV2SerializerSuite) getDeleteBuffer() *storage.DeleteData {
|
|
||||||
buf := &storage.DeleteData{}
|
|
||||||
for i := 0; i < 10; i++ {
|
|
||||||
pk := storage.NewInt64PrimaryKey(int64(i + 1))
|
|
||||||
ts := tsoutil.ComposeTSByTime(time.Now(), 0)
|
|
||||||
buf.Append(pk, ts)
|
|
||||||
}
|
|
||||||
return buf
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StorageV2SerializerSuite) getDeleteBufferZeroTs() *storage.DeleteData {
|
|
||||||
buf := &storage.DeleteData{}
|
|
||||||
for i := 0; i < 10; i++ {
|
|
||||||
pk := storage.NewInt64PrimaryKey(int64(i + 1))
|
|
||||||
buf.Append(pk, 0)
|
|
||||||
}
|
|
||||||
return buf
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StorageV2SerializerSuite) getBfs() *metacache.BloomFilterSet {
|
|
||||||
bfs := metacache.NewBloomFilterSet()
|
|
||||||
fd, err := storage.NewFieldData(schemapb.DataType_Int64, &schemapb.FieldSchema{
|
|
||||||
FieldID: 101,
|
|
||||||
Name: "ID",
|
|
||||||
IsPrimaryKey: true,
|
|
||||||
DataType: schemapb.DataType_Int64,
|
|
||||||
}, 16)
|
|
||||||
s.Require().NoError(err)
|
|
||||||
|
|
||||||
ids := []int64{1, 2, 3, 4, 5, 6, 7}
|
|
||||||
for _, id := range ids {
|
|
||||||
err = fd.AppendRow(id)
|
|
||||||
s.Require().NoError(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
bfs.UpdatePKRange(fd)
|
|
||||||
return bfs
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StorageV2SerializerSuite) TestSerializeInsert() {
|
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
|
||||||
defer cancel()
|
|
||||||
s.storageCache.SetSpace(s.segmentID, s.getSpace())
|
|
||||||
|
|
||||||
s.Run("no_data", func() {
|
|
||||||
pack := s.getBasicPack()
|
|
||||||
pack.WithTimeRange(50, 100)
|
|
||||||
pack.WithDrop()
|
|
||||||
|
|
||||||
task, err := s.serializer.EncodeBuffer(ctx, pack)
|
|
||||||
s.NoError(err)
|
|
||||||
taskV1, ok := task.(*SyncTaskV2)
|
|
||||||
s.Require().True(ok)
|
|
||||||
s.Equal(s.collectionID, taskV1.collectionID)
|
|
||||||
s.Equal(s.partitionID, taskV1.partitionID)
|
|
||||||
s.Equal(s.channelName, taskV1.channelName)
|
|
||||||
s.Equal(&msgpb.MsgPosition{
|
|
||||||
Timestamp: 1000,
|
|
||||||
ChannelName: s.channelName,
|
|
||||||
}, taskV1.checkpoint)
|
|
||||||
s.EqualValues(50, taskV1.tsFrom)
|
|
||||||
s.EqualValues(100, taskV1.tsTo)
|
|
||||||
s.True(taskV1.isDrop)
|
|
||||||
})
|
|
||||||
|
|
||||||
s.Run("empty_insert_data", func() {
|
|
||||||
pack := s.getBasicPack()
|
|
||||||
pack.WithTimeRange(50, 100)
|
|
||||||
pack.WithInsertData([]*storage.InsertData{s.getEmptyInsertBuffer()}).WithBatchSize(0)
|
|
||||||
|
|
||||||
_, err := s.serializer.EncodeBuffer(ctx, pack)
|
|
||||||
s.Error(err)
|
|
||||||
})
|
|
||||||
|
|
||||||
s.Run("with_normal_data", func() {
|
|
||||||
pack := s.getBasicPack()
|
|
||||||
pack.WithTimeRange(50, 100)
|
|
||||||
pack.WithInsertData([]*storage.InsertData{s.getInsertBuffer()}).WithBatchSize(10)
|
|
||||||
|
|
||||||
s.mockCache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return().Once()
|
|
||||||
|
|
||||||
task, err := s.serializer.EncodeBuffer(ctx, pack)
|
|
||||||
s.NoError(err)
|
|
||||||
|
|
||||||
taskV2, ok := task.(*SyncTaskV2)
|
|
||||||
s.Require().True(ok)
|
|
||||||
s.Equal(s.collectionID, taskV2.collectionID)
|
|
||||||
s.Equal(s.partitionID, taskV2.partitionID)
|
|
||||||
s.Equal(s.channelName, taskV2.channelName)
|
|
||||||
s.Equal(&msgpb.MsgPosition{
|
|
||||||
Timestamp: 1000,
|
|
||||||
ChannelName: s.channelName,
|
|
||||||
}, taskV2.checkpoint)
|
|
||||||
s.EqualValues(50, taskV2.tsFrom)
|
|
||||||
s.EqualValues(100, taskV2.tsTo)
|
|
||||||
s.NotNil(taskV2.reader)
|
|
||||||
s.NotNil(taskV2.statsBlob)
|
|
||||||
})
|
|
||||||
|
|
||||||
s.Run("with_flush_segment_not_found", func() {
|
|
||||||
pack := s.getBasicPack()
|
|
||||||
pack.WithFlush()
|
|
||||||
|
|
||||||
s.mockCache.EXPECT().GetSegmentByID(s.segmentID).Return(nil, false).Once()
|
|
||||||
_, err := s.serializer.EncodeBuffer(ctx, pack)
|
|
||||||
s.Error(err)
|
|
||||||
})
|
|
||||||
|
|
||||||
s.Run("with_flush", func() {
|
|
||||||
pack := s.getBasicPack()
|
|
||||||
pack.WithTimeRange(50, 100)
|
|
||||||
pack.WithInsertData([]*storage.InsertData{s.getInsertBuffer()}).WithBatchSize(10)
|
|
||||||
pack.WithFlush()
|
|
||||||
|
|
||||||
bfs := s.getBfs()
|
|
||||||
segInfo := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs)
|
|
||||||
metacache.UpdateNumOfRows(1000)(segInfo)
|
|
||||||
s.mockCache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Run(func(action metacache.SegmentAction, filters ...metacache.SegmentFilter) {
|
|
||||||
action(segInfo)
|
|
||||||
}).Return().Once()
|
|
||||||
s.mockCache.EXPECT().GetSegmentByID(s.segmentID).Return(segInfo, true).Once()
|
|
||||||
|
|
||||||
task, err := s.serializer.EncodeBuffer(ctx, pack)
|
|
||||||
s.NoError(err)
|
|
||||||
|
|
||||||
taskV2, ok := task.(*SyncTaskV2)
|
|
||||||
s.Require().True(ok)
|
|
||||||
s.Equal(s.collectionID, taskV2.collectionID)
|
|
||||||
s.Equal(s.partitionID, taskV2.partitionID)
|
|
||||||
s.Equal(s.channelName, taskV2.channelName)
|
|
||||||
s.Equal(&msgpb.MsgPosition{
|
|
||||||
Timestamp: 1000,
|
|
||||||
ChannelName: s.channelName,
|
|
||||||
}, taskV2.checkpoint)
|
|
||||||
s.EqualValues(50, taskV2.tsFrom)
|
|
||||||
s.EqualValues(100, taskV2.tsTo)
|
|
||||||
s.NotNil(taskV2.mergedStatsBlob)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StorageV2SerializerSuite) TestSerializeDelete() {
|
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
s.Run("serialize_failed", func() {
|
|
||||||
pkField := s.serializer.pkField
|
|
||||||
s.serializer.pkField = &schemapb.FieldSchema{}
|
|
||||||
defer func() {
|
|
||||||
s.serializer.pkField = pkField
|
|
||||||
}()
|
|
||||||
pack := s.getBasicPack()
|
|
||||||
pack.WithDeleteData(s.getDeleteBufferZeroTs())
|
|
||||||
pack.WithTimeRange(50, 100)
|
|
||||||
|
|
||||||
_, err := s.serializer.EncodeBuffer(ctx, pack)
|
|
||||||
s.Error(err)
|
|
||||||
})
|
|
||||||
|
|
||||||
s.Run("serialize_failed_bad_pk", func() {
|
|
||||||
pkField := s.serializer.pkField
|
|
||||||
s.serializer.pkField = &schemapb.FieldSchema{
|
|
||||||
DataType: schemapb.DataType_Array,
|
|
||||||
}
|
|
||||||
defer func() {
|
|
||||||
s.serializer.pkField = pkField
|
|
||||||
}()
|
|
||||||
pack := s.getBasicPack()
|
|
||||||
pack.WithDeleteData(s.getDeleteBufferZeroTs())
|
|
||||||
pack.WithTimeRange(50, 100)
|
|
||||||
|
|
||||||
_, err := s.serializer.EncodeBuffer(ctx, pack)
|
|
||||||
s.Error(err)
|
|
||||||
})
|
|
||||||
|
|
||||||
s.Run("serialize_normal", func() {
|
|
||||||
pack := s.getBasicPack()
|
|
||||||
pack.WithDeleteData(s.getDeleteBuffer())
|
|
||||||
pack.WithTimeRange(50, 100)
|
|
||||||
|
|
||||||
task, err := s.serializer.EncodeBuffer(ctx, pack)
|
|
||||||
s.NoError(err)
|
|
||||||
|
|
||||||
taskV2, ok := task.(*SyncTaskV2)
|
|
||||||
s.Require().True(ok)
|
|
||||||
s.Equal(s.collectionID, taskV2.collectionID)
|
|
||||||
s.Equal(s.partitionID, taskV2.partitionID)
|
|
||||||
s.Equal(s.channelName, taskV2.channelName)
|
|
||||||
s.Equal(&msgpb.MsgPosition{
|
|
||||||
Timestamp: 1000,
|
|
||||||
ChannelName: s.channelName,
|
|
||||||
}, taskV2.checkpoint)
|
|
||||||
s.EqualValues(50, taskV2.tsFrom)
|
|
||||||
s.EqualValues(100, taskV2.tsTo)
|
|
||||||
s.NotNil(taskV2.deleteReader)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StorageV2SerializerSuite) TestBadSchema() {
|
|
||||||
mockCache := metacache.NewMockMetaCache(s.T())
|
|
||||||
mockCache.EXPECT().Collection().Return(s.collectionID).Once()
|
|
||||||
mockCache.EXPECT().Schema().Return(&schemapb.CollectionSchema{}).Once()
|
|
||||||
_, err := NewStorageV2Serializer(s.storageCache, s.mockAllocator, mockCache, s.mockMetaWriter)
|
|
||||||
s.Error(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestStorageV2Serializer(t *testing.T) {
|
|
||||||
suite.Run(t, new(StorageV2SerializerSuite))
|
|
||||||
}
|
|
||||||
@@ -99,7 +99,6 @@ func (mgr *syncManager) SyncData(ctx context.Context, task Task, callbacks ...fu
 	switch t := task.(type) {
 	case *SyncTask:
 		t.WithChunkManager(mgr.chunkManager)
-	case *SyncTaskV2:
 	}
 
 	return mgr.safeSubmitTask(ctx, task, callbacks...)
@@ -1,235 +0,0 @@
-// Licensed to the LF AI & Data foundation under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package syncmgr
-
-import (
-	"context"
-
-	"github.com/apache/arrow/go/v12/arrow"
-	"github.com/apache/arrow/go/v12/arrow/array"
-	"go.uber.org/zap"
-
-	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
-	"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
-	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
-	milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
-	"github.com/milvus-io/milvus-storage/go/storage/options"
-	"github.com/milvus-io/milvus/internal/allocator"
-	"github.com/milvus-io/milvus/internal/flushcommon/metacache"
-	"github.com/milvus-io/milvus/internal/proto/datapb"
-	"github.com/milvus-io/milvus/internal/storage"
-	"github.com/milvus-io/milvus/pkg/log"
-	"github.com/milvus-io/milvus/pkg/util/merr"
-	"github.com/milvus-io/milvus/pkg/util/retry"
-	"github.com/milvus-io/milvus/pkg/util/typeutil"
-)
-
-type SyncTaskV2 struct {
-	*SyncTask
-	arrowSchema    *arrow.Schema
-	reader         array.RecordReader
-	statsBlob      *storage.Blob
-	deleteReader   array.RecordReader
-	storageVersion int64
-	space          *milvus_storage.Space
-
-	failureCallback func(err error)
-}
-
-func (t *SyncTaskV2) getLogger() *log.MLogger {
-	return log.Ctx(context.Background()).With(
-		zap.Int64("collectionID", t.collectionID),
-		zap.Int64("partitionID", t.partitionID),
-		zap.Int64("segmentID", t.segmentID),
-		zap.String("channel", t.channelName),
-	)
-}
-
-func (t *SyncTaskV2) handleError(err error) {
-	if t.failureCallback != nil {
-		t.failureCallback(err)
-	}
-}
-
-func (t *SyncTaskV2) Run(ctx context.Context) error {
-	log := t.getLogger()
-	var err error
-
-	_, ok := t.metacache.GetSegmentByID(t.segmentID)
-	if !ok {
-		log.Warn("failed to sync data, segment not found in metacache")
-		t.handleError(err)
-		return merr.WrapErrSegmentNotFound(t.segmentID)
-	}
-
-	if err = t.writeSpace(); err != nil {
-		t.handleError(err)
-		return err
-	}
-
-	if err = t.writeMeta(); err != nil {
-		t.handleError(err)
-		return err
-	}
-
-	actions := []metacache.SegmentAction{metacache.FinishSyncing(t.batchSize)}
-	switch {
-	case t.isDrop:
-		actions = append(actions, metacache.UpdateState(commonpb.SegmentState_Dropped))
-	case t.isFlush:
-		actions = append(actions, metacache.UpdateState(commonpb.SegmentState_Flushed))
-	}
-
-	t.metacache.UpdateSegments(metacache.MergeSegmentAction(actions...), metacache.WithSegmentIDs(t.segmentID))
-
-	return nil
-}
-
-func (t *SyncTaskV2) writeSpace() error {
-	defer func() {
-		if t.reader != nil {
-			t.reader.Release()
-		}
-		if t.deleteReader != nil {
-			t.deleteReader.Release()
-		}
-	}()
-
-	txn := t.space.NewTransaction()
-	if t.reader != nil {
-		txn.Write(t.reader, &options.DefaultWriteOptions)
-	}
-	if t.deleteReader != nil {
-		txn.Delete(t.deleteReader)
-	}
-	if t.statsBlob != nil {
-		txn.WriteBlob(t.statsBlob.Value, t.statsBlob.Key, false)
-	}
-
-	return txn.Commit()
-}
-
-func (t *SyncTaskV2) writeMeta() error {
-	t.storageVersion = t.space.GetCurrentVersion()
-	return t.metaWriter.UpdateSyncV2(t)
-}
-
-func NewSyncTaskV2() *SyncTaskV2 {
-	return &SyncTaskV2{
-		SyncTask: NewSyncTask(),
-	}
-}
-
-func (t *SyncTaskV2) WithChunkManager(cm storage.ChunkManager) *SyncTaskV2 {
-	t.chunkManager = cm
-	return t
-}
-
-func (t *SyncTaskV2) WithAllocator(allocator allocator.Interface) *SyncTaskV2 {
-	t.allocator = allocator
-	return t
-}
-
-func (t *SyncTaskV2) WithStartPosition(start *msgpb.MsgPosition) *SyncTaskV2 {
-	t.startPosition = start
-	return t
-}
-
-func (t *SyncTaskV2) WithCheckpoint(cp *msgpb.MsgPosition) *SyncTaskV2 {
-	t.checkpoint = cp
-	return t
-}
-
-func (t *SyncTaskV2) WithCollectionID(collID int64) *SyncTaskV2 {
-	t.collectionID = collID
-	return t
-}
-
-func (t *SyncTaskV2) WithPartitionID(partID int64) *SyncTaskV2 {
-	t.partitionID = partID
-	return t
-}
-
-func (t *SyncTaskV2) WithSegmentID(segID int64) *SyncTaskV2 {
-	t.segmentID = segID
-	return t
-}
-
-func (t *SyncTaskV2) WithChannelName(chanName string) *SyncTaskV2 {
-	t.channelName = chanName
-	return t
-}
-
-func (t *SyncTaskV2) WithSchema(schema *schemapb.CollectionSchema) *SyncTaskV2 {
-	t.schema = schema
-	return t
-}
-
-func (t *SyncTaskV2) WithTimeRange(from, to typeutil.Timestamp) *SyncTaskV2 {
-	t.tsFrom, t.tsTo = from, to
-	return t
-}
-
-func (t *SyncTaskV2) WithFlush() *SyncTaskV2 {
-	t.isFlush = true
-	return t
-}
-
-func (t *SyncTaskV2) WithDrop() *SyncTaskV2 {
-	t.isDrop = true
-	return t
-}
-
-func (t *SyncTaskV2) WithMetaCache(metacache metacache.MetaCache) *SyncTaskV2 {
-	t.metacache = metacache
-	return t
-}
-
-func (t *SyncTaskV2) WithMetaWriter(metaWriter MetaWriter) *SyncTaskV2 {
-	t.metaWriter = metaWriter
-	return t
-}
-
-func (t *SyncTaskV2) WithWriteRetryOptions(opts ...retry.Option) *SyncTaskV2 {
-	t.writeRetryOpts = opts
-	return t
-}
-
-func (t *SyncTaskV2) WithFailureCallback(callback func(error)) *SyncTaskV2 {
-	t.failureCallback = callback
-	return t
-}
-
-func (t *SyncTaskV2) WithBatchSize(batchSize int64) *SyncTaskV2 {
-	t.batchSize = batchSize
-	return t
-}
-
-func (t *SyncTaskV2) WithSpace(space *milvus_storage.Space) *SyncTaskV2 {
-	t.space = space
-	return t
-}
-
-func (t *SyncTaskV2) WithArrowSchema(arrowSchema *arrow.Schema) *SyncTaskV2 {
-	t.arrowSchema = arrowSchema
-	return t
-}
-
-func (t *SyncTaskV2) WithLevel(level datapb.SegmentLevel) *SyncTaskV2 {
-	t.level = level
-	return t
-}
@ -1,403 +0,0 @@
|
|||||||
// Licensed to the LF AI & Data foundation under one
|
|
||||||
// or more contributor license agreements. See the NOTICE file
|
|
||||||
// distributed with this work for additional information
|
|
||||||
// regarding copyright ownership. The ASF licenses this file
|
|
||||||
// to you under the Apache License, Version 2.0 (the
|
|
||||||
// "License"); you may not use this file except in compliance
|
|
||||||
// with the License. You may obtain a copy of the License at
|
|
||||||
//
|
|
||||||
// http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
//
|
|
||||||
// Unless required by applicable law or agreed to in writing, software
|
|
||||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
// See the License for the specific language governing permissions and
|
|
||||||
// limitations under the License.
|
|
||||||
|
|
||||||
package syncmgr
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"fmt"
|
|
||||||
"math/rand"
|
|
||||||
"testing"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/apache/arrow/go/v12/arrow"
|
|
||||||
"github.com/apache/arrow/go/v12/arrow/array"
|
|
||||||
"github.com/apache/arrow/go/v12/arrow/memory"
|
|
||||||
"github.com/samber/lo"
|
|
||||||
"github.com/stretchr/testify/mock"
|
|
||||||
"github.com/stretchr/testify/suite"
|
|
||||||
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
||||||
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
|
|
||||||
"github.com/milvus-io/milvus-storage/go/storage/options"
|
|
||||||
"github.com/milvus-io/milvus-storage/go/storage/schema"
|
|
||||||
"github.com/milvus-io/milvus/internal/allocator"
|
|
||||||
"github.com/milvus-io/milvus/internal/datanode/broker"
|
|
||||||
"github.com/milvus-io/milvus/internal/flushcommon/metacache"
|
|
||||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
|
||||||
"github.com/milvus-io/milvus/internal/storage"
|
|
||||||
"github.com/milvus-io/milvus/internal/util/typeutil"
|
|
||||||
"github.com/milvus-io/milvus/pkg/common"
|
|
||||||
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
|
||||||
"github.com/milvus-io/milvus/pkg/util/tsoutil"
|
|
||||||
)
|
|
||||||
|
|
||||||
type SyncTaskSuiteV2 struct {
|
|
||||||
suite.Suite
|
|
||||||
|
|
||||||
collectionID int64
|
|
||||||
partitionID int64
|
|
||||||
segmentID int64
|
|
||||||
channelName string
|
|
||||||
|
|
||||||
metacache *metacache.MockMetaCache
|
|
||||||
allocator *allocator.MockGIDAllocator
|
|
||||||
schema *schemapb.CollectionSchema
|
|
||||||
arrowSchema *arrow.Schema
|
|
||||||
broker *broker.MockBroker
|
|
||||||
|
|
||||||
space *milvus_storage.Space
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SyncTaskSuiteV2) SetupSuite() {
|
|
||||||
paramtable.Get().Init(paramtable.NewBaseTable())
|
|
||||||
|
|
||||||
s.collectionID = 100
|
|
||||||
s.partitionID = 101
|
|
||||||
s.segmentID = 1001
|
|
||||||
s.channelName = "by-dev-rootcoord-dml_0_100v0"
|
|
||||||
|
|
||||||
s.schema = &schemapb.CollectionSchema{
|
|
||||||
Name: "sync_task_test_col",
|
|
||||||
Fields: []*schemapb.FieldSchema{
|
|
||||||
{FieldID: common.RowIDField, Name: common.RowIDFieldName, DataType: schemapb.DataType_Int64},
|
|
||||||
{FieldID: common.TimeStampField, Name: common.TimeStampFieldName, DataType: schemapb.DataType_Int64},
|
|
||||||
{
|
|
||||||
FieldID: 100,
|
|
||||||
Name: "pk",
|
|
||||||
DataType: schemapb.DataType_Int64,
|
|
||||||
IsPrimaryKey: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
FieldID: 101,
|
|
||||||
Name: "vector",
|
|
||||||
DataType: schemapb.DataType_FloatVector,
|
|
||||||
TypeParams: []*commonpb.KeyValuePair{
|
|
||||||
{Key: common.DimKey, Value: "128"},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
arrowSchema, err := typeutil.ConvertToArrowSchema(s.schema.Fields)
|
|
||||||
s.NoError(err)
|
|
||||||
s.arrowSchema = arrowSchema
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SyncTaskSuiteV2) SetupTest() {
|
|
||||||
s.allocator = allocator.NewMockGIDAllocator()
|
|
||||||
s.allocator.AllocF = func(count uint32) (int64, int64, error) {
|
|
||||||
return time.Now().Unix(), int64(count), nil
|
|
||||||
}
|
|
||||||
s.allocator.AllocOneF = func() (allocator.UniqueID, error) {
|
|
||||||
return time.Now().Unix(), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
s.broker = broker.NewMockBroker(s.T())
|
|
||||||
s.metacache = metacache.NewMockMetaCache(s.T())
|
|
||||||
|
|
||||||
tmpDir := s.T().TempDir()
|
|
||||||
space, err := milvus_storage.Open(fmt.Sprintf("file:///%s", tmpDir), options.NewSpaceOptionBuilder().
|
|
||||||
SetSchema(schema.NewSchema(s.arrowSchema, &schema.SchemaOptions{
|
|
||||||
PrimaryColumn: "pk", VectorColumn: "vector", VersionColumn: common.TimeStampFieldName,
|
|
||||||
})).Build())
|
|
||||||
s.Require().NoError(err)
|
|
||||||
s.space = space
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SyncTaskSuiteV2) getEmptyInsertBuffer() *storage.InsertData {
|
|
||||||
buf, err := storage.NewInsertData(s.schema)
|
|
||||||
s.Require().NoError(err)
|
|
||||||
|
|
||||||
return buf
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SyncTaskSuiteV2) getInsertBuffer() *storage.InsertData {
|
|
||||||
buf := s.getEmptyInsertBuffer()
|
|
||||||
|
|
||||||
// generate data
|
|
||||||
for i := 0; i < 10; i++ {
|
|
||||||
data := make(map[storage.FieldID]any)
|
|
||||||
data[common.RowIDField] = int64(i + 1)
|
|
||||||
data[common.TimeStampField] = int64(i + 1)
|
|
||||||
data[100] = int64(i + 1)
|
|
||||||
vector := lo.RepeatBy(128, func(_ int) float32 {
|
|
||||||
return rand.Float32()
|
|
||||||
})
|
|
||||||
data[101] = vector
|
|
||||||
err := buf.Append(data)
|
|
||||||
s.Require().NoError(err)
|
|
||||||
}
|
|
||||||
return buf
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SyncTaskSuiteV2) getDeleteBuffer() *storage.DeleteData {
|
|
||||||
buf := &storage.DeleteData{}
|
|
||||||
for i := 0; i < 10; i++ {
|
|
||||||
pk := storage.NewInt64PrimaryKey(int64(i + 1))
|
|
||||||
ts := tsoutil.ComposeTSByTime(time.Now(), 0)
|
|
||||||
buf.Append(pk, ts)
|
|
||||||
}
|
|
||||||
return buf
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SyncTaskSuiteV2) getDeleteBufferZeroTs() *storage.DeleteData {
|
|
||||||
buf := &storage.DeleteData{}
|
|
||||||
for i := 0; i < 10; i++ {
|
|
||||||
pk := storage.NewInt64PrimaryKey(int64(i + 1))
|
|
||||||
buf.Append(pk, 0)
|
|
||||||
}
|
|
||||||
return buf
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SyncTaskSuiteV2) getSuiteSyncTask() *SyncTaskV2 {
|
|
||||||
pack := &SyncPack{}
|
|
||||||
|
|
||||||
pack.WithCollectionID(s.collectionID).
|
|
||||||
WithPartitionID(s.partitionID).
|
|
||||||
WithSegmentID(s.segmentID).
|
|
||||||
WithChannelName(s.channelName).
|
|
||||||
WithCheckpoint(&msgpb.MsgPosition{
|
|
||||||
Timestamp: 1000,
|
|
||||||
ChannelName: s.channelName,
|
|
||||||
})
|
|
||||||
pack.WithInsertData([]*storage.InsertData{s.getInsertBuffer()}).WithBatchSize(10)
|
|
||||||
pack.WithDeleteData(s.getDeleteBuffer())
|
|
||||||
|
|
||||||
storageCache, err := metacache.NewStorageV2Cache(s.schema)
|
|
||||||
s.Require().NoError(err)
|
|
||||||
|
|
||||||
s.metacache.EXPECT().Collection().Return(s.collectionID)
|
|
||||||
s.metacache.EXPECT().Schema().Return(s.schema)
|
|
||||||
serializer, err := NewStorageV2Serializer(storageCache, s.allocator, s.metacache, nil)
|
|
||||||
s.Require().NoError(err)
|
|
||||||
task, err := serializer.EncodeBuffer(context.Background(), pack)
|
|
||||||
s.Require().NoError(err)
|
|
||||||
taskV2, ok := task.(*SyncTaskV2)
|
|
||||||
s.Require().True(ok)
|
|
||||||
taskV2.WithMetaCache(s.metacache)
|
|
||||||
|
|
||||||
return taskV2
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SyncTaskSuiteV2) TestRunNormal() {
|
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
|
||||||
defer cancel()
|
|
||||||
s.broker.EXPECT().SaveBinlogPaths(mock.Anything, mock.Anything).Return(nil)
|
|
||||||
bfs := metacache.NewBloomFilterSet()
|
|
||||||
fd, err := storage.NewFieldData(schemapb.DataType_Int64, &schemapb.FieldSchema{
|
|
||||||
FieldID: 101,
|
|
||||||
Name: "ID",
|
|
||||||
IsPrimaryKey: true,
|
|
||||||
DataType: schemapb.DataType_Int64,
|
|
||||||
}, 16)
|
|
||||||
s.Require().NoError(err)
|
|
||||||
|
|
||||||
ids := []int64{1, 2, 3, 4, 5, 6, 7}
|
|
||||||
for _, id := range ids {
|
|
||||||
err = fd.AppendRow(id)
|
|
||||||
s.Require().NoError(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
bfs.UpdatePKRange(fd)
|
|
||||||
seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs)
|
|
||||||
metacache.UpdateNumOfRows(1000)(seg)
|
|
||||||
s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true)
|
|
||||||
s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg})
|
|
||||||
s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return()
|
|
||||||
|
|
||||||
s.Run("without_insert_delete", func() {
|
|
||||||
task := s.getSuiteSyncTask()
|
|
||||||
task.WithMetaWriter(BrokerMetaWriter(s.broker, 1))
|
|
||||||
task.WithTimeRange(50, 100)
|
|
||||||
task.WithCheckpoint(&msgpb.MsgPosition{
|
|
||||||
ChannelName: s.channelName,
|
|
||||||
MsgID: []byte{1, 2, 3, 4},
|
|
||||||
Timestamp: 100,
|
|
||||||
})
|
|
||||||
|
|
||||||
err := task.Run(ctx)
|
|
||||||
s.NoError(err)
|
|
||||||
})
|
|
||||||
|
|
||||||
s.Run("with_insert_delete_cp", func() {
|
|
||||||
task := s.getSuiteSyncTask()
|
|
||||||
task.WithTimeRange(50, 100)
|
|
||||||
task.WithMetaWriter(BrokerMetaWriter(s.broker, 1))
|
|
||||||
task.WithCheckpoint(&msgpb.MsgPosition{
|
|
||||||
ChannelName: s.channelName,
|
|
||||||
MsgID: []byte{1, 2, 3, 4},
|
|
||||||
Timestamp: 100,
|
|
||||||
})
|
|
||||||
|
|
||||||
err := task.Run(ctx)
|
|
||||||
s.NoError(err)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SyncTaskSuiteV2) TestBuildRecord() {
|
|
||||||
fieldSchemas := []*schemapb.FieldSchema{
|
|
||||||
{FieldID: 1, Name: "field0", DataType: schemapb.DataType_Bool},
|
|
||||||
{FieldID: 2, Name: "field1", DataType: schemapb.DataType_Int8},
|
|
||||||
{FieldID: 3, Name: "field2", DataType: schemapb.DataType_Int16},
|
|
||||||
{FieldID: 4, Name: "field3", DataType: schemapb.DataType_Int32},
|
|
||||||
{FieldID: 5, Name: "field4", DataType: schemapb.DataType_Int64},
|
|
||||||
{FieldID: 6, Name: "field5", DataType: schemapb.DataType_Float},
|
|
||||||
{FieldID: 7, Name: "field6", DataType: schemapb.DataType_Double},
|
|
||||||
{FieldID: 8, Name: "field7", DataType: schemapb.DataType_String},
|
|
||||||
{FieldID: 9, Name: "field8", DataType: schemapb.DataType_VarChar},
|
|
||||||
{FieldID: 10, Name: "field9", DataType: schemapb.DataType_BinaryVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "8"}}},
|
|
||||||
{FieldID: 11, Name: "field10", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}},
|
|
||||||
{FieldID: 12, Name: "field11", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int32},
|
|
||||||
{FieldID: 13, Name: "field12", DataType: schemapb.DataType_JSON},
|
|
||||||
{FieldID: 14, Name: "field12", DataType: schemapb.DataType_Float16Vector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}},
|
|
||||||
}
|
|
||||||
|
|
||||||
schema, err := typeutil.ConvertToArrowSchema(fieldSchemas)
|
|
||||||
s.NoError(err)
|
|
||||||
|
|
||||||
b := array.NewRecordBuilder(memory.NewGoAllocator(), schema)
|
|
||||||
defer b.Release()
|
|
||||||
|
|
||||||
data := &storage.InsertData{
|
|
||||||
Data: map[int64]storage.FieldData{
|
|
||||||
1: &storage.BoolFieldData{Data: []bool{true, false}},
|
|
||||||
2: &storage.Int8FieldData{Data: []int8{3, 4}},
|
|
||||||
3: &storage.Int16FieldData{Data: []int16{3, 4}},
|
|
||||||
4: &storage.Int32FieldData{Data: []int32{3, 4}},
|
|
||||||
5: &storage.Int64FieldData{Data: []int64{3, 4}},
|
|
||||||
6: &storage.FloatFieldData{Data: []float32{3, 4}},
|
|
||||||
7: &storage.DoubleFieldData{Data: []float64{3, 4}},
|
|
||||||
8: &storage.StringFieldData{Data: []string{"3", "4"}},
|
|
||||||
9: &storage.StringFieldData{Data: []string{"3", "4"}},
|
|
||||||
10: &storage.BinaryVectorFieldData{Data: []byte{0, 255}, Dim: 8},
|
|
||||||
11: &storage.FloatVectorFieldData{
|
|
||||||
Data: []float32{4, 5, 6, 7, 4, 5, 6, 7},
|
|
||||||
Dim: 4,
|
|
||||||
},
|
|
||||||
12: &storage.ArrayFieldData{
|
|
||||||
ElementType: schemapb.DataType_Int32,
|
|
||||||
Data: []*schemapb.ScalarField{
|
|
||||||
{
|
|
||||||
Data: &schemapb.ScalarField_IntData{
|
|
||||||
IntData: &schemapb.IntArray{Data: []int32{3, 2, 1}},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Data: &schemapb.ScalarField_IntData{
|
|
||||||
IntData: &schemapb.IntArray{Data: []int32{6, 5, 4}},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
13: &storage.JSONFieldData{
|
|
||||||
Data: [][]byte{
|
|
||||||
[]byte(`{"batch":2}`),
|
|
||||||
[]byte(`{"key":"world"}`),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
14: &storage.Float16VectorFieldData{
|
|
||||||
Data: []byte{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255},
|
|
||||||
Dim: 4,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
err = typeutil.BuildRecord(b, data, fieldSchemas)
|
|
||||||
s.NoError(err)
|
|
||||||
s.EqualValues(2, b.NewRecord().NumRows())
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SyncTaskSuiteV2) TestBuildRecordNullable() {
|
|
||||||
fieldSchemas := []*schemapb.FieldSchema{
|
|
||||||
{FieldID: 1, Name: "field0", DataType: schemapb.DataType_Bool},
|
|
||||||
{FieldID: 2, Name: "field1", DataType: schemapb.DataType_Int8},
|
|
||||||
{FieldID: 3, Name: "field2", DataType: schemapb.DataType_Int16},
|
|
||||||
{FieldID: 4, Name: "field3", DataType: schemapb.DataType_Int32},
|
|
||||||
{FieldID: 5, Name: "field4", DataType: schemapb.DataType_Int64},
|
|
||||||
{FieldID: 6, Name: "field5", DataType: schemapb.DataType_Float},
|
|
||||||
{FieldID: 7, Name: "field6", DataType: schemapb.DataType_Double},
|
|
||||||
{FieldID: 8, Name: "field7", DataType: schemapb.DataType_String},
|
|
||||||
{FieldID: 9, Name: "field8", DataType: schemapb.DataType_VarChar},
|
|
||||||
{FieldID: 10, Name: "field9", DataType: schemapb.DataType_BinaryVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "8"}}},
|
|
||||||
{FieldID: 11, Name: "field10", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}},
|
|
||||||
{FieldID: 12, Name: "field11", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int32},
|
|
||||||
{FieldID: 13, Name: "field12", DataType: schemapb.DataType_JSON},
|
|
||||||
{FieldID: 14, Name: "field12", DataType: schemapb.DataType_Float16Vector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}},
|
|
||||||
}
|
|
||||||
|
|
||||||
schema, err := typeutil.ConvertToArrowSchema(fieldSchemas)
|
|
||||||
s.NoError(err)
|
|
||||||
|
|
||||||
b := array.NewRecordBuilder(memory.NewGoAllocator(), schema)
|
|
||||||
defer b.Release()
|
|
||||||
|
|
||||||
data := &storage.InsertData{
|
|
||||||
Data: map[int64]storage.FieldData{
|
|
||||||
1: &storage.BoolFieldData{Data: []bool{true, false}, ValidData: []bool{true, true}},
|
|
||||||
2: &storage.Int8FieldData{Data: []int8{3, 4}, ValidData: []bool{true, true}},
|
|
||||||
3: &storage.Int16FieldData{Data: []int16{3, 4}, ValidData: []bool{true, true}},
|
|
||||||
4: &storage.Int32FieldData{Data: []int32{3, 4}, ValidData: []bool{true, true}},
|
|
||||||
5: &storage.Int64FieldData{Data: []int64{3, 4}, ValidData: []bool{true, true}},
|
|
||||||
6: &storage.FloatFieldData{Data: []float32{3, 4}, ValidData: []bool{true, true}},
|
|
||||||
7: &storage.DoubleFieldData{Data: []float64{3, 4}, ValidData: []bool{true, true}},
|
|
||||||
8: &storage.StringFieldData{Data: []string{"3", "4"}, ValidData: []bool{true, true}},
|
|
||||||
9: &storage.StringFieldData{Data: []string{"3", "4"}, ValidData: []bool{true, true}},
|
|
||||||
10: &storage.BinaryVectorFieldData{Data: []byte{0, 255}, Dim: 8},
|
|
||||||
11: &storage.FloatVectorFieldData{
|
|
||||||
Data: []float32{4, 5, 6, 7, 4, 5, 6, 7},
|
|
||||||
Dim: 4,
|
|
||||||
},
|
|
||||||
12: &storage.ArrayFieldData{
|
|
||||||
ElementType: schemapb.DataType_Int32,
|
|
||||||
Data: []*schemapb.ScalarField{
|
|
||||||
{
|
|
||||||
Data: &schemapb.ScalarField_IntData{
|
|
||||||
IntData: &schemapb.IntArray{Data: []int32{3, 2, 1}},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Data: &schemapb.ScalarField_IntData{
|
|
||||||
IntData: &schemapb.IntArray{Data: []int32{6, 5, 4}},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
ValidData: []bool{true, true},
|
|
||||||
},
|
|
||||||
13: &storage.JSONFieldData{
|
|
||||||
Data: [][]byte{
|
|
||||||
[]byte(`{"batch":2}`),
|
|
||||||
[]byte(`{"key":"world"}`),
|
|
||||||
},
|
|
||||||
ValidData: []bool{true, true},
|
|
||||||
},
|
|
||||||
14: &storage.Float16VectorFieldData{
|
|
||||||
Data: []byte{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255},
|
|
||||||
Dim: 4,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
err = typeutil.BuildRecord(b, data, fieldSchemas)
|
|
||||||
s.NoError(err)
|
|
||||||
s.EqualValues(2, b.NewRecord().NumRows())
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestSyncTaskV2(t *testing.T) {
|
|
||||||
suite.Run(t, new(SyncTaskSuiteV2))
|
|
||||||
}
|
|
||||||
@@ -19,8 +19,8 @@ type bfWriteBuffer struct {
 	metacache metacache.MetaCache
 }
 
-func NewBFWriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) {
-	base, err := newWriteBufferBase(channel, metacache, storageV2Cache, syncMgr, option)
+func NewBFWriteBuffer(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) {
+	base, err := newWriteBufferBase(channel, metacache, syncMgr, option)
 	if err != nil {
 		return nil, err
 	}
@ -13,16 +13,11 @@ import (
 	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
 	"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
 	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
-	milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
-	"github.com/milvus-io/milvus-storage/go/storage/options"
-	"github.com/milvus-io/milvus-storage/go/storage/schema"
 	"github.com/milvus-io/milvus/internal/datanode/broker"
 	"github.com/milvus-io/milvus/internal/flushcommon/metacache"
 	"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
 	"github.com/milvus-io/milvus/internal/proto/datapb"
-	"github.com/milvus-io/milvus/internal/querycoordv2/params"
 	"github.com/milvus-io/milvus/internal/storage"
-	"github.com/milvus-io/milvus/internal/util/typeutil"
 	"github.com/milvus-io/milvus/pkg/common"
 	"github.com/milvus-io/milvus/pkg/metrics"
 	"github.com/milvus-io/milvus/pkg/mq/msgstream"
@ -41,7 +36,6 @@ type BFWriteBufferSuite struct {
 	metacacheInt64   *metacache.MockMetaCache
 	metacacheVarchar *metacache.MockMetaCache
 	broker           *broker.MockBroker
-	storageV2Cache   *metacache.StorageV2Cache
 }
 
 func (s *BFWriteBufferSuite) SetupSuite() {
@ -89,10 +83,6 @@ func (s *BFWriteBufferSuite) SetupSuite() {
 			},
 		},
 	}
-
-	storageCache, err := metacache.NewStorageV2Cache(s.collInt64Schema)
-	s.Require().NoError(err)
-	s.storageV2Cache = storageCache
 }
 
 func (s *BFWriteBufferSuite) composeInsertMsg(segmentID int64, rowCount int, dim int, pkType schemapb.DataType) ([]int64, *msgstream.InsertMsg) {
@ -201,16 +191,11 @@ func (s *BFWriteBufferSuite) SetupTest() {
 	s.metacacheVarchar.EXPECT().Collection().Return(s.collID).Maybe()
 
 	s.broker = broker.NewMockBroker(s.T())
-	var err error
-	s.storageV2Cache, err = metacache.NewStorageV2Cache(s.collInt64Schema)
-	s.Require().NoError(err)
 }
 
 func (s *BFWriteBufferSuite) TestBufferData() {
 	s.Run("normal_run_int64", func() {
-		storageCache, err := metacache.NewStorageV2Cache(s.collInt64Schema)
-		s.Require().NoError(err)
-		wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, storageCache, s.syncMgr, &writeBufferOption{})
+		wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.syncMgr, &writeBufferOption{})
 		s.NoError(err)
 
 		seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
@ -237,9 +222,7 @@ func (s *BFWriteBufferSuite) TestBufferData() {
 	})
 
 	s.Run("normal_run_varchar", func() {
-		storageCache, err := metacache.NewStorageV2Cache(s.collVarcharSchema)
-		s.Require().NoError(err)
-		wb, err := NewBFWriteBuffer(s.channelName, s.metacacheVarchar, storageCache, s.syncMgr, &writeBufferOption{})
+		wb, err := NewBFWriteBuffer(s.channelName, s.metacacheVarchar, s.syncMgr, &writeBufferOption{})
 		s.NoError(err)
 
 		seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
@ -261,9 +244,7 @@ func (s *BFWriteBufferSuite) TestBufferData() {
 	})
 
 	s.Run("int_pk_type_not_match", func() {
-		storageCache, err := metacache.NewStorageV2Cache(s.collInt64Schema)
-		s.Require().NoError(err)
-		wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, storageCache, s.syncMgr, &writeBufferOption{})
+		wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.syncMgr, &writeBufferOption{})
 		s.NoError(err)
 
 		seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
@ -281,9 +262,7 @@ func (s *BFWriteBufferSuite) TestBufferData() {
 	})
 
 	s.Run("varchar_pk_not_match", func() {
-		storageCache, err := metacache.NewStorageV2Cache(s.collVarcharSchema)
-		s.Require().NoError(err)
-		wb, err := NewBFWriteBuffer(s.channelName, s.metacacheVarchar, storageCache, s.syncMgr, &writeBufferOption{})
+		wb, err := NewBFWriteBuffer(s.channelName, s.metacacheVarchar, s.syncMgr, &writeBufferOption{})
 		s.NoError(err)
 
 		seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
@ -305,7 +284,7 @@ func (s *BFWriteBufferSuite) TestAutoSync() {
 	paramtable.Get().Save(paramtable.Get().DataNodeCfg.FlushInsertBufferSize.Key, "1")
 
 	s.Run("normal_auto_sync", func() {
-		wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, nil, s.syncMgr, &writeBufferOption{
+		wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.syncMgr, &writeBufferOption{
 			syncPolicies: []SyncPolicy{
 				GetFullBufferPolicy(),
 				GetSyncStaleBufferPolicy(paramtable.Get().DataNodeCfg.SyncPeriod.GetAsDuration(time.Second)),
@ -340,92 +319,11 @@ func (s *BFWriteBufferSuite) TestAutoSync() {
 	})
 }
 
-func (s *BFWriteBufferSuite) TestBufferDataWithStorageV2() {
-	params.Params.CommonCfg.EnableStorageV2.SwapTempValue("true")
-	defer paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("false")
-	params.Params.CommonCfg.StorageScheme.SwapTempValue("file")
-	tmpDir := s.T().TempDir()
-	arrowSchema, err := typeutil.ConvertToArrowSchema(s.collInt64Schema.Fields)
-	s.Require().NoError(err)
-	space, err := milvus_storage.Open(fmt.Sprintf("file:///%s", tmpDir), options.NewSpaceOptionBuilder().
-		SetSchema(schema.NewSchema(arrowSchema, &schema.SchemaOptions{
-			PrimaryColumn: "pk", VectorColumn: "vector", VersionColumn: common.TimeStampFieldName,
-		})).Build())
-	s.Require().NoError(err)
-	s.storageV2Cache.SetSpace(1000, space)
-	wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.storageV2Cache, s.syncMgr, &writeBufferOption{})
-	s.NoError(err)
-
-	seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
-	s.metacacheInt64.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg})
-	s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false)
-	s.metacacheInt64.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return()
-	s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return()
-
-	pks, msg := s.composeInsertMsg(1000, 10, 128, schemapb.DataType_Int64)
-	delMsg := s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) storage.PrimaryKey { return storage.NewInt64PrimaryKey(id) }))
-
-	err = wb.BufferData([]*msgstream.InsertMsg{msg}, []*msgstream.DeleteMsg{delMsg}, &msgpb.MsgPosition{Timestamp: 100}, &msgpb.MsgPosition{Timestamp: 200})
-	s.NoError(err)
-}
-
-func (s *BFWriteBufferSuite) TestAutoSyncWithStorageV2() {
-	params.Params.CommonCfg.EnableStorageV2.SwapTempValue("true")
-	defer paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("false")
-	paramtable.Get().Save(paramtable.Get().DataNodeCfg.FlushInsertBufferSize.Key, "1")
-	tmpDir := s.T().TempDir()
-	arrowSchema, err := typeutil.ConvertToArrowSchema(s.collInt64Schema.Fields)
-	s.Require().NoError(err)
-
-	space, err := milvus_storage.Open(fmt.Sprintf("file:///%s", tmpDir), options.NewSpaceOptionBuilder().
-		SetSchema(schema.NewSchema(arrowSchema, &schema.SchemaOptions{
-			PrimaryColumn: "pk", VectorColumn: "vector", VersionColumn: common.TimeStampFieldName,
-		})).Build())
-	s.Require().NoError(err)
-	s.storageV2Cache.SetSpace(1002, space)
-
-	s.Run("normal_auto_sync", func() {
-		wb, err := NewBFWriteBuffer(s.channelName, s.metacacheInt64, s.storageV2Cache, s.syncMgr, &writeBufferOption{
-			syncPolicies: []SyncPolicy{
-				GetFullBufferPolicy(),
-				GetSyncStaleBufferPolicy(paramtable.Get().DataNodeCfg.SyncPeriod.GetAsDuration(time.Second)),
-				GetSealedSegmentsPolicy(s.metacacheInt64),
-			},
-		})
-		s.NoError(err)
-
-		seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
-		seg1 := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1002}, metacache.NewBloomFilterSet())
-		segCompacted := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet())
-
-		s.metacacheInt64.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg, segCompacted})
-		s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false).Once()
-		s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(seg, true).Once()
-		s.metacacheInt64.EXPECT().GetSegmentByID(int64(1002)).Return(seg1, true)
-		s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything).Return([]int64{1002})
-		s.metacacheInt64.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return()
-		s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return()
-		s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything, mock.Anything).Return()
-		s.syncMgr.EXPECT().SyncData(mock.Anything, mock.Anything, mock.Anything).Return(nil)
-
-		pks, msg := s.composeInsertMsg(1000, 10, 128, schemapb.DataType_Int64)
-		delMsg := s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) storage.PrimaryKey { return storage.NewInt64PrimaryKey(id) }))
-
-		metrics.DataNodeFlowGraphBufferDataSize.Reset()
-		err = wb.BufferData([]*msgstream.InsertMsg{msg}, []*msgstream.DeleteMsg{delMsg}, &msgpb.MsgPosition{Timestamp: 100}, &msgpb.MsgPosition{Timestamp: 200})
-		s.NoError(err)
-
-		value, err := metrics.DataNodeFlowGraphBufferDataSize.GetMetricWithLabelValues(fmt.Sprint(paramtable.GetNodeID()), fmt.Sprint(s.metacacheInt64.Collection()))
-		s.NoError(err)
-		s.MetricsEqual(value, 0)
-	})
-}
-
 func (s *BFWriteBufferSuite) TestCreateFailure() {
 	metacache := metacache.NewMockMetaCache(s.T())
 	metacache.EXPECT().Collection().Return(s.collID)
 	metacache.EXPECT().Schema().Return(&schemapb.CollectionSchema{})
-	_, err := NewBFWriteBuffer(s.channelName, metacache, s.storageV2Cache, s.syncMgr, &writeBufferOption{})
+	_, err := NewBFWriteBuffer(s.channelName, metacache, s.syncMgr, &writeBufferOption{})
 	s.Error(err)
 }
 
@ -33,11 +33,11 @@ type l0WriteBuffer struct {
 	idAllocator allocator.Interface
 }
 
-func NewL0WriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) {
+func NewL0WriteBuffer(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) {
 	if option.idAllocator == nil {
 		return nil, merr.WrapErrServiceInternal("id allocator is nil when creating l0 write buffer")
 	}
-	base, err := newWriteBufferBase(channel, metacache, storageV2Cache, syncMgr, option)
+	base, err := newWriteBufferBase(channel, metacache, syncMgr, option)
 	if err != nil {
 		return nil, err
 	}
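For the L0 variant the only remaining requirement beyond the three dependencies is a non-nil id allocator in the option, as the nil check above shows. A hedged sketch, assuming it sits inside the writebuffer package and that idAlloc satisfies allocator.Interface:

// Illustrative only: create an L0 write buffer after the StorageV2 cache was dropped.
func buildL0WriteBuffer(channel string, mc metacache.MetaCache, syncMgr syncmgr.SyncManager, idAlloc allocator.Interface) (WriteBuffer, error) {
	return NewL0WriteBuffer(channel, mc, syncMgr, &writeBufferOption{
		idAllocator: idAlloc, // NewL0WriteBuffer returns an error when this is nil
	})
}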
@ -28,13 +28,12 @@ import (
 
 type L0WriteBufferSuite struct {
 	testutils.PromMetricsSuite
 	channelName  string
 	collID       int64
 	collSchema   *schemapb.CollectionSchema
 	syncMgr      *syncmgr.MockSyncManager
 	metacache    *metacache.MockMetaCache
 	allocator    *allocator.MockGIDAllocator
-	storageCache *metacache.StorageV2Cache
 }
 
 func (s *L0WriteBufferSuite) SetupSuite() {
@ -61,10 +60,6 @@ func (s *L0WriteBufferSuite) SetupSuite() {
 		},
 	}
 	s.channelName = "by-dev-rootcoord-dml_0v0"
-
-	storageCache, err := metacache.NewStorageV2Cache(s.collSchema)
-	s.Require().NoError(err)
-	s.storageCache = storageCache
 }
 
 func (s *L0WriteBufferSuite) composeInsertMsg(segmentID int64, rowCount int, dim int, pkType schemapb.DataType) ([]int64, *msgstream.InsertMsg) {
@ -173,7 +168,7 @@ func (s *L0WriteBufferSuite) SetupTest() {
 
 func (s *L0WriteBufferSuite) TestBufferData() {
 	s.Run("normal_run", func() {
-		wb, err := NewL0WriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{
+		wb, err := NewL0WriteBuffer(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{
 			idAllocator: s.allocator,
 		})
 		s.NoError(err)
@ -202,7 +197,7 @@ func (s *L0WriteBufferSuite) TestBufferData() {
 	})
 
 	s.Run("pk_type_not_match", func() {
-		wb, err := NewL0WriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{
+		wb, err := NewL0WriteBuffer(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{
			idAllocator: s.allocator,
 		})
 		s.NoError(err)
@ -225,7 +220,7 @@ func (s *L0WriteBufferSuite) TestCreateFailure() {
 	metacache := metacache.NewMockMetaCache(s.T())
 	metacache.EXPECT().Collection().Return(s.collID)
 	metacache.EXPECT().Schema().Return(&schemapb.CollectionSchema{})
-	_, err := NewL0WriteBuffer(s.channelName, metacache, s.storageCache, s.syncMgr, &writeBufferOption{
+	_, err := NewL0WriteBuffer(s.channelName, metacache, s.syncMgr, &writeBufferOption{
 		idAllocator: s.allocator,
 	})
 	s.Error(err)
@ -23,7 +23,7 @@ import (
 //go:generate mockery --name=BufferManager --structname=MockBufferManager --output=./ --filename=mock_manager.go --with-expecter --inpackage
 type BufferManager interface {
 	// Register adds a WriteBuffer with provided schema & options.
-	Register(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption) error
+	Register(channel string, metacache metacache.MetaCache, opts ...WriteBufferOption) error
 	// SealSegments notifies writeBuffer corresponding to provided channel to seal segments.
 	// which will cause segment start flush procedure.
 	SealSegments(ctx context.Context, channel string, segmentIDs []int64) error
@ -140,7 +140,7 @@ func (m *bufferManager) Stop() {
 }
 
 // Register a new WriteBuffer for channel.
-func (m *bufferManager) Register(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption) error {
+func (m *bufferManager) Register(channel string, metacache metacache.MetaCache, opts ...WriteBufferOption) error {
 	m.mut.Lock()
 	defer m.mut.Unlock()
 
@ -148,7 +148,7 @@ func (m *bufferManager) Register(channel string, metacache metacache.MetaCache,
 	if ok {
 		return merr.WrapErrChannelReduplicate(channel)
 	}
-	buf, err := NewWriteBuffer(channel, metacache, storageV2Cache, m.syncMgr, opts...)
+	buf, err := NewWriteBuffer(channel, metacache, m.syncMgr, opts...)
 	if err != nil {
 		return err
 	}
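Callers of BufferManager therefore lose one positional argument as well. A minimal sketch, assuming mgr, mc and idAlloc are already set up elsewhere and using the WithIDAllocator option that appears in the tests below:

// Hedged sketch of the caller-side change: registering a channel now takes only
// the channel name, the metacache and functional options.
func registerChannel(mgr BufferManager, channel string, mc metacache.MetaCache, idAlloc allocator.Interface) error {
	// Duplicate registration still surfaces merr.ErrChannelReduplicate.
	return mgr.Register(channel, mc, WithIDAllocator(idAlloc))
}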
@ -73,13 +73,10 @@ func (s *ManagerSuite) SetupTest() {
 func (s *ManagerSuite) TestRegister() {
 	manager := s.manager
 
-	storageCache, err := metacache.NewStorageV2Cache(s.collSchema)
-	s.Require().NoError(err)
-
-	err = manager.Register(s.channelName, s.metacache, storageCache, WithIDAllocator(s.allocator))
+	err := manager.Register(s.channelName, s.metacache, WithIDAllocator(s.allocator))
 	s.NoError(err)
 
-	err = manager.Register(s.channelName, s.metacache, storageCache, WithIDAllocator(s.allocator))
+	err = manager.Register(s.channelName, s.metacache, WithIDAllocator(s.allocator))
 	s.Error(err)
 	s.ErrorIs(err, merr.ErrChannelReduplicate)
 }
@ -183,9 +180,7 @@ func (s *ManagerSuite) TestRemoveChannel() {
 	})
 
 	s.Run("remove_channel", func() {
-		storageCache, err := metacache.NewStorageV2Cache(s.collSchema)
-		s.Require().NoError(err)
-		err = manager.Register(s.channelName, s.metacache, storageCache, WithIDAllocator(s.allocator))
+		err := manager.Register(s.channelName, s.metacache, WithIDAllocator(s.allocator))
 		s.Require().NoError(err)
 
 		s.NotPanics(func() {
@ -1,4 +1,4 @@
-// Code generated by mockery v2.30.1. DO NOT EDIT.
+// Code generated by mockery v2.32.4. DO NOT EDIT.
 
 package writebuffer
 
@ -278,20 +278,20 @@ func (_c *MockBufferManager_NotifyCheckpointUpdated_Call) RunAndReturn(run func(
 	return _c
 }
 
-// Register provides a mock function with given fields: channel, _a1, storageV2Cache, opts
-func (_m *MockBufferManager) Register(channel string, _a1 metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption) error {
+// Register provides a mock function with given fields: channel, _a1, opts
+func (_m *MockBufferManager) Register(channel string, _a1 metacache.MetaCache, opts ...WriteBufferOption) error {
 	_va := make([]interface{}, len(opts))
 	for _i := range opts {
 		_va[_i] = opts[_i]
 	}
 	var _ca []interface{}
-	_ca = append(_ca, channel, _a1, storageV2Cache)
+	_ca = append(_ca, channel, _a1)
 	_ca = append(_ca, _va...)
 	ret := _m.Called(_ca...)
 
 	var r0 error
-	if rf, ok := ret.Get(0).(func(string, metacache.MetaCache, *metacache.StorageV2Cache, ...WriteBufferOption) error); ok {
-		r0 = rf(channel, _a1, storageV2Cache, opts...)
+	if rf, ok := ret.Get(0).(func(string, metacache.MetaCache, ...WriteBufferOption) error); ok {
+		r0 = rf(channel, _a1, opts...)
 	} else {
 		r0 = ret.Error(0)
 	}
@ -307,22 +307,21 @@ type MockBufferManager_Register_Call struct {
 // Register is a helper method to define mock.On call
 // - channel string
 // - _a1 metacache.MetaCache
-// - storageV2Cache *metacache.StorageV2Cache
 // - opts ...WriteBufferOption
-func (_e *MockBufferManager_Expecter) Register(channel interface{}, _a1 interface{}, storageV2Cache interface{}, opts ...interface{}) *MockBufferManager_Register_Call {
+func (_e *MockBufferManager_Expecter) Register(channel interface{}, _a1 interface{}, opts ...interface{}) *MockBufferManager_Register_Call {
 	return &MockBufferManager_Register_Call{Call: _e.mock.On("Register",
-		append([]interface{}{channel, _a1, storageV2Cache}, opts...)...)}
+		append([]interface{}{channel, _a1}, opts...)...)}
 }
 
-func (_c *MockBufferManager_Register_Call) Run(run func(channel string, _a1 metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption)) *MockBufferManager_Register_Call {
+func (_c *MockBufferManager_Register_Call) Run(run func(channel string, _a1 metacache.MetaCache, opts ...WriteBufferOption)) *MockBufferManager_Register_Call {
 	_c.Call.Run(func(args mock.Arguments) {
-		variadicArgs := make([]WriteBufferOption, len(args)-3)
-		for i, a := range args[3:] {
+		variadicArgs := make([]WriteBufferOption, len(args)-2)
+		for i, a := range args[2:] {
 			if a != nil {
 				variadicArgs[i] = a.(WriteBufferOption)
 			}
 		}
-		run(args[0].(string), args[1].(metacache.MetaCache), args[2].(*metacache.StorageV2Cache), variadicArgs...)
+		run(args[0].(string), args[1].(metacache.MetaCache), variadicArgs...)
 	})
 	return _c
 }
@ -332,7 +331,7 @@ func (_c *MockBufferManager_Register_Call) Return(_a0 error) *MockBufferManager_
 	return _c
 }
 
-func (_c *MockBufferManager_Register_Call) RunAndReturn(run func(string, metacache.MetaCache, *metacache.StorageV2Cache, ...WriteBufferOption) error) *MockBufferManager_Register_Call {
+func (_c *MockBufferManager_Register_Call) RunAndReturn(run func(string, metacache.MetaCache, ...WriteBufferOption) error) *MockBufferManager_Register_Call {
 	_c.Call.Return(run)
 	return _c
 }
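Because the regenerated MockBufferManager.Register drops the StorageV2Cache argument, existing test expectations also lose one positional matcher. A hedged sketch of what an expectation might look like now, assuming testify's mock package is imported as in the surrounding tests:

// Illustrative only: expectation against the regenerated mock.
func expectRegister(mockMgr *MockBufferManager) {
	// Matches Register(channel, metacache) called with no options; append one
	// matcher per expected WriteBufferOption if the caller passes any.
	mockMgr.EXPECT().
		Register(mock.Anything, mock.Anything).
		Return(nil)
}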
@ -16,7 +16,6 @@ import (
 	"github.com/milvus-io/milvus/internal/flushcommon/metacache"
 	"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
 	"github.com/milvus-io/milvus/internal/proto/datapb"
-	"github.com/milvus-io/milvus/internal/querycoordv2/params"
 	"github.com/milvus-io/milvus/internal/storage"
 	"github.com/milvus-io/milvus/pkg/log"
 	"github.com/milvus-io/milvus/pkg/metrics"
@ -100,7 +99,7 @@ func (c *checkpointCandidates) GetEarliestWithDefault(def *checkpointCandidate)
 	return result
 }
 
-func NewWriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, opts ...WriteBufferOption) (WriteBuffer, error) {
+func NewWriteBuffer(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, opts ...WriteBufferOption) (WriteBuffer, error) {
 	option := defaultWBOption(metacache)
 	for _, opt := range opts {
 		opt(option)
@ -108,9 +107,9 @@ func NewWriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cach
 
 	switch option.deletePolicy {
 	case DeletePolicyBFPkOracle:
-		return NewBFWriteBuffer(channel, metacache, storageV2Cache, syncMgr, option)
+		return NewBFWriteBuffer(channel, metacache, syncMgr, option)
 	case DeletePolicyL0Delta:
-		return NewL0WriteBuffer(channel, metacache, storageV2Cache, syncMgr, option)
+		return NewL0WriteBuffer(channel, metacache, syncMgr, option)
 	default:
 		return nil, merr.WrapErrParameterInvalid("valid delete policy config", option.deletePolicy)
 	}
@ -140,34 +139,23 @@ type writeBufferBase struct {
 	checkpoint     *msgpb.MsgPosition
 	flushTimestamp *atomic.Uint64
 
-	storagev2Cache *metacache.StorageV2Cache
-
 	// pre build logger
 	logger        *log.MLogger
 	cpRatedLogger *log.MLogger
 }
 
-func newWriteBufferBase(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (*writeBufferBase, error) {
+func newWriteBufferBase(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (*writeBufferBase, error) {
 	flushTs := atomic.NewUint64(nonFlushTS)
 	flushTsPolicy := GetFlushTsPolicy(flushTs, metacache)
 	option.syncPolicies = append(option.syncPolicies, flushTsPolicy)
 
 	var serializer syncmgr.Serializer
 	var err error
-	if params.Params.CommonCfg.EnableStorageV2.GetAsBool() {
-		serializer, err = syncmgr.NewStorageV2Serializer(
-			option.idAllocator,
-			storageV2Cache,
-			metacache,
-			option.metaWriter,
-		)
-	} else {
-		serializer, err = syncmgr.NewStorageSerializer(
-			option.idAllocator,
-			metacache,
-			option.metaWriter,
-		)
-	}
+	serializer, err = syncmgr.NewStorageSerializer(
+		option.idAllocator,
+		metacache,
+		option.metaWriter,
+	)
 	if err != nil {
 		return nil, err
 	}
@ -201,7 +189,6 @@ func newWriteBufferBase(channel string, metacache metacache.MetaCache, storageV2
 		syncCheckpoint: newCheckpointCandiates(),
 		syncPolicies:   option.syncPolicies,
 		flushTimestamp: flushTs,
-		storagev2Cache: storageV2Cache,
 	}
 
 	wb.logger = log.With(zap.Int64("collectionID", wb.collectionID),
@ -660,8 +647,6 @@ func (wb *writeBufferBase) Close(ctx context.Context, drop bool) {
 		switch t := syncTask.(type) {
 		case *syncmgr.SyncTask:
 			t.WithDrop()
-		case *syncmgr.SyncTaskV2:
-			t.WithDrop()
 		}
 
 		f := wb.syncMgr.SyncData(ctx, syncTask, func(err error) error {
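NewWriteBuffer still dispatches on the delete-policy option; only the StorageV2 serializer branch is gone, so every buffer now serializes through syncmgr.NewStorageSerializer. A minimal sketch of both policy paths through the exported options, mirroring the tests that follow (ch, mc, syncMgr and idAlloc are assumed to exist):

// Illustrative only: the two delete-policy paths after the change.
func buildBuffers(ch string, mc metacache.MetaCache, syncMgr syncmgr.SyncManager, idAlloc allocator.Interface) error {
	// BF pk-oracle policy: no extra requirements beyond the three dependencies.
	if _, err := NewWriteBuffer(ch, mc, syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle)); err != nil {
		return err
	}
	// L0 delta policy: still requires an id allocator.
	_, err := NewWriteBuffer(ch, mc, syncMgr, WithDeletePolicy(DeletePolicyL0Delta), WithIDAllocator(idAlloc))
	return err
}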
@ -22,13 +22,12 @@ import (
 
 type WriteBufferSuite struct {
 	suite.Suite
 	collID       int64
 	channelName  string
 	collSchema   *schemapb.CollectionSchema
 	wb           *writeBufferBase
 	syncMgr      *syncmgr.MockSyncManager
 	metacache    *metacache.MockMetaCache
-	storageCache *metacache.StorageV2Cache
 }
 
 func (s *WriteBufferSuite) SetupSuite() {
@ -47,14 +46,12 @@ func (s *WriteBufferSuite) SetupSuite() {
 }
 
 func (s *WriteBufferSuite) SetupTest() {
-	storageCache, err := metacache.NewStorageV2Cache(s.collSchema)
-	s.Require().NoError(err)
-	s.storageCache = storageCache
 	s.syncMgr = syncmgr.NewMockSyncManager(s.T())
 	s.metacache = metacache.NewMockMetaCache(s.T())
 	s.metacache.EXPECT().Schema().Return(s.collSchema).Maybe()
 	s.metacache.EXPECT().Collection().Return(s.collID).Maybe()
-	s.wb, err = newWriteBufferBase(s.channelName, s.metacache, storageCache, s.syncMgr, &writeBufferOption{
+	var err error
+	s.wb, err = newWriteBufferBase(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{
 		pkStatsFactory: func(vchannel *datapb.SegmentInfo) *metacache.BloomFilterSet {
 			return metacache.NewBloomFilterSet()
 		},
@ -66,7 +63,7 @@ func (s *WriteBufferSuite) TestDefaultOption() {
 	s.Run("default BFPkOracle", func() {
 		paramtable.Get().Save(paramtable.Get().DataCoordCfg.EnableLevelZeroSegment.Key, "false")
 		defer paramtable.Get().Reset(paramtable.Get().DataCoordCfg.EnableLevelZeroSegment.Key)
-		wb, err := NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr)
+		wb, err := NewWriteBuffer(s.channelName, s.metacache, s.syncMgr)
 		s.NoError(err)
 		_, ok := wb.(*bfWriteBuffer)
 		s.True(ok)
@ -75,7 +72,7 @@ func (s *WriteBufferSuite) TestDefaultOption() {
 	s.Run("default L0Delta policy", func() {
 		paramtable.Get().Save(paramtable.Get().DataCoordCfg.EnableLevelZeroSegment.Key, "true")
 		defer paramtable.Get().Reset(paramtable.Get().DataCoordCfg.EnableLevelZeroSegment.Key)
-		wb, err := NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithIDAllocator(allocator.NewMockGIDAllocator()))
+		wb, err := NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithIDAllocator(allocator.NewMockGIDAllocator()))
 		s.NoError(err)
 		_, ok := wb.(*l0WriteBuffer)
 		s.True(ok)
@ -83,18 +80,18 @@
 }
 
 func (s *WriteBufferSuite) TestWriteBufferType() {
-	wb, err := NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle))
+	wb, err := NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle))
 	s.NoError(err)
 
 	_, ok := wb.(*bfWriteBuffer)
 	s.True(ok)
 
-	wb, err = NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithDeletePolicy(DeletePolicyL0Delta), WithIDAllocator(allocator.NewMockGIDAllocator()))
+	wb, err = NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy(DeletePolicyL0Delta), WithIDAllocator(allocator.NewMockGIDAllocator()))
 	s.NoError(err)
 	_, ok = wb.(*l0WriteBuffer)
 	s.True(ok)
 
-	_, err = NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithDeletePolicy(""))
+	_, err = NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy(""))
 	s.Error(err)
 }
 
@ -114,7 +111,7 @@ func (s *WriteBufferSuite) TestFlushSegments() {
 	s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything, mock.Anything).Return()
 	s.metacache.EXPECT().GetSegmentByID(mock.Anything, mock.Anything, mock.Anything).Return(nil, true)
 
-	wb, err := NewWriteBuffer(s.channelName, s.metacache, s.storageCache, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle))
+	wb, err := NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle))
 	s.NoError(err)
 
 	err = wb.SealSegments(context.Background(), []int64{segmentID})
@ -265,7 +262,7 @@ func (s *WriteBufferSuite) TestGetCheckpoint() {
 }
 
 func (s *WriteBufferSuite) TestSyncSegmentsError() {
-	wb, err := newWriteBufferBase(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{
+	wb, err := newWriteBufferBase(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{
 		pkStatsFactory: func(vchannel *datapb.SegmentInfo) *metacache.BloomFilterSet {
 			return metacache.NewBloomFilterSet()
 		},
@ -298,7 +295,7 @@ func (s *WriteBufferSuite) TestSyncSegmentsError() {
 }
 
 func (s *WriteBufferSuite) TestEvictBuffer() {
-	wb, err := newWriteBufferBase(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{
+	wb, err := newWriteBufferBase(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{
 		pkStatsFactory: func(vchannel *datapb.SegmentInfo) *metacache.BloomFilterSet {
 			return metacache.NewBloomFilterSet()
 		},
@ -367,7 +364,7 @@ func (s *WriteBufferSuite) TestEvictBuffer() {
 }
 
 func (s *WriteBufferSuite) TestDropPartitions() {
-	wb, err := newWriteBufferBase(s.channelName, s.metacache, s.storageCache, s.syncMgr, &writeBufferOption{
+	wb, err := newWriteBufferBase(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{
 		pkStatsFactory: func(vchannel *datapb.SegmentInfo) *metacache.BloomFilterSet {
 			return metacache.NewBloomFilterSet()
 		},
@ -97,12 +97,7 @@ func (i *IndexNode) CreateJob(ctx context.Context, req *indexpb.CreateJobRequest
 		metrics.IndexNodeBuildIndexTaskCounter.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), metrics.FailLabel).Inc()
 		return merr.Status(err), nil
 	}
-	var task task
-	if Params.CommonCfg.EnableStorageV2.GetAsBool() {
-		task = newIndexBuildTaskV2(taskCtx, taskCancel, req, i)
-	} else {
-		task = newIndexBuildTask(taskCtx, taskCancel, req, cm, i)
-	}
+	task := newIndexBuildTask(taskCtx, taskCancel, req, cm, i)
 	ret := merr.Success()
 	if err := i.sched.TaskQueue.Enqueue(task); err != nil {
 		log.Warn("IndexNode failed to schedule",
@ -327,12 +322,7 @@ func (i *IndexNode) CreateJobV2(ctx context.Context, req *indexpb.CreateJobV2Req
 		metrics.IndexNodeBuildIndexTaskCounter.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), metrics.FailLabel).Inc()
 		return merr.Status(err), nil
 	}
-	var task task
-	if Params.CommonCfg.EnableStorageV2.GetAsBool() {
-		task = newIndexBuildTaskV2(taskCtx, taskCancel, indexRequest, i)
-	} else {
-		task = newIndexBuildTask(taskCtx, taskCancel, indexRequest, cm, i)
-	}
+	task := newIndexBuildTask(taskCtx, taskCancel, indexRequest, cm, i)
 	ret := merr.Success()
 	if err := i.sched.TaskQueue.Enqueue(task); err != nil {
 		log.Warn("IndexNode failed to schedule",
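With the EnableStorageV2 branch removed, both job-creation paths construct the same task type before handing it to the scheduler. A condensed, hedged sketch of that shape; the chunk-manager type is not shown in this hunk, so storage.ChunkManager below is an assumption:

// Illustrative only, not the actual method: build and enqueue the (now only)
// index build task; taskCtx, taskCancel, req and cm come from the surrounding code.
func (i *IndexNode) enqueueBuildTask(taskCtx context.Context, taskCancel context.CancelFunc, req *indexpb.CreateJobRequest, cm storage.ChunkManager) error {
	task := newIndexBuildTask(taskCtx, taskCancel, req, cm, i)
	return i.sched.TaskQueue.Enqueue(task)
}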
@ -43,187 +43,6 @@ import (
 	"github.com/milvus-io/milvus/pkg/util/timerecord"
 )
 
-type indexBuildTaskV2 struct {
-	*indexBuildTask
-}
-
-func newIndexBuildTaskV2(ctx context.Context,
-	cancel context.CancelFunc,
-	req *indexpb.CreateJobRequest,
-	node *IndexNode,
-) *indexBuildTaskV2 {
-	t := &indexBuildTaskV2{
-		indexBuildTask: &indexBuildTask{
-			ident:  fmt.Sprintf("%s/%d", req.GetClusterID(), req.GetBuildID()),
-			cancel: cancel,
-			ctx:    ctx,
-			req:    req,
-			tr:     timerecord.NewTimeRecorder(fmt.Sprintf("IndexBuildID: %d, ClusterID: %s", req.GetBuildID(), req.GetClusterID())),
-			node:   node,
-		},
-	}
-
-	t.parseParams()
-	return t
-}
-
-func (it *indexBuildTaskV2) parseParams() {
-	// fill field for requests before v2.5.0
-	if it.req.GetField() == nil || it.req.GetField().GetDataType() == schemapb.DataType_None {
-		it.req.Field = &schemapb.FieldSchema{
-			FieldID:  it.req.GetFieldID(),
-			Name:     it.req.GetFieldName(),
-			DataType: it.req.GetFieldType(),
-		}
-	}
-}
-
-func (it *indexBuildTaskV2) Execute(ctx context.Context) error {
-	log := log.Ctx(ctx).With(zap.String("clusterID", it.req.GetClusterID()), zap.Int64("buildID", it.req.GetBuildID()),
-		zap.Int64("collection", it.req.GetCollectionID()), zap.Int64("segmentID", it.req.GetSegmentID()),
-		zap.Int32("currentIndexVersion", it.req.GetCurrentIndexVersion()))
-
-	indexType := it.newIndexParams[common.IndexTypeKey]
-	if indexType == indexparamcheck.IndexDISKANN {
-		// check index node support disk index
-		if !Params.IndexNodeCfg.EnableDisk.GetAsBool() {
-			log.Warn("IndexNode don't support build disk index",
-				zap.String("index type", it.newIndexParams[common.IndexTypeKey]),
-				zap.Bool("enable disk", Params.IndexNodeCfg.EnableDisk.GetAsBool()))
-			return merr.WrapErrIndexNotSupported("disk index")
-		}
-
-		// check load size and size of field data
-		localUsedSize, err := indexcgowrapper.GetLocalUsedSize(paramtable.Get().LocalStorageCfg.Path.GetValue())
-		if err != nil {
-			log.Warn("IndexNode get local used size failed")
-			return err
-		}
-		fieldDataSize, err := estimateFieldDataSize(it.req.GetDim(), it.req.GetNumRows(), it.req.GetField().GetDataType())
-		if err != nil {
-			log.Warn("IndexNode get local used size failed")
-			return err
-		}
-		usedLocalSizeWhenBuild := int64(float64(fieldDataSize)*diskUsageRatio) + localUsedSize
-		maxUsedLocalSize := int64(Params.IndexNodeCfg.DiskCapacityLimit.GetAsFloat() * Params.IndexNodeCfg.MaxDiskUsagePercentage.GetAsFloat())
-
-		if usedLocalSizeWhenBuild > maxUsedLocalSize {
-			log.Warn("IndexNode don't has enough disk size to build disk ann index",
-				zap.Int64("usedLocalSizeWhenBuild", usedLocalSizeWhenBuild),
-				zap.Int64("maxUsedLocalSize", maxUsedLocalSize))
-			return merr.WrapErrServiceDiskLimitExceeded(float32(usedLocalSizeWhenBuild), float32(maxUsedLocalSize))
-		}
-
-		err = indexparams.SetDiskIndexBuildParams(it.newIndexParams, int64(fieldDataSize))
-		if err != nil {
-			log.Warn("failed to fill disk index params", zap.Error(err))
-			return err
-		}
-	}
-
-	storageConfig := &indexcgopb.StorageConfig{
-		Address:          it.req.GetStorageConfig().GetAddress(),
-		AccessKeyID:      it.req.GetStorageConfig().GetAccessKeyID(),
-		SecretAccessKey:  it.req.GetStorageConfig().GetSecretAccessKey(),
-		UseSSL:           it.req.GetStorageConfig().GetUseSSL(),
-		BucketName:       it.req.GetStorageConfig().GetBucketName(),
-		RootPath:         it.req.GetStorageConfig().GetRootPath(),
-		UseIAM:           it.req.GetStorageConfig().GetUseIAM(),
-		IAMEndpoint:      it.req.GetStorageConfig().GetIAMEndpoint(),
-		StorageType:      it.req.GetStorageConfig().GetStorageType(),
-		UseVirtualHost:   it.req.GetStorageConfig().GetUseVirtualHost(),
-		Region:           it.req.GetStorageConfig().GetRegion(),
-		CloudProvider:    it.req.GetStorageConfig().GetCloudProvider(),
-		RequestTimeoutMs: it.req.GetStorageConfig().GetRequestTimeoutMs(),
-		SslCACert:        it.req.GetStorageConfig().GetSslCACert(),
-	}
-
-	optFields := make([]*indexcgopb.OptionalFieldInfo, 0, len(it.req.GetOptionalScalarFields()))
-	for _, optField := range it.req.GetOptionalScalarFields() {
-		optFields = append(optFields, &indexcgopb.OptionalFieldInfo{
-			FieldID:   optField.GetFieldID(),
-			FieldName: optField.GetFieldName(),
-			FieldType: optField.GetFieldType(),
-			DataPaths: optField.GetDataPaths(),
-		})
-	}
-
-	buildIndexParams := &indexcgopb.BuildIndexInfo{
-		ClusterID:             it.req.GetClusterID(),
-		BuildID:               it.req.GetBuildID(),
-		CollectionID:          it.req.GetCollectionID(),
-		PartitionID:           it.req.GetPartitionID(),
-		SegmentID:             it.req.GetSegmentID(),
-		IndexVersion:          it.req.GetIndexVersion(),
-		CurrentIndexVersion:   it.req.GetCurrentIndexVersion(),
-		NumRows:               it.req.GetNumRows(),
-		Dim:                   it.req.GetDim(),
-		IndexFilePrefix:       it.req.GetIndexFilePrefix(),
-		InsertFiles:           it.req.GetDataPaths(),
-		FieldSchema:           it.req.GetField(),
-		StorageConfig:         storageConfig,
-		IndexParams:           mapToKVPairs(it.newIndexParams),
-		TypeParams:            mapToKVPairs(it.newTypeParams),
-		StorePath:             it.req.GetStorePath(),
-		StoreVersion:          it.req.GetStoreVersion(),
-		IndexStorePath:        it.req.GetIndexStorePath(),
-		OptFields:             optFields,
-		PartitionKeyIsolation: it.req.GetPartitionKeyIsolation(),
-	}
-
-	var err error
-	it.index, err = indexcgowrapper.CreateIndexV2(ctx, buildIndexParams)
-	if err != nil {
-		if it.index != nil && it.index.CleanLocalData() != nil {
-			log.Warn("failed to clean cached data on disk after build index failed")
-		}
-		log.Warn("failed to build index", zap.Error(err))
-		return err
-	}
-
-	buildIndexLatency := it.tr.RecordSpan()
-	metrics.IndexNodeKnowhereBuildIndexLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(float64(buildIndexLatency.Milliseconds()))
-
-	log.Info("Successfully build index")
-	return nil
-}
-
-func (it *indexBuildTaskV2) PostExecute(ctx context.Context) error {
-	log := log.Ctx(ctx).With(zap.String("clusterID", it.req.GetClusterID()), zap.Int64("buildID", it.req.GetBuildID()),
-		zap.Int64("collection", it.req.GetCollectionID()), zap.Int64("segmentID", it.req.GetSegmentID()),
-		zap.Int32("currentIndexVersion", it.req.GetCurrentIndexVersion()))
-
-	gcIndex := func() {
-		if err := it.index.Delete(); err != nil {
-			log.Warn("IndexNode indexBuildTask Execute CIndexDelete failed", zap.Error(err))
-		}
-	}
-	version, err := it.index.UpLoadV2()
-	if err != nil {
-		log.Warn("failed to upload index", zap.Error(err))
-		gcIndex()
-		return err
-	}
-
-	encodeIndexFileDur := it.tr.Record("index serialize and upload done")
-	metrics.IndexNodeEncodeIndexFileLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(encodeIndexFileDur.Seconds())
-
-	// early release index for gc, and we can ensure that Delete is idempotent.
-	gcIndex()
-
-	// use serialized size before encoding
-	var serializedSize uint64
-	saveFileKeys := make([]string, 0)
-
-	it.node.storeIndexFilesAndStatisticV2(it.req.GetClusterID(), it.req.GetBuildID(), saveFileKeys, serializedSize, it.req.GetCurrentIndexVersion(), version)
-	log.Debug("save index files done", zap.Strings("IndexFiles", saveFileKeys))
-	saveIndexFileDur := it.tr.RecordSpan()
-	metrics.IndexNodeSaveIndexFileLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(saveIndexFileDur.Seconds())
-	it.tr.Elapse("index building all done")
-	log.Info("Successfully save index files")
-	return nil
-}
-
 // IndexBuildTask is used to record the information of the index tasks.
 type indexBuildTask struct {
 	ident  string
@ -20,21 +20,14 @@ import (
 	"context"
 	"testing"
 
-	"github.com/apache/arrow/go/v12/arrow"
-	"github.com/apache/arrow/go/v12/arrow/array"
-	"github.com/apache/arrow/go/v12/arrow/memory"
 	"github.com/stretchr/testify/suite"
 
 	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
 	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
-	milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
-	"github.com/milvus-io/milvus-storage/go/storage/options"
-	"github.com/milvus-io/milvus-storage/go/storage/schema"
 	"github.com/milvus-io/milvus/internal/proto/etcdpb"
 	"github.com/milvus-io/milvus/internal/proto/indexpb"
 	"github.com/milvus-io/milvus/internal/storage"
 	"github.com/milvus-io/milvus/internal/util/dependency"
-	"github.com/milvus-io/milvus/internal/util/typeutil"
 	"github.com/milvus-io/milvus/pkg/common"
 	"github.com/milvus-io/milvus/pkg/util/metautil"
 	"github.com/milvus-io/milvus/pkg/util/metric"
@ -139,105 +132,6 @@ func TestIndexBuildTask(t *testing.T) {
 	suite.Run(t, new(IndexBuildTaskSuite))
 }
 
-type IndexBuildTaskV2Suite struct {
-	suite.Suite
-	schema      *schemapb.CollectionSchema
-	arrowSchema *arrow.Schema
-	space       *milvus_storage.Space
-}
-
-func (suite *IndexBuildTaskV2Suite) SetupSuite() {
-	paramtable.Init()
-}
-
-func (suite *IndexBuildTaskV2Suite) SetupTest() {
-	suite.schema = &schemapb.CollectionSchema{
-		Name:        "test",
-		Description: "test",
-		AutoID:      false,
-		Fields: []*schemapb.FieldSchema{
-			{FieldID: 1, Name: "pk", DataType: schemapb.DataType_Int64, IsPrimaryKey: true},
-			{FieldID: 2, Name: "ts", DataType: schemapb.DataType_Int64},
-			{FieldID: 3, Name: "vec", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "1"}}},
-		},
-	}
-
-	var err error
-	suite.arrowSchema, err = typeutil.ConvertToArrowSchema(suite.schema.Fields)
-	suite.NoError(err)
-
-	tmpDir := suite.T().TempDir()
-	opt := options.NewSpaceOptionBuilder().
-		SetSchema(schema.NewSchema(
-			suite.arrowSchema,
-			&schema.SchemaOptions{
-				PrimaryColumn: "pk",
-				VectorColumn:  "vec",
-				VersionColumn: "ts",
-			})).
-		Build()
-	suite.space, err = milvus_storage.Open("file://"+tmpDir, opt)
-	suite.NoError(err)
-
-	b := array.NewRecordBuilder(memory.DefaultAllocator, suite.arrowSchema)
-	defer b.Release()
-	b.Field(0).(*array.Int64Builder).AppendValues([]int64{1}, nil)
-	b.Field(1).(*array.Int64Builder).AppendValues([]int64{1}, nil)
-	fb := b.Field(2).(*array.FixedSizeBinaryBuilder)
-	fb.Reserve(1)
-	fb.Append([]byte{1, 2, 3, 4})
-
-	rec := b.NewRecord()
-	defer rec.Release()
-	reader, err := array.NewRecordReader(suite.arrowSchema, []arrow.Record{rec})
-	suite.NoError(err)
-	err = suite.space.Write(reader, &options.DefaultWriteOptions)
-	suite.NoError(err)
-}
-
-func (suite *IndexBuildTaskV2Suite) TestBuildIndex() {
-	req := &indexpb.CreateJobRequest{
-		BuildID:      1,
-		IndexVersion: 1,
-		IndexID:      0,
-		IndexName:    "",
-		IndexParams:  []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: metric.L2}, {Key: common.DimKey, Value: "1"}},
-		TypeParams:   []*commonpb.KeyValuePair{{Key: "dim", Value: "1"}},
-		NumRows:      10,
-		StorageConfig: &indexpb.StorageConfig{
-			RootPath:    "/tmp/milvus/data",
-			StorageType: "local",
-		},
-		CollectionID:   1,
-		PartitionID:    1,
-		SegmentID:      1,
-		FieldID:        3,
-		FieldName:      "vec",
-		FieldType:      schemapb.DataType_FloatVector,
-		StorePath:      "file://" + suite.space.Path(),
-		StoreVersion:   suite.space.GetCurrentVersion(),
-		IndexStorePath: "file://" + suite.space.Path(),
-		Dim:            4,
-		OptionalScalarFields: []*indexpb.OptionalFieldInfo{
-			{FieldID: 1, FieldName: "pk", FieldType: 5, DataIds: []int64{0}},
-		},
-	}
-
-	task := newIndexBuildTaskV2(context.Background(), nil, req, NewIndexNode(context.Background(), dependency.NewDefaultFactory(true)))
-
-	var err error
-	err = task.PreExecute(context.Background())
-	suite.NoError(err)
-	err = task.Execute(context.Background())
-	suite.NoError(err)
-	err = task.PostExecute(context.Background())
-	suite.NoError(err)
-}
-
-func TestIndexBuildTaskV2Suite(t *testing.T) {
-	suite.Run(t, new(IndexBuildTaskV2Suite))
-}
-
 type AnalyzeTaskSuite struct {
 	suite.Suite
 	schema *schemapb.CollectionSchema
@@ -222,13 +222,9 @@ func (li *LoadIndexInfo) appendIndexData(ctx context.Context, indexKeys []string
 
 	var status C.CStatus
 	GetLoadPool().Submit(func() (any, error) {
-		if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() {
-			status = C.AppendIndexV3(li.cLoadIndexInfo)
-		} else {
-			traceCtx := ParseCTraceContext(ctx)
-			status = C.AppendIndexV2(traceCtx.ctx, li.cLoadIndexInfo)
-			runtime.KeepAlive(traceCtx)
-		}
+		traceCtx := ParseCTraceContext(ctx)
+		status = C.AppendIndexV2(traceCtx.ctx, li.cLoadIndexInfo)
+		runtime.KeepAlive(traceCtx)
 		return nil, nil
 	}).Await()
 
@@ -265,13 +261,9 @@ func (li *LoadIndexInfo) finish(ctx context.Context, info *cgopb.LoadIndexInfo)
 	}
 
 	_, _ = GetLoadPool().Submit(func() (any, error) {
-		if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() {
-			status = C.AppendIndexV3(li.cLoadIndexInfo)
-		} else {
-			traceCtx := ParseCTraceContext(ctx)
-			status = C.AppendIndexV2(traceCtx.ctx, li.cLoadIndexInfo)
-			runtime.KeepAlive(traceCtx)
-		}
+		traceCtx := ParseCTraceContext(ctx)
+		status = C.AppendIndexV2(traceCtx.ctx, li.cLoadIndexInfo)
+		runtime.KeepAlive(traceCtx)
 		return nil, nil
 	}).Await()
 
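Both hunks above collapse the EnableStorageV2 branch down to the plain AppendIndexV2 path, which pairs the cgo call with runtime.KeepAlive. A standalone sketch (not from this codebase; traceCtx here is a stand-in struct) of why that pairing matters when the C side only borrows a pointer derived from a Go value:

package main

import (
	"fmt"
	"runtime"
	"unsafe"
)

type traceCtx struct{ buf [16]byte } // stand-in for the wrapper returned by ParseCTraceContext

func main() {
	tc := &traceCtx{}
	// p is what would be handed across cgo; holding p does not keep tc alive by itself.
	p := unsafe.Pointer(&tc.buf[0])
	fmt.Println(uintptr(p) != 0) // placeholder for the C call that borrows p
	// Keep tc reachable until after the borrowed pointer has been used for the last time.
	runtime.KeepAlive(tc)
}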
@@ -29,12 +29,10 @@ import "C"
 import (
 	"context"
 	"fmt"
-	"io"
 	"runtime"
 	"strings"
 	"unsafe"
 
-	"github.com/apache/arrow/go/v12/arrow/array"
 	"github.com/cockroachdb/errors"
 	"go.opentelemetry.io/otel"
 	"go.uber.org/atomic"
@@ -44,8 +42,6 @@ import (
 	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
 	"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
 	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
-	milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
-	"github.com/milvus-io/milvus-storage/go/storage/options"
 	"github.com/milvus-io/milvus/internal/proto/cgopb"
 	"github.com/milvus-io/milvus/internal/proto/datapb"
 	"github.com/milvus-io/milvus/internal/proto/querypb"
@@ -55,7 +51,6 @@ import (
 	"github.com/milvus-io/milvus/internal/querynodev2/segments/state"
 	"github.com/milvus-io/milvus/internal/storage"
 	"github.com/milvus-io/milvus/internal/util/cgo"
-	typeutil_internal "github.com/milvus-io/milvus/internal/util/typeutil"
 	"github.com/milvus-io/milvus/pkg/common"
 	"github.com/milvus-io/milvus/pkg/log"
 	"github.com/milvus-io/milvus/pkg/metrics"
@@ -259,7 +254,6 @@ type LocalSegment struct {
 	lastDeltaTimestamp *atomic.Uint64
 	fields *typeutil.ConcurrentMap[int64, *FieldInfo]
 	fieldIndexes *typeutil.ConcurrentMap[int64, *IndexedFieldInfo]
-	space *milvus_storage.Space
 }
 
 func NewSegment(ctx context.Context,
@@ -336,76 +330,6 @@ func NewSegment
 	return segment, nil
 }
 
-func NewSegmentV2(
-	ctx context.Context,
-	collection *Collection,
-	segmentType SegmentType,
-	version int64,
-	loadInfo *querypb.SegmentLoadInfo,
-) (Segment, error) {
-	/*
-		CSegmentInterface
-		NewSegment(CCollection collection, uint64_t segment_id, SegmentType seg_type);
-	*/
-	if loadInfo.GetLevel() == datapb.SegmentLevel_L0 {
-		return NewL0Segment(collection, segmentType, version, loadInfo)
-	}
-	base, err := newBaseSegment(collection, segmentType, version, loadInfo)
-	if err != nil {
-		return nil, err
-	}
-	var segmentPtr C.CSegmentInterface
-	var status C.CStatus
-	var locker *state.LoadStateLock
-	switch segmentType {
-	case SegmentTypeSealed:
-		status = C.NewSegment(collection.collectionPtr, C.Sealed, C.int64_t(loadInfo.GetSegmentID()), &segmentPtr)
-		locker = state.NewLoadStateLock(state.LoadStateOnlyMeta)
-	case SegmentTypeGrowing:
-		status = C.NewSegment(collection.collectionPtr, C.Growing, C.int64_t(loadInfo.GetSegmentID()), &segmentPtr)
-		locker = state.NewLoadStateLock(state.LoadStateDataLoaded)
-	default:
-		return nil, fmt.Errorf("illegal segment type %d when create segment %d", segmentType, loadInfo.GetSegmentID())
-	}
-
-	if err := HandleCStatus(ctx, &status, "NewSegmentFailed"); err != nil {
-		return nil, err
-	}
-
-	log.Info("create segment",
-		zap.Int64("collectionID", loadInfo.GetCollectionID()),
-		zap.Int64("partitionID", loadInfo.GetPartitionID()),
-		zap.Int64("segmentID", loadInfo.GetSegmentID()),
-		zap.String("segmentType", segmentType.String()))
-
-	url, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), loadInfo.GetSegmentID())
-	if err != nil {
-		return nil, err
-	}
-	space, err := milvus_storage.Open(url, options.NewSpaceOptionBuilder().SetVersion(loadInfo.GetStorageVersion()).Build())
-	if err != nil {
-		return nil, err
-	}
-
-	segment := &LocalSegment{
-		baseSegment: base,
-		ptrLock: locker,
-		ptr: segmentPtr,
-		lastDeltaTimestamp: atomic.NewUint64(0),
-		fields: typeutil.NewConcurrentMap[int64, *FieldInfo](),
-		fieldIndexes: typeutil.NewConcurrentMap[int64, *IndexedFieldInfo](),
-		space: space,
-		memSize: atomic.NewInt64(-1),
-		rowNum: atomic.NewInt64(-1),
-		insertCount: atomic.NewInt64(0),
-	}
-
-	if err := segment.initializeSegment(); err != nil {
-		return nil, err
-	}
-	return segment, nil
-}
-
 func (s *LocalSegment) initializeSegment() error {
 	loadInfo := s.loadInfo.Load()
 	indexedFieldInfos, fieldBinlogs := separateIndexAndBinlog(loadInfo)
@@ -932,18 +856,7 @@ func (s *LocalSegment) LoadMultiFieldData(ctx context.Context) error {
 
 	var status C.CStatus
 	GetLoadPool().Submit(func() (any, error) {
-		if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() {
-			uri, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), s.ID())
-			if err != nil {
-				return nil, err
-			}
-
-			loadFieldDataInfo.appendURI(uri)
-			loadFieldDataInfo.appendStorageVersion(s.space.GetCurrentVersion())
-			status = C.LoadFieldDataV2(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo)
-		} else {
-			status = C.LoadFieldData(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo)
-		}
+		status = C.LoadFieldData(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo)
 		return nil, nil
 	}).Await()
 	if err := HandleCStatus(ctx, &status, "LoadMultiFieldData failed",
@@ -1019,18 +932,7 @@ func (s *LocalSegment) LoadFieldData(ctx context.Context, fieldID int64, rowCoun
 	var status C.CStatus
 	GetLoadPool().Submit(func() (any, error) {
 		log.Info("submitted loadFieldData task to load pool")
-		if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() {
-			uri, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), s.ID())
-			if err != nil {
-				return nil, err
-			}
-
-			loadFieldDataInfo.appendURI(uri)
-			loadFieldDataInfo.appendStorageVersion(s.space.GetCurrentVersion())
-			status = C.LoadFieldDataV2(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo)
-		} else {
-			status = C.LoadFieldData(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo)
-		}
+		status = C.LoadFieldData(s.ptr, loadFieldDataInfo.cLoadFieldDataInfo)
 		return nil, nil
 	}).Await()
 	if err := HandleCStatus(ctx, &status, "LoadFieldData failed",
@@ -1046,95 +948,6 @@ func (s *LocalSegment) LoadFieldData(ctx context.Context, fieldID int64, rowCoun
 	return nil
 }
 
-func (s *LocalSegment) LoadDeltaData2(ctx context.Context, schema *schemapb.CollectionSchema) error {
-	deleteReader, err := s.space.ScanDelete()
-	if err != nil {
-		return err
-	}
-	if !deleteReader.Schema().HasField(common.TimeStampFieldName) {
-		return fmt.Errorf("can not read timestamp field in space")
-	}
-	pkFieldSchema, err := typeutil.GetPrimaryFieldSchema(schema)
-	if err != nil {
-		return err
-	}
-	ids := &schemapb.IDs{}
-	var pkint64s []int64
-	var pkstrings []string
-	var tss []int64
-	for deleteReader.Next() {
-		rec := deleteReader.Record()
-		indices := rec.Schema().FieldIndices(common.TimeStampFieldName)
-		tss = append(tss, rec.Column(indices[0]).(*array.Int64).Int64Values()...)
-		indices = rec.Schema().FieldIndices(pkFieldSchema.Name)
-		switch pkFieldSchema.DataType {
-		case schemapb.DataType_Int64:
-			pkint64s = append(pkint64s, rec.Column(indices[0]).(*array.Int64).Int64Values()...)
-		case schemapb.DataType_VarChar:
-			columnData := rec.Column(indices[0]).(*array.String)
-			for i := 0; i < columnData.Len(); i++ {
-				pkstrings = append(pkstrings, columnData.Value(i))
-			}
-		default:
-			return fmt.Errorf("unknown data type %v", pkFieldSchema.DataType)
-		}
-	}
-	if err := deleteReader.Err(); err != nil && err != io.EOF {
-		return err
-	}
-
-	switch pkFieldSchema.DataType {
-	case schemapb.DataType_Int64:
-		ids.IdField = &schemapb.IDs_IntId{
-			IntId: &schemapb.LongArray{
-				Data: pkint64s,
-			},
-		}
-	case schemapb.DataType_VarChar:
-		ids.IdField = &schemapb.IDs_StrId{
-			StrId: &schemapb.StringArray{
-				Data: pkstrings,
-			},
-		}
-	default:
-		return fmt.Errorf("unknown data type %v", pkFieldSchema.DataType)
-	}
-
-	idsBlob, err := proto.Marshal(ids)
-	if err != nil {
-		return err
-	}
-
-	if len(tss) == 0 {
-		return nil
-	}
-
-	loadInfo := C.CLoadDeletedRecordInfo{
-		timestamps: unsafe.Pointer(&tss[0]),
-		primary_keys: (*C.uint8_t)(unsafe.Pointer(&idsBlob[0])),
-		primary_keys_size: C.uint64_t(len(idsBlob)),
-		row_count: C.int64_t(len(tss)),
-	}
-	/*
-		CStatus
-		LoadDeletedRecord(CSegmentInterface c_segment, CLoadDeletedRecordInfo deleted_record_info)
-	*/
-	var status C.CStatus
-	GetDynamicPool().Submit(func() (any, error) {
-		status = C.LoadDeletedRecord(s.ptr, loadInfo)
-		return nil, nil
-	}).Await()
-
-	if err := HandleCStatus(ctx, &status, "LoadDeletedRecord failed"); err != nil {
-		return err
-	}
-
-	log.Info("load deleted record done",
-		zap.Int("rowNum", len(tss)),
-		zap.String("segmentType", s.Type().String()))
-	return nil
-}
-
 func (s *LocalSegment) AddFieldDataInfo(ctx context.Context, rowCount int64, fields []*datapb.FieldBinlog) error {
 	if !s.ptrLock.RLockIf(state.IsNotReleased) {
 		return merr.WrapErrSegmentNotLoaded(s.ID(), "segment released")
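The removed LoadDeltaData2 above folds the deleted primary keys into a schemapb.IDs message and ships the marshaled blob to segcore. A small runnable sketch of just that encoding step, assuming the milvus-proto module and the golang/protobuf shim are on the module path:

package main

import (
	"fmt"

	"github.com/golang/protobuf/proto"
	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
)

func main() {
	// Int64 primary keys 1 and 2 marked as deleted, encoded the way the removed code did.
	ids := &schemapb.IDs{
		IdField: &schemapb.IDs_IntId{
			IntId: &schemapb.LongArray{Data: []int64{1, 2}},
		},
	}
	idsBlob, err := proto.Marshal(ids)
	if err != nil {
		panic(err)
	}
	fmt.Printf("primary_keys_size=%d\n", len(idsBlob)) // the size handed to CLoadDeletedRecordInfo
}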
@@ -1331,13 +1144,6 @@ func (s *LocalSegment) LoadIndex(ctx context.Context, indexInfo *querypb.FieldIn
 		IndexStoreVersion: indexInfo.GetIndexStoreVersion(),
 	}
 
-	if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() {
-		uri, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), s.ID())
-		if err != nil {
-			return err
-		}
-		indexInfoProto.Uri = uri
-	}
 	newLoadIndexInfoSpan := tr.RecordSpan()
 
 	// 2.
@@ -20,7 +20,6 @@ import (
 	"context"
 
 	"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
-	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
 	"github.com/milvus-io/milvus/internal/proto/datapb"
 	"github.com/milvus-io/milvus/internal/proto/querypb"
 	"github.com/milvus-io/milvus/internal/proto/segcorepb"
@@ -79,7 +78,6 @@ type Segment interface {
 	Insert(ctx context.Context, rowIDs []int64, timestamps []typeutil.Timestamp, record *segcorepb.InsertRecord) error
 	Delete(ctx context.Context, primaryKeys []storage.PrimaryKey, timestamps []typeutil.Timestamp) error
 	LoadDeltaData(ctx context.Context, deltaData *storage.DeleteData) error
-	LoadDeltaData2(ctx context.Context, schema *schemapb.CollectionSchema) error // storageV2
 	LastDeltaTimestamp() uint64
 	Release(ctx context.Context, opts ...releaseOption)
 
@@ -23,7 +23,6 @@ import (
 	"github.com/samber/lo"
 	"go.uber.org/zap"
 
-	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
 	"github.com/milvus-io/milvus/internal/proto/datapb"
 	"github.com/milvus-io/milvus/internal/proto/querypb"
 	"github.com/milvus-io/milvus/internal/proto/segcorepb"
@@ -161,10 +160,6 @@ func (s *L0Segment) LoadDeltaData(ctx context.Context, deltaData *storage.Delete
 	return nil
 }
 
-func (s *L0Segment) LoadDeltaData2(ctx context.Context, schema *schemapb.CollectionSchema) error {
-	return merr.WrapErrServiceInternal("not implemented")
-}
-
 func (s *L0Segment) DeleteRecords() ([]storage.PrimaryKey, []uint64) {
 	s.dataGuard.RLock()
 	defer s.dataGuard.RUnlock()
@@ -27,7 +27,6 @@ import "C"
 import (
 	"context"
 	"fmt"
-	"io"
 	"path"
 	"runtime/debug"
 	"strconv"
@@ -43,14 +42,11 @@ import (
 
 	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
 	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
-	milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
-	"github.com/milvus-io/milvus-storage/go/storage/options"
 	"github.com/milvus-io/milvus/internal/proto/datapb"
 	"github.com/milvus-io/milvus/internal/proto/querypb"
 	"github.com/milvus-io/milvus/internal/querycoordv2/params"
 	"github.com/milvus-io/milvus/internal/querynodev2/pkoracle"
 	"github.com/milvus-io/milvus/internal/storage"
-	typeutil_internal "github.com/milvus-io/milvus/internal/util/typeutil"
 	"github.com/milvus-io/milvus/pkg/common"
 	"github.com/milvus-io/milvus/pkg/log"
 	"github.com/milvus-io/milvus/pkg/metrics"
@@ -126,406 +122,6 @@ type resourceEstimateFactor struct {
 	deltaDataExpansionFactor float64
 }
 
-type segmentLoaderV2 struct {
-	*segmentLoader
-}
-
-func NewLoaderV2(
-	manager *Manager,
-	cm storage.ChunkManager,
-) *segmentLoaderV2 {
-	return &segmentLoaderV2{
-		segmentLoader: NewLoader(manager, cm),
-	}
-}
-
-func (loader *segmentLoaderV2) LoadDelta(ctx context.Context, collectionID int64, segment Segment) error {
-	collection := loader.manager.Collection.Get(collectionID)
-	if collection == nil {
-		err := merr.WrapErrCollectionNotFound(collectionID)
-		log.Warn("failed to get collection while loading delta", zap.Error(err))
-		return err
-	}
-	return segment.LoadDeltaData2(ctx, collection.Schema())
-}
-
-func (loader *segmentLoaderV2) Load(ctx context.Context,
-	collectionID int64,
-	segmentType SegmentType,
-	version int64,
-	segments ...*querypb.SegmentLoadInfo,
-) ([]Segment, error) {
-	log := log.Ctx(ctx).With(
-		zap.Int64("collectionID", collectionID),
-		zap.String("segmentType", segmentType.String()),
-	)
-
-	if len(segments) == 0 {
-		log.Info("no segment to load")
-		return nil, nil
-	}
-	// Filter out loaded & loading segments
-	infos := loader.prepare(ctx, segmentType, segments...)
-	defer loader.unregister(infos...)
-
-	log = log.With(
-		zap.Int64s("requestSegments", lo.Map(segments, func(s *querypb.SegmentLoadInfo, _ int) int64 { return s.GetSegmentID() })),
-		zap.Int64s("preparedSegments", lo.Map(infos, func(s *querypb.SegmentLoadInfo, _ int) int64 { return s.GetSegmentID() })),
-	)
-
-	// continue to wait other task done
-	log.Info("start loading...", zap.Int("segmentNum", len(segments)), zap.Int("afterFilter", len(infos)))
-
-	// Check memory & storage limit
-	requestResourceResult, err := loader.requestResource(ctx, infos...)
-	if err != nil {
-		log.Warn("request resource failed", zap.Error(err))
-		return nil, err
-	}
-	defer loader.freeRequest(requestResourceResult.Resource)
-
-	newSegments := typeutil.NewConcurrentMap[int64, Segment]()
-	loaded := typeutil.NewConcurrentMap[int64, Segment]()
-	defer func() {
-		newSegments.Range(func(_ int64, s Segment) bool {
-			s.Release(context.Background())
-			return true
-		})
-		debug.FreeOSMemory()
-	}()
-
-	for _, info := range infos {
-		loadInfo := info
-
-		collection := loader.manager.Collection.Get(loadInfo.GetCollectionID())
-		if collection == nil {
-			err := merr.WrapErrCollectionNotFound(loadInfo.GetCollectionID())
-			log.Warn("failed to get collection", zap.Error(err))
-			return nil, err
-		}
-
-		segment, err := NewSegmentV2(ctx, collection, segmentType, version, loadInfo)
-		if err != nil {
-			log.Warn("load segment failed when create new segment",
-				zap.Int64("partitionID", loadInfo.GetPartitionID()),
-				zap.Int64("segmentID", loadInfo.GetSegmentID()),
-				zap.Error(err),
-			)
-			return nil, err
-		}
-
-		newSegments.Insert(loadInfo.GetSegmentID(), segment)
-	}
-
-	loadSegmentFunc := func(idx int) error {
-		loadInfo := infos[idx]
-		partitionID := loadInfo.PartitionID
-		segmentID := loadInfo.SegmentID
-		segment, _ := newSegments.Get(segmentID)
-
-		metrics.QueryNodeLoadSegmentConcurrency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), "LoadSegment").Inc()
-		defer metrics.QueryNodeLoadSegmentConcurrency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), "LoadSegment").Dec()
-		tr := timerecord.NewTimeRecorder("loadDurationPerSegment")
-
-		var err error
-		if loadInfo.GetLevel() == datapb.SegmentLevel_L0 {
-			err = loader.LoadDelta(ctx, collectionID, segment)
-		} else {
-			err = loader.LoadSegment(ctx, segment.(*LocalSegment), loadInfo)
-		}
-		if err != nil {
-			log.Warn("load segment failed when load data into memory",
-				zap.Int64("partitionID", partitionID),
-				zap.Int64("segmentID", segmentID),
-				zap.Error(err),
-			)
-			return err
-		}
-		loader.manager.Segment.Put(ctx, segmentType, segment)
-		newSegments.GetAndRemove(segmentID)
-		loaded.Insert(segmentID, segment)
-		log.Info("load segment done", zap.Int64("segmentID", segmentID))
-		loader.notifyLoadFinish(loadInfo)
-
-		metrics.QueryNodeLoadSegmentLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(tr.ElapseSpan().Milliseconds()))
-		return nil
-	}
-
-	// Start to load,
-	// Make sure we can always benefit from concurrency, and not spawn too many idle goroutines
-	log.Info("start to load segments in parallel",
-		zap.Int("segmentNum", len(infos)),
-		zap.Int("concurrencyLevel", requestResourceResult.ConcurrencyLevel))
-	err = funcutil.ProcessFuncParallel(len(infos),
-		requestResourceResult.ConcurrencyLevel, loadSegmentFunc, "loadSegmentFunc")
-	if err != nil {
-		log.Warn("failed to load some segments", zap.Error(err))
-		return nil, err
-	}
-
-	// Wait for all segments loaded
-	segmentIDs := lo.Map(segments, func(info *querypb.SegmentLoadInfo, _ int) int64 { return info.GetSegmentID() })
-	if err := loader.waitSegmentLoadDone(ctx, segmentType, segmentIDs, version); err != nil {
-		log.Warn("failed to wait the filtered out segments load done", zap.Error(err))
-		return nil, err
-	}
-
-	log.Info("all segment load done")
-	var result []Segment
-	loaded.Range(func(_ int64, s Segment) bool {
-		result = append(result, s)
-		return true
-	})
-	return result, nil
-}
-
-func (loader *segmentLoaderV2) LoadBloomFilterSet(ctx context.Context, collectionID int64, version int64, infos ...*querypb.SegmentLoadInfo) ([]*pkoracle.BloomFilterSet, error) {
-	log := log.Ctx(ctx).With(
-		zap.Int64("collectionID", collectionID),
-		zap.Int64s("segmentIDs", lo.Map(infos, func(info *querypb.SegmentLoadInfo, _ int) int64 {
-			return info.GetSegmentID()
-		})),
-	)
-
-	segmentNum := len(infos)
-	if segmentNum == 0 {
-		log.Info("no segment to load")
-		return nil, nil
-	}
-
-	collection := loader.manager.Collection.Get(collectionID)
-	if collection == nil {
-		err := merr.WrapErrCollectionNotFound(collectionID)
-		log.Warn("failed to get collection while loading segment", zap.Error(err))
-		return nil, err
-	}
-
-	log.Info("start loading remote...", zap.Int("segmentNum", segmentNum))
-
-	loadedBfs := typeutil.NewConcurrentSet[*pkoracle.BloomFilterSet]()
-	// TODO check memory for bf size
-	loadRemoteFunc := func(idx int) error {
-		loadInfo := infos[idx]
-		partitionID := loadInfo.PartitionID
-		segmentID := loadInfo.SegmentID
-		bfs := pkoracle.NewBloomFilterSet(segmentID, partitionID, commonpb.SegmentState_Sealed)
-
-		log.Info("loading bloom filter for remote...")
-		err := loader.loadBloomFilter(ctx, segmentID, bfs, loadInfo.StorageVersion)
-		if err != nil {
-			log.Warn("load remote segment bloom filter failed",
-				zap.Int64("partitionID", partitionID),
-				zap.Int64("segmentID", segmentID),
-				zap.Error(err),
-			)
-			return err
-		}
-		loadedBfs.Insert(bfs)
-
-		return nil
-	}
-
-	err := funcutil.ProcessFuncParallel(segmentNum, segmentNum, loadRemoteFunc, "loadRemoteFunc")
-	if err != nil {
-		// no partial success here
-		log.Warn("failed to load remote segment", zap.Error(err))
-		return nil, err
-	}
-
-	return loadedBfs.Collect(), nil
-}
-
-func (loader *segmentLoaderV2) loadBloomFilter(ctx context.Context, segmentID int64, bfs *pkoracle.BloomFilterSet,
-	storeVersion int64,
-) error {
-	log := log.Ctx(ctx).With(
-		zap.Int64("segmentID", segmentID),
-	)
-
-	startTs := time.Now()
-
-	url, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), segmentID)
-	if err != nil {
-		return err
-	}
-	space, err := milvus_storage.Open(url, options.NewSpaceOptionBuilder().SetVersion(storeVersion).Build())
-	if err != nil {
-		return err
-	}
-
-	statsBlobs := space.StatisticsBlobs()
-	blobs := []*storage.Blob{}
-
-	for _, statsBlob := range statsBlobs {
-		blob := make([]byte, statsBlob.Size)
-		_, err := space.ReadBlob(statsBlob.Name, blob)
-		if err != nil && err != io.EOF {
-			return err
-		}
-
-		blobs = append(blobs, &storage.Blob{Value: blob})
-	}
-
-	var stats []*storage.PrimaryKeyStats
-
-	stats, err = storage.DeserializeStats(blobs)
-	if err != nil {
-		log.Warn("failed to deserialize stats", zap.Error(err))
-		return err
-	}
-
-	var size uint
-	for _, stat := range stats {
-		pkStat := &storage.PkStatistics{
-			PkFilter: stat.BF,
-			MinPK: stat.MinPk,
-			MaxPK: stat.MaxPk,
-		}
-		size += stat.BF.Cap()
-		bfs.AddHistoricalStats(pkStat)
-	}
-	log.Info("Successfully load pk stats", zap.Duration("time", time.Since(startTs)), zap.Uint("size", size), zap.Int("BFNum", len(stats)))
-	return nil
-}
-
-func (loader *segmentLoaderV2) LoadSegment(ctx context.Context,
-	seg Segment,
-	loadInfo *querypb.SegmentLoadInfo,
-) (err error) {
-	segment := seg.(*LocalSegment)
-	// TODO: we should create a transaction-like api to load segment for segment interface,
-	// but not do many things in segment loader.
-	stateLockGuard, err := segment.StartLoadData()
-	// segment can not do load now.
-	if err != nil {
-		return err
-	}
-	defer func() {
-		// segment is already loaded.
-		// TODO: if stateLockGuard is nil, we should not call LoadSegment anymore.
-		// but current Load is not clear enough to do an actual state transition, keep previous logic to avoid introduced bug.
-		if stateLockGuard != nil {
-			stateLockGuard.Done(err)
-		}
-	}()
-
-	log := log.Ctx(ctx).With(
-		zap.Int64("collectionID", segment.Collection()),
-		zap.Int64("partitionID", segment.Partition()),
-		zap.String("shard", segment.Shard().VirtualName()),
-		zap.Int64("segmentID", segment.ID()),
-	)
-	log.Info("start loading segment files",
-		zap.Int64("rowNum", loadInfo.GetNumOfRows()),
-		zap.String("segmentType", segment.Type().String()))
-
-	collection := loader.manager.Collection.Get(segment.Collection())
-	if collection == nil {
-		err := merr.WrapErrCollectionNotFound(segment.Collection())
-		log.Warn("failed to get collection while loading segment", zap.Error(err))
-		return err
-	}
-	// pkField := GetPkField(collection.Schema())
-
-	// TODO(xige-16): Optimize the data loading process and reduce data copying
-	// for now, there will be multiple copies in the process of data loading into segCore
-	defer debug.FreeOSMemory()
-
-	if segment.Type() == SegmentTypeSealed {
-		fieldsMap := typeutil.NewConcurrentMap[int64, *schemapb.FieldSchema]()
-		for _, field := range collection.Schema().GetFields() {
-			fieldsMap.Insert(field.FieldID, field)
-		}
-		// fieldID2IndexInfo := make(map[int64]*querypb.FieldIndexInfo)
-		indexedFieldInfos := make(map[int64]*IndexedFieldInfo)
-		for _, indexInfo := range loadInfo.IndexInfos {
-			if indexInfo.GetIndexStoreVersion() > 0 {
-				fieldID := indexInfo.FieldID
-				fieldInfo := &IndexedFieldInfo{
-					IndexInfo: indexInfo,
-				}
-				indexedFieldInfos[fieldID] = fieldInfo
-				fieldsMap.Remove(fieldID)
-				// fieldID2IndexInfo[fieldID] = indexInfo
-			}
-		}
-
-		if err := segment.AddFieldDataInfo(ctx, loadInfo.GetNumOfRows(), loadInfo.GetBinlogPaths()); err != nil {
-			return err
-		}
-
-		log.Info("load fields...",
-			zap.Int("fieldNum", fieldsMap.Len()),
-			zap.Int64s("indexedFields", lo.Keys(indexedFieldInfos)),
-		)
-
-		schemaHelper, err := typeutil.CreateSchemaHelper(collection.Schema())
-		if err != nil {
-			return err
-		}
-		tr := timerecord.NewTimeRecorder("segmentLoader.LoadIndex")
-		if err := loader.loadFieldsIndex(ctx, schemaHelper, segment, loadInfo.GetNumOfRows(), indexedFieldInfos); err != nil {
-			return err
-		}
-		metrics.QueryNodeLoadIndexLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(tr.ElapseSpan().Milliseconds()))
-
-		if err := loader.loadSealedSegmentFields(ctx, segment, fieldsMap, loadInfo.GetNumOfRows()); err != nil {
-			return err
-		}
-		// https://github.com/milvus-io/milvus/23654
-		// legacy entry num = 0
-		if err := loader.patchEntryNumber(ctx, segment, loadInfo); err != nil {
-			return err
-		}
-	} else {
-		if err := segment.LoadMultiFieldData(ctx); err != nil {
-			return err
-		}
-	}
-
-	// load statslog if it's growing segment
-	if segment.segmentType == SegmentTypeGrowing {
-		log.Info("loading statslog...")
-		// pkStatsBinlogs, logType := loader.filterPKStatsBinlogs(loadInfo.Statslogs, pkField.GetFieldID())
-		err := loader.loadBloomFilter(ctx, segment.ID(), segment.bloomFilterSet, loadInfo.StorageVersion)
-		if err != nil {
-			return err
-		}
-	}
-
-	log.Info("loading delta...")
-	return loader.LoadDelta(ctx, segment.Collection(), segment)
-}
-
-func (loader *segmentLoaderV2) LoadLazySegment(ctx context.Context,
-	segment Segment,
-	loadInfo *querypb.SegmentLoadInfo,
-) (err error) {
-	return merr.ErrOperationNotSupported
-}
-
-func (loader *segmentLoaderV2) loadSealedSegmentFields(ctx context.Context, segment *LocalSegment, fields *typeutil.ConcurrentMap[int64, *schemapb.FieldSchema], rowCount int64) error {
-	runningGroup, _ := errgroup.WithContext(ctx)
-	fields.Range(func(fieldID int64, field *schemapb.FieldSchema) bool {
-		runningGroup.Go(func() error {
-			return segment.LoadFieldData(ctx, fieldID, rowCount, nil, false)
-		})
-		return true
-	})
-
-	err := runningGroup.Wait()
-	if err != nil {
-		return err
-	}
-
-	log.Ctx(ctx).Info("load field binlogs done for sealed segment",
-		zap.Int64("collection", segment.Collection()),
-		zap.Int64("segment", segment.ID()),
-		zap.String("segmentType", segment.Type().String()))
-
-	return nil
-}
-
 func NewLoader(
 	manager *Manager,
 	cm storage.ChunkManager,
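The removed Load above fans segment loading out through funcutil.ProcessFuncParallel, bounded by the concurrency level granted by requestResource. A stripped-down, runnable sketch of that fan-out pattern; the call signature is taken from the usage shown in the deleted code, and the task body is only a placeholder:

package main

import (
	"fmt"
	"sync/atomic"

	"github.com/milvus-io/milvus/pkg/util/funcutil"
)

func main() {
	var loaded atomic.Int64
	// One task per segment; stand-in for LoadSegment / LoadDelta.
	loadSegmentFunc := func(idx int) error {
		loaded.Add(1)
		return nil
	}
	// 8 segments, at most 2 loading concurrently.
	if err := funcutil.ProcessFuncParallel(8, 2, loadSegmentFunc, "loadSegmentFunc"); err != nil {
		panic(err)
	}
	fmt.Println(loaded.Load()) // 8
}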
@@ -23,9 +23,6 @@ import (
 	"testing"
 	"time"
 
-	"github.com/apache/arrow/go/v12/arrow"
-	"github.com/apache/arrow/go/v12/arrow/array"
-	"github.com/apache/arrow/go/v12/arrow/memory"
 	"github.com/cockroachdb/errors"
 	"github.com/stretchr/testify/mock"
 	"github.com/stretchr/testify/suite"
@@ -33,14 +30,10 @@ import (
 
 	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
 	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
-	milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
-	"github.com/milvus-io/milvus-storage/go/storage/options"
-	"github.com/milvus-io/milvus-storage/go/storage/schema"
 	"github.com/milvus-io/milvus/internal/proto/datapb"
 	"github.com/milvus-io/milvus/internal/proto/querypb"
 	"github.com/milvus-io/milvus/internal/storage"
 	"github.com/milvus-io/milvus/internal/util/initcore"
-	"github.com/milvus-io/milvus/internal/util/typeutil"
 	"github.com/milvus-io/milvus/pkg/common"
 	"github.com/milvus-io/milvus/pkg/util/contextutil"
 	"github.com/milvus-io/milvus/pkg/util/funcutil"
@@ -911,152 +904,3 @@ func TestSegmentLoader(t *testing.T) {
 	suite.Run(t, &SegmentLoaderSuite{})
 	suite.Run(t, &SegmentLoaderDetailSuite{})
 }
-
-type SegmentLoaderV2Suite struct {
-	suite.Suite
-	loader *segmentLoaderV2
-
-	// Dependencies
-	manager *Manager
-	rootPath string
-	chunkManager storage.ChunkManager
-
-	// Data
-	collectionID int64
-	partitionID int64
-	segmentID int64
-	schema *schemapb.CollectionSchema
-	segmentNum int
-}
-
-func (suite *SegmentLoaderV2Suite) SetupSuite() {
-	paramtable.Init()
-	suite.rootPath = suite.T().Name()
-	suite.collectionID = rand.Int63()
-	suite.partitionID = rand.Int63()
-	suite.segmentID = rand.Int63()
-	suite.segmentNum = 5
-}
-
-func (suite *SegmentLoaderV2Suite) SetupTest() {
-	paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("true")
-	// Dependencies
-	suite.manager = NewManager()
-	ctx := context.Background()
-	// TODO:: cpp chunk manager not support local chunk manager
-	// suite.chunkManager = storage.NewLocalChunkManager(storage.RootPath(
-	// 	fmt.Sprintf("/tmp/milvus-ut/%d", rand.Int63())))
-	chunkManagerFactory := storage.NewTestChunkManagerFactory(paramtable.Get(), suite.rootPath)
-	suite.chunkManager, _ = chunkManagerFactory.NewPersistentStorageChunkManager(ctx)
-	suite.loader = NewLoaderV2(suite.manager, suite.chunkManager)
-	initcore.InitRemoteChunkManager(paramtable.Get())
-
-	// Data
-	suite.schema = GenTestCollectionSchema("test", schemapb.DataType_Int64, false)
-	indexMeta := GenTestIndexMeta(suite.collectionID, suite.schema)
-	loadMeta := &querypb.LoadMetaInfo{
-		LoadType: querypb.LoadType_LoadCollection,
-		CollectionID: suite.collectionID,
-		PartitionIDs: []int64{suite.partitionID},
-	}
-	suite.manager.Collection.PutOrRef(suite.collectionID, suite.schema, indexMeta, loadMeta)
-}
-
-func (suite *SegmentLoaderV2Suite) TearDownTest() {
-	ctx := context.Background()
-	for i := 0; i < suite.segmentNum; i++ {
-		suite.manager.Segment.Remove(context.Background(), suite.segmentID+int64(i), querypb.DataScope_All)
-	}
-	suite.chunkManager.RemoveWithPrefix(ctx, suite.rootPath)
-	paramtable.Get().CommonCfg.EnableStorageV2.SwapTempValue("false")
-}
-
-func (suite *SegmentLoaderV2Suite) TestLoad() {
-	tmpDir := suite.T().TempDir()
-	paramtable.Get().CommonCfg.StorageScheme.SwapTempValue("file")
-	paramtable.Get().CommonCfg.StoragePathPrefix.SwapTempValue(tmpDir)
-	ctx := context.Background()
-
-	msgLength := 4
-
-	arrowSchema, err := typeutil.ConvertToArrowSchema(suite.schema.Fields)
-	suite.NoError(err)
-	opt := options.NewSpaceOptionBuilder().
-		SetSchema(schema.NewSchema(
-			arrowSchema,
-			&schema.SchemaOptions{
-				PrimaryColumn: "int64Field",
-				VectorColumn: "floatVectorField",
-				VersionColumn: "Timestamp",
-			})).
-		Build()
-	uri, err := typeutil.GetStorageURI("file", tmpDir, suite.segmentID)
-	suite.NoError(err)
-	space, err := milvus_storage.Open(uri, opt)
-	suite.NoError(err)
-
-	b := array.NewRecordBuilder(memory.DefaultAllocator, arrowSchema)
-	defer b.Release()
-	insertData, err := genInsertData(msgLength, suite.schema)
-	suite.NoError(err)
-
-	err = typeutil.BuildRecord(b, insertData, suite.schema.Fields)
-	suite.NoError(err)
-	rec := b.NewRecord()
-	defer rec.Release()
-	reader, err := array.NewRecordReader(arrowSchema, []arrow.Record{rec})
-	suite.NoError(err)
-	err = space.Write(reader, &options.DefaultWriteOptions)
-	suite.NoError(err)
-
-	collMeta := genCollectionMeta(suite.collectionID, suite.partitionID, suite.schema)
-	inCodec := storage.NewInsertCodecWithSchema(collMeta)
-	statsLog, err := inCodec.SerializePkStatsByData(insertData)
-	suite.NoError(err)
-
-	err = space.WriteBlob(statsLog.Value, statsLog.Key, false)
-	suite.NoError(err)
-
-	dschema := space.Manifest().GetSchema().DeleteSchema()
-	dbuilder := array.NewRecordBuilder(memory.DefaultAllocator, dschema)
-	defer dbuilder.Release()
-	dbuilder.Field(0).(*array.Int64Builder).AppendValues([]int64{1, 2}, nil)
-	dbuilder.Field(1).(*array.Int64Builder).AppendValues([]int64{100, 200}, nil)
-
-	drec := dbuilder.NewRecord()
-	defer drec.Release()
-
-	dreader, err := array.NewRecordReader(dschema, []arrow.Record{drec})
-	suite.NoError(err)
-
-	err = space.Delete(dreader)
-	suite.NoError(err)
-
-	segments, err := suite.loader.Load(ctx, suite.collectionID, SegmentTypeSealed, 0, &querypb.SegmentLoadInfo{
-		SegmentID: suite.segmentID,
-		PartitionID: suite.partitionID,
-		CollectionID: suite.collectionID,
-		NumOfRows: int64(msgLength),
-		StorageVersion: 3,
-		InsertChannel: fmt.Sprintf("by-dev-rootcoord-dml_0_%dv0", suite.collectionID),
-	})
-	suite.NoError(err)
-
-	_, err = suite.loader.LoadBloomFilterSet(ctx, suite.collectionID, 0, &querypb.SegmentLoadInfo{
-		SegmentID: suite.segmentID,
-		PartitionID: suite.partitionID,
-		CollectionID: suite.collectionID,
-		NumOfRows: int64(msgLength),
-		StorageVersion: 3,
-		InsertChannel: fmt.Sprintf("by-dev-rootcoord-dml_0_%dv0", suite.collectionID),
-	})
-	suite.NoError(err)
-
-	segment := segments[0]
-	suite.EqualValues(4, segment.InsertCount())
-	suite.Equal(int64(msgLength-2), segment.RowNum())
-}
-
-func TestSegmentLoaderV2(t *testing.T) {
-	suite.Run(t, &SegmentLoaderV2Suite{})
-}
@@ -348,11 +348,7 @@ func (node *QueryNode) Init() error {
 		node.subscribingChannels = typeutil.NewConcurrentSet[string]()
 		node.unsubscribingChannels = typeutil.NewConcurrentSet[string]()
 		node.manager = segments.NewManager()
-		if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() {
-			node.loader = segments.NewLoaderV2(node.manager, node.chunkManager)
-		} else {
-			node.loader = segments.NewLoader(node.manager, node.chunkManager)
-		}
+		node.loader = segments.NewLoader(node.manager, node.chunkManager)
 		node.manager.SetLoader(node.loader)
 		node.dispClient = msgdispatcher.NewClient(node.factory, typeutil.QueryNodeRole, node.GetNodeID())
 		// init pipeline manager
@@ -25,7 +25,7 @@ import (
 	"github.com/stretchr/testify/assert"
 	"go.uber.org/zap"
 
-	"github.com/milvus-io/milvus-storage/go/common/log"
+	"github.com/milvus-io/milvus/pkg/log"
 )
 
 func TestPerformance(t *testing.T) {
@@ -24,9 +24,9 @@ import (
 	"go.uber.org/zap"
 
 	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
-	"github.com/milvus-io/milvus-storage/go/common/log"
 	"github.com/milvus-io/milvus/internal/proto/internalpb"
 	"github.com/milvus-io/milvus/internal/storage"
+	"github.com/milvus-io/milvus/pkg/log"
 	"github.com/milvus-io/milvus/pkg/util/merr"
 )
 
@@ -41,7 +41,6 @@ type CodecIndex interface {
 	Delete() error
 	CleanLocalData() error
 	UpLoad() (map[string]int64, error)
-	UpLoadV2() (int64, error)
 }
 
 var _ CodecIndex = (*CgoIndex)(nil)
@@ -127,35 +126,6 @@ func CreateIndex(ctx context.Context, buildIndexInfo *indexcgopb.BuildIndexInfo)
 	return index, nil
 }
 
-func CreateIndexV2(ctx context.Context, buildIndexInfo *indexcgopb.BuildIndexInfo) (CodecIndex, error) {
-	buildIndexInfoBlob, err := proto.Marshal(buildIndexInfo)
-	if err != nil {
-		log.Ctx(ctx).Warn("marshal buildIndexInfo failed",
-			zap.String("clusterID", buildIndexInfo.GetClusterID()),
-			zap.Int64("buildID", buildIndexInfo.GetBuildID()),
-			zap.Error(err))
-		return nil, err
-	}
-	var indexPtr C.CIndex
-	status := C.CreateIndexV2(&indexPtr, (*C.uint8_t)(unsafe.Pointer(&buildIndexInfoBlob[0])), (C.uint64_t)(len(buildIndexInfoBlob)))
-	if err := HandleCStatus(&status, "failed to create index"); err != nil {
-		return nil, err
-	}
-
-	index := &CgoIndex{
-		indexPtr: indexPtr,
-		close: false,
-	}
-
-	runtime.SetFinalizer(index, func(index *CgoIndex) {
-		if index != nil && !index.close {
-			log.Error("there is leakage in index object, please check.")
-		}
-	})
-
-	return index, nil
-}
-
 // TODO: this seems to be used only for test. We should mark the method
 // name with ForTest, or maybe move to test file.
 func (index *CgoIndex) Build(dataset *Dataset) error {
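The deleted CreateIndexV2 installed a finalizer purely as a leak check, logging if a CgoIndex was garbage-collected before being closed. A standalone sketch of that pattern; the handle type and its closed flag are illustrative stand-ins, not the real CgoIndex:

package main

import (
	"fmt"
	"runtime"
	"time"
)

type handle struct{ closed bool }

func main() {
	h := &handle{}
	runtime.SetFinalizer(h, func(h *handle) {
		if !h.closed {
			fmt.Println("there is leakage in index object, please check.")
		}
	})
	h = nil                            // drop the only reference without closing the handle
	runtime.GC()                       // finalizer is queued once the object is collected
	time.Sleep(100 * time.Millisecond) // give the finalizer goroutine a chance to run
}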
@@ -426,34 +396,3 @@ func (index *CgoIndex) UpLoad() (map[string]int64, error) {
 
 	return res, nil
 }
-
-func (index *CgoIndex) UpLoadV2() (int64, error) {
-	var cBinarySet C.CBinarySet
-
-	status := C.SerializeIndexAndUpLoadV2(index.indexPtr, &cBinarySet)
-	defer func() {
-		if cBinarySet != nil {
-			C.DeleteBinarySet(cBinarySet)
-		}
-	}()
-	if err := HandleCStatus(&status, "failed to serialize index and upload index"); err != nil {
-		return -1, err
-	}
-
-	buffer, err := GetBinarySetValue(cBinarySet, "index_store_version")
-	if err != nil {
-		return -1, err
-	}
-	var version int64
-
-	version = int64(buffer[7])
-	version = (version << 8) + int64(buffer[6])
-	version = (version << 8) + int64(buffer[5])
-	version = (version << 8) + int64(buffer[4])
-	version = (version << 8) + int64(buffer[3])
-	version = (version << 8) + int64(buffer[2])
-	version = (version << 8) + int64(buffer[1])
-	version = (version << 8) + int64(buffer[0])
-
-	return version, nil
-}
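The deleted UpLoadV2 reassembled the index_store_version from eight bytes, low byte first, with manual shifts. That is exactly a little-endian decode; a runnable comparison, with the buffer contents made up for the example:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	buffer := []byte{0x2A, 0, 0, 0, 0, 0, 0, 0} // hypothetical "index_store_version" blob = 42

	// Manual assembly, as the deleted code did.
	var version int64
	for i := 7; i >= 0; i-- {
		version = (version << 8) + int64(buffer[i])
	}

	// Same result with the standard library.
	fmt.Println(version, int64(binary.LittleEndian.Uint64(buffer))) // 42 42
}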