mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 01:28:27 +08:00
enhance: Remove single chunk segment related codes (#39249)
https://github.com/milvus-io/milvus/issues/39112 --------- Signed-off-by: sunby <sunbingyi1992@gmail.com>
This commit is contained in:
parent
154a2a68e0
commit
bf617115ca
@ -416,7 +416,7 @@ queryNode:
|
||||
nprobe: 16 # nprobe to search small index, based on your accuracy requirement, must smaller than nlist
|
||||
memExpansionRate: 1.15 # extra memory needed by building interim index
|
||||
buildParallelRate: 0.5 # the ratio of building interim index parallel matched with cpu num
|
||||
multipleChunkedEnable: true # Enable multiple chunked search
|
||||
multipleChunkedEnable: true # Deprecated. Enable multiple chunked search
|
||||
knowhereScoreConsistency: false # Enable knowhere strong consistency score computation logic
|
||||
jsonKeyStatsCommitInterval: 200 # the commit interval for the JSON key Stats to commit
|
||||
loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments
|
||||
|
||||
@ -984,9 +984,6 @@ struct fmt::formatter<SegmentType> : fmt::formatter<std::string> {
|
||||
case Indexing:
|
||||
name = "Indexing";
|
||||
break;
|
||||
case ChunkedSealed:
|
||||
name = "ChunkedSealed";
|
||||
break;
|
||||
default:
|
||||
name = "Unknown";
|
||||
}
|
||||
|
||||
@ -28,7 +28,6 @@ enum SegmentType {
|
||||
Growing = 1,
|
||||
Sealed = 2,
|
||||
Indexing = 3,
|
||||
ChunkedSealed = 4,
|
||||
};
|
||||
|
||||
typedef enum SegmentType SegmentType;
|
||||
|
||||
@ -31,7 +31,7 @@
|
||||
#include "expr/ITypeExpr.h"
|
||||
#include "log/Log.h"
|
||||
#include "query/PlanProto.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
#include "segcore/SegmentSealed.h"
|
||||
#include "segcore/SegmentInterface.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
namespace milvus {
|
||||
@ -736,24 +736,25 @@ class SegmentExpr : public Expr {
|
||||
const bool* valid_data;
|
||||
if constexpr (std::is_same_v<T, std::string_view> ||
|
||||
std::is_same_v<T, Json>) {
|
||||
if (segment_->type() == SegmentType::Sealed) {
|
||||
valid_data = segment_
|
||||
->get_batch_views<T>(
|
||||
field_id_, i, data_pos, size)
|
||||
.second.data();
|
||||
}
|
||||
auto batch_views = segment_->get_batch_views<T>(
|
||||
field_id_, i, data_pos, size);
|
||||
valid_data = batch_views.second.data();
|
||||
ApplyValidData(valid_data,
|
||||
res + processed_size,
|
||||
valid_res + processed_size,
|
||||
size);
|
||||
} else {
|
||||
auto chunk = segment_->chunk_data<T>(field_id_, i);
|
||||
valid_data = chunk.valid_data();
|
||||
if (valid_data != nullptr) {
|
||||
valid_data += data_pos;
|
||||
}
|
||||
}
|
||||
ApplyValidData(valid_data,
|
||||
res + processed_size,
|
||||
valid_res + processed_size,
|
||||
size);
|
||||
}
|
||||
}
|
||||
|
||||
processed_size += size;
|
||||
if (processed_size >= batch_size_) {
|
||||
|
||||
@ -319,8 +319,9 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(EvalCtx& context) {
|
||||
}
|
||||
int processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[ op_type, &processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
[op_type,
|
||||
&processed_cursor,
|
||||
&bitmap_input]<FilterType filter_type = FilterType::sequential>(
|
||||
const milvus::ArrayView* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -469,7 +470,8 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray(EvalCtx& context) {
|
||||
default:
|
||||
PanicInfo(
|
||||
OpTypeInvalid,
|
||||
fmt::format("unsupported operator type for unary expr: {}",
|
||||
fmt::format(
|
||||
"unsupported operator type for unary expr: {}",
|
||||
op_type));
|
||||
}
|
||||
processed_cursor += size;
|
||||
@ -529,14 +531,19 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(EvalCtx& context,
|
||||
|
||||
// filtering by index, get candidates.
|
||||
std::function<const milvus::ArrayView*(int64_t)> retrieve;
|
||||
|
||||
// avoid use-after-free
|
||||
milvus::ArrayView array_view_tmp;
|
||||
if (segment_->is_chunked()) {
|
||||
retrieve = [this](int64_t offset) -> const milvus::ArrayView* {
|
||||
retrieve = [this, &array_view_tmp](
|
||||
int64_t offset) -> const milvus::ArrayView* {
|
||||
auto [chunk_idx, chunk_offset] =
|
||||
segment_->get_chunk_by_offset(field_id_, offset);
|
||||
const auto& chunk =
|
||||
segment_->template chunk_data<milvus::ArrayView>(
|
||||
segment_->template chunk_view<milvus::ArrayView>(
|
||||
field_id_, chunk_idx);
|
||||
return chunk.data() + chunk_offset;
|
||||
array_view_tmp = std::move(chunk.first[chunk_offset]);
|
||||
return &array_view_tmp;
|
||||
};
|
||||
} else {
|
||||
auto size_per_chunk = segment_->size_per_chunk();
|
||||
@ -673,9 +680,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(EvalCtx& context) {
|
||||
} while (false)
|
||||
|
||||
int processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[ op_type, pointer, &processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
auto execute_sub_batch = [op_type,
|
||||
pointer,
|
||||
&processed_cursor,
|
||||
&bitmap_input]<FilterType filter_type =
|
||||
FilterType::sequential>(
|
||||
const milvus::Json* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
@ -1599,9 +1608,10 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplForData(EvalCtx& context) {
|
||||
auto expr_type = expr_->op_type_;
|
||||
|
||||
size_t processed_cursor = 0;
|
||||
auto execute_sub_batch =
|
||||
[ expr_type, &processed_cursor, &
|
||||
bitmap_input ]<FilterType filter_type = FilterType::sequential>(
|
||||
auto execute_sub_batch = [expr_type,
|
||||
&processed_cursor,
|
||||
&bitmap_input]<FilterType filter_type =
|
||||
FilterType::sequential>(
|
||||
const T* data,
|
||||
const bool* valid_data,
|
||||
const int32_t* offsets,
|
||||
|
||||
@ -20,7 +20,6 @@
|
||||
#include "knowhere/index/index_node.h"
|
||||
#include "segcore/SegmentInterface.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
#include "segcore/ConcurrentVector.h"
|
||||
#include "common/Span.h"
|
||||
#include "query/Utils.h"
|
||||
|
||||
@ -15,7 +15,6 @@
|
||||
// limitations under the License.
|
||||
#include "SearchGroupByOperator.h"
|
||||
#include "common/Consts.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
#include "query/Utils.h"
|
||||
|
||||
namespace milvus {
|
||||
|
||||
@ -20,10 +20,10 @@
|
||||
#include "knowhere/index/index_node.h"
|
||||
#include "segcore/SegmentInterface.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
#include "segcore/ConcurrentVector.h"
|
||||
#include "common/Span.h"
|
||||
#include "query/Utils.h"
|
||||
#include "segcore/SegmentSealed.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace exec {
|
||||
|
||||
@ -464,6 +464,7 @@ InvertedIndexTantivy<T>::BuildWithRawDataForUT(size_t n,
|
||||
if constexpr (std::is_same_v<std::string, T>) {
|
||||
schema_.set_data_type(proto::schema::DataType::VarChar);
|
||||
}
|
||||
if (!wrapper_) {
|
||||
boost::uuids::random_generator generator;
|
||||
auto uuid = generator();
|
||||
auto prefix = boost::uuids::to_string(uuid);
|
||||
@ -472,19 +473,20 @@ InvertedIndexTantivy<T>::BuildWithRawDataForUT(size_t n,
|
||||
d_type_ = get_tantivy_data_type(schema_);
|
||||
std::string field = "test_inverted_index";
|
||||
inverted_index_single_segment_ =
|
||||
GetValueFromConfig<int32_t>(config,
|
||||
milvus::index::SCALAR_INDEX_ENGINE_VERSION)
|
||||
GetValueFromConfig<int32_t>(
|
||||
config, milvus::index::SCALAR_INDEX_ENGINE_VERSION)
|
||||
.value_or(1) == 0;
|
||||
tantivy_index_version_ =
|
||||
GetValueFromConfig<int32_t>(config,
|
||||
milvus::index::TANTIVY_INDEX_VERSION)
|
||||
.value_or(milvus::index::TANTIVY_INDEX_LATEST_VERSION);
|
||||
wrapper_ =
|
||||
std::make_shared<TantivyIndexWrapper>(field.c_str(),
|
||||
wrapper_ = std::make_shared<TantivyIndexWrapper>(
|
||||
field.c_str(),
|
||||
d_type_,
|
||||
path_.c_str(),
|
||||
tantivy_index_version_,
|
||||
inverted_index_single_segment_);
|
||||
}
|
||||
if (!inverted_index_single_segment_) {
|
||||
if (config.find("is_array") != config.end()) {
|
||||
// only used in ut.
|
||||
|
||||
@ -134,868 +134,4 @@ class ColumnBase {
|
||||
virtual const char*
|
||||
Data(int chunk_id) const = 0;
|
||||
};
|
||||
class SingleChunkColumnBase : public ColumnBase {
|
||||
public:
|
||||
enum MappingType {
|
||||
MAP_WITH_ANONYMOUS = 0,
|
||||
MAP_WITH_FILE = 1,
|
||||
MAP_WITH_MANAGER = 2,
|
||||
};
|
||||
// MAP_WITH_ANONYMOUS ctor
|
||||
SingleChunkColumnBase(size_t reserve_rows, const FieldMeta& field_meta)
|
||||
: mapping_type_(MappingType::MAP_WITH_ANONYMOUS) {
|
||||
auto data_type = field_meta.get_data_type();
|
||||
SetPaddingSize(data_type);
|
||||
|
||||
if (field_meta.is_nullable()) {
|
||||
nullable_ = true;
|
||||
valid_data_.reserve(reserve_rows);
|
||||
}
|
||||
// We don't pre-allocate memory for variable length data type, data_
|
||||
// will be allocated by ExpandData() when AppendBatch/Append is called.
|
||||
if (IsVariableDataType(data_type)) {
|
||||
return;
|
||||
}
|
||||
|
||||
data_cap_size_ = field_meta.get_sizeof() * reserve_rows;
|
||||
|
||||
// use anon mapping so we are able to free these memory with munmap only
|
||||
size_t mapped_size = data_cap_size_ + padding_;
|
||||
data_ = static_cast<char*>(mmap(nullptr,
|
||||
mapped_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANON,
|
||||
-1,
|
||||
0));
|
||||
AssertInfo(data_ != MAP_FAILED,
|
||||
"failed to create anon map: {}, map_size={}",
|
||||
strerror(errno),
|
||||
mapped_size);
|
||||
UpdateMetricWhenMmap(mapped_size);
|
||||
}
|
||||
|
||||
// MAP_WITH_MANAGER ctor
|
||||
// reserve is number of bytes to allocate(without padding)
|
||||
SingleChunkColumnBase(size_t reserve,
|
||||
const DataType& data_type,
|
||||
storage::MmapChunkManagerPtr mcm,
|
||||
storage::MmapChunkDescriptorPtr descriptor,
|
||||
bool nullable)
|
||||
: mcm_(mcm),
|
||||
mmap_descriptor_(descriptor),
|
||||
data_cap_size_(reserve),
|
||||
mapping_type_(MappingType::MAP_WITH_MANAGER),
|
||||
nullable_(nullable) {
|
||||
AssertInfo((mcm != nullptr) && descriptor != nullptr,
|
||||
"use wrong mmap chunk manager and mmap chunk descriptor to "
|
||||
"create column.");
|
||||
|
||||
SetPaddingSize(data_type);
|
||||
size_t mapped_size = data_cap_size_ + padding_;
|
||||
data_ = (char*)mcm_->Allocate(mmap_descriptor_, (uint64_t)mapped_size);
|
||||
AssertInfo(data_ != nullptr,
|
||||
"fail to create with mmap manager: map_size = {}",
|
||||
mapped_size);
|
||||
if (nullable_) {
|
||||
valid_data_.reserve(reserve);
|
||||
}
|
||||
}
|
||||
|
||||
// MAP_WITH_FILE ctor
|
||||
// size is number of bytes of the file, with padding
|
||||
// !!! The incoming file must have padding written at the end of the file.
|
||||
// Subclasses of variable length data type, if they used this constructor,
|
||||
// must set num_rows_ by themselves.
|
||||
SingleChunkColumnBase(const File& file,
|
||||
size_t size,
|
||||
const FieldMeta& field_meta)
|
||||
: nullable_(field_meta.is_nullable()),
|
||||
mapping_type_(MappingType::MAP_WITH_FILE) {
|
||||
auto data_type = field_meta.get_data_type();
|
||||
SetPaddingSize(data_type);
|
||||
|
||||
if (!IsVariableDataType(data_type)) {
|
||||
auto type_size = field_meta.get_sizeof();
|
||||
num_rows_ = size / type_size;
|
||||
}
|
||||
AssertInfo(size >= padding_,
|
||||
"file size {} is less than padding size {}",
|
||||
size,
|
||||
padding_);
|
||||
|
||||
// in MAP_WITH_FILE, no extra space written in file, so data_size_ is
|
||||
// the same as data_cap_size_.
|
||||
data_size_ = size - padding_;
|
||||
data_cap_size_ = data_size_;
|
||||
// use exactly same size of file, padding shall be written in file already
|
||||
// see also https://github.com/milvus-io/milvus/issues/34442
|
||||
data_ = static_cast<char*>(
|
||||
mmap(nullptr, size, PROT_READ, MAP_SHARED, file.Descriptor(), 0));
|
||||
AssertInfo(data_ != MAP_FAILED,
|
||||
"failed to create file-backed map, err: {}",
|
||||
strerror(errno));
|
||||
madvise(data_, size, MADV_WILLNEED);
|
||||
|
||||
// valid_data store in memory
|
||||
if (nullable_) {
|
||||
valid_data_.reserve(num_rows_);
|
||||
}
|
||||
|
||||
UpdateMetricWhenMmap(size);
|
||||
}
|
||||
|
||||
virtual ~SingleChunkColumnBase() {
|
||||
if (data_ != nullptr) {
|
||||
size_t mapped_size = data_cap_size_ + padding_;
|
||||
if (mapping_type_ != MappingType::MAP_WITH_MANAGER) {
|
||||
if (munmap(data_, mapped_size)) {
|
||||
AssertInfo(true,
|
||||
"failed to unmap variable field, err={}",
|
||||
strerror(errno));
|
||||
}
|
||||
}
|
||||
UpdateMetricWhenMunmap(mapped_size);
|
||||
}
|
||||
if (nullable_) {
|
||||
valid_data_.clear();
|
||||
}
|
||||
}
|
||||
|
||||
SingleChunkColumnBase(ColumnBase&&) = delete;
|
||||
|
||||
// Data() points at an addr that contains the elements
|
||||
virtual const char*
|
||||
Data(int chunk_id) const override {
|
||||
return data_;
|
||||
}
|
||||
|
||||
// MmappedData() returns the mmaped address
|
||||
const char*
|
||||
MmappedData() const override {
|
||||
return data_;
|
||||
}
|
||||
|
||||
bool
|
||||
IsValid(size_t offset) const {
|
||||
if (nullable_) {
|
||||
return valid_data_[offset];
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
IsNullable() const {
|
||||
return nullable_;
|
||||
}
|
||||
|
||||
size_t
|
||||
NumRows() const {
|
||||
return num_rows_;
|
||||
};
|
||||
|
||||
// returns the number of bytes used to store actual data
|
||||
size_t
|
||||
DataByteSize() const override {
|
||||
return data_size_;
|
||||
}
|
||||
|
||||
// returns the ballpark number of bytes used by this object
|
||||
size_t
|
||||
MemoryUsageBytes() const {
|
||||
return data_cap_size_ + padding_ + (valid_data_.size() + 7) / 8;
|
||||
}
|
||||
|
||||
virtual SpanBase
|
||||
Span() const = 0;
|
||||
|
||||
// used for sequential access for search
|
||||
virtual BufferView
|
||||
GetBatchBuffer(int64_t start_offset, int64_t length) {
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"GetBatchBuffer only supported for VariableColumn");
|
||||
}
|
||||
|
||||
virtual std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
StringViews() const {
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"StringViews only supported for VariableColumn");
|
||||
}
|
||||
|
||||
virtual std::pair<std::vector<ArrayView>, FixedVector<bool>>
|
||||
ArrayViews() const {
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"ArrayView only supported for ArrayColumn");
|
||||
}
|
||||
|
||||
virtual std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
ViewsByOffsets(const FixedVector<int32_t>& offsets) const {
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"viewsbyoffsets only supported for VariableColumn");
|
||||
}
|
||||
|
||||
virtual std::string_view
|
||||
RawAt(const size_t i) const {
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"RawAt only supported for VariableColumn");
|
||||
}
|
||||
|
||||
virtual void
|
||||
AppendBatch(const FieldDataPtr data) override {
|
||||
size_t required_size = data_size_ + data->DataSize();
|
||||
if (required_size > data_cap_size_) {
|
||||
ExpandData(required_size * 2);
|
||||
}
|
||||
|
||||
std::copy_n(static_cast<const char*>(data->Data()),
|
||||
data->DataSize(),
|
||||
data_ + data_size_);
|
||||
data_size_ = required_size;
|
||||
if (nullable_) {
|
||||
size_t required_rows = num_rows_ + data->get_num_rows();
|
||||
if (required_rows > valid_data_.size()) {
|
||||
valid_data_.reserve(required_rows * 2);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < data->get_num_rows(); i++) {
|
||||
valid_data_.push_back(data->is_valid(i));
|
||||
}
|
||||
}
|
||||
num_rows_ += data->Length();
|
||||
}
|
||||
|
||||
// Append one row
|
||||
virtual void
|
||||
Append(const char* data, size_t size) {
|
||||
AssertInfo(!nullable_,
|
||||
"no need to pass valid_data when nullable is false");
|
||||
size_t required_size = data_size_ + size;
|
||||
if (required_size > data_cap_size_) {
|
||||
ExpandData(required_size * 2);
|
||||
}
|
||||
|
||||
std::copy_n(data, size, data_ + data_size_);
|
||||
data_size_ = required_size;
|
||||
num_rows_++;
|
||||
}
|
||||
|
||||
// Append one row
|
||||
virtual void
|
||||
Append(const char* data, const bool valid_data, size_t size) {
|
||||
AssertInfo(nullable_, "need to pass valid_data_ when nullable is true");
|
||||
size_t required_size = data_size_ + size;
|
||||
if (required_size > data_cap_size_) {
|
||||
ExpandData(required_size * 2);
|
||||
}
|
||||
|
||||
std::copy_n(data, size, data_ + data_size_);
|
||||
valid_data_.push_back(valid_data);
|
||||
data_size_ = required_size;
|
||||
num_rows_++;
|
||||
}
|
||||
|
||||
void
|
||||
SetValidData(FixedVector<bool>&& valid_data) {
|
||||
valid_data_ = std::move(valid_data);
|
||||
}
|
||||
|
||||
protected:
|
||||
// new_size should not include padding, padding will be added in ExpandData()
|
||||
void
|
||||
ExpandData(size_t new_size) {
|
||||
if (new_size == 0) {
|
||||
return;
|
||||
}
|
||||
AssertInfo(
|
||||
mapping_type_ == MappingType::MAP_WITH_ANONYMOUS ||
|
||||
mapping_type_ == MappingType::MAP_WITH_MANAGER,
|
||||
"expand function only use in anonymous or with mmap manager");
|
||||
size_t new_mapped_size = new_size + padding_;
|
||||
if (mapping_type_ == MappingType::MAP_WITH_ANONYMOUS) {
|
||||
auto data = static_cast<char*>(mmap(nullptr,
|
||||
new_mapped_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANON,
|
||||
-1,
|
||||
0));
|
||||
UpdateMetricWhenMmap(new_mapped_size);
|
||||
|
||||
AssertInfo(data != MAP_FAILED,
|
||||
"failed to expand map: {}, new_map_size={}",
|
||||
strerror(errno),
|
||||
new_size + padding_);
|
||||
|
||||
if (data_ != nullptr) {
|
||||
std::memcpy(data, data_, data_size_);
|
||||
if (munmap(data_, data_cap_size_ + padding_)) {
|
||||
auto err = errno;
|
||||
size_t mapped_size = new_size + padding_;
|
||||
munmap(data, mapped_size);
|
||||
UpdateMetricWhenMunmap(mapped_size);
|
||||
|
||||
// TODO: error handling is problematic:
|
||||
// if munmap fails, exception will be thrown and caught by
|
||||
// the cgo call, but the program continue to run. and the
|
||||
// successfully newly mmaped data will not be assigned to data_
|
||||
// and got leaked.
|
||||
AssertInfo(
|
||||
false,
|
||||
"failed to unmap while expanding: {}, old_map_size={}",
|
||||
strerror(err),
|
||||
data_cap_size_ + padding_);
|
||||
}
|
||||
UpdateMetricWhenMunmap(data_cap_size_ + padding_);
|
||||
}
|
||||
|
||||
data_ = data;
|
||||
data_cap_size_ = new_size;
|
||||
} else if (mapping_type_ == MappingType::MAP_WITH_MANAGER) {
|
||||
auto data = mcm_->Allocate(mmap_descriptor_, new_mapped_size);
|
||||
AssertInfo(data != nullptr,
|
||||
"fail to create with mmap manager: map_size = {}",
|
||||
new_mapped_size);
|
||||
std::memcpy(data, data_, data_cap_size_);
|
||||
// allocate space only append in one growing segment, so no need to munmap()
|
||||
data_ = (char*)data;
|
||||
data_cap_size_ = new_size;
|
||||
}
|
||||
}
|
||||
|
||||
char* data_{nullptr};
|
||||
bool nullable_{false};
|
||||
// When merging multiple valid_data, the bit operation logic is very complex
|
||||
// for the reason that, FixedVector<bool> use bit granularity for storage and access
|
||||
// so FixedVector is also used to store valid_data on the sealed segment.
|
||||
FixedVector<bool> valid_data_;
|
||||
size_t data_cap_size_{0};
|
||||
size_t padding_{0};
|
||||
size_t num_rows_{0};
|
||||
|
||||
// length in bytes
|
||||
storage::MmapChunkDescriptorPtr mmap_descriptor_ = nullptr;
|
||||
size_t data_size_{0};
|
||||
const MappingType mapping_type_;
|
||||
|
||||
private:
|
||||
void
|
||||
SetPaddingSize(const DataType& type) {
|
||||
padding_ = PaddingSize(type);
|
||||
}
|
||||
|
||||
void
|
||||
UpdateMetricWhenMmap(size_t mapped_size) {
|
||||
if (mapping_type_ == MappingType::MAP_WITH_ANONYMOUS) {
|
||||
milvus::monitor::internal_mmap_allocated_space_bytes_anon.Observe(
|
||||
mapped_size);
|
||||
milvus::monitor::internal_mmap_in_used_space_bytes_anon.Increment(
|
||||
mapped_size);
|
||||
milvus::monitor::internal_mmap_in_used_count_anon.Increment();
|
||||
} else if (mapping_type_ == MappingType::MAP_WITH_FILE) {
|
||||
milvus::monitor::internal_mmap_allocated_space_bytes_file.Observe(
|
||||
mapped_size);
|
||||
milvus::monitor::internal_mmap_in_used_space_bytes_file.Increment(
|
||||
mapped_size);
|
||||
milvus::monitor::internal_mmap_in_used_count_file.Increment();
|
||||
}
|
||||
// else: does not update metric for MAP_WITH_MANAGER, MmapChunkManagerPtr
|
||||
// will update metric itself.
|
||||
}
|
||||
|
||||
void
|
||||
UpdateMetricWhenMunmap(size_t mapped_size) {
|
||||
if (mapping_type_ == MappingType::MAP_WITH_ANONYMOUS) {
|
||||
milvus::monitor::internal_mmap_in_used_space_bytes_anon.Decrement(
|
||||
mapped_size);
|
||||
milvus::monitor::internal_mmap_in_used_count_anon.Decrement();
|
||||
} else if (mapping_type_ == MappingType::MAP_WITH_FILE) {
|
||||
milvus::monitor::internal_mmap_in_used_space_bytes_file.Decrement(
|
||||
mapped_size);
|
||||
milvus::monitor::internal_mmap_in_used_count_file.Decrement();
|
||||
}
|
||||
// else: does not update metric for MAP_WITH_MANAGER, MmapChunkManagerPtr
|
||||
// will update metric itself.
|
||||
}
|
||||
|
||||
storage::MmapChunkManagerPtr mcm_ = nullptr;
|
||||
};
|
||||
|
||||
class SingleChunkColumn : public SingleChunkColumnBase {
|
||||
public:
|
||||
// MAP_WITH_ANONYMOUS ctor
|
||||
SingleChunkColumn(size_t cap, const FieldMeta& field_meta)
|
||||
: SingleChunkColumnBase(cap, field_meta) {
|
||||
}
|
||||
|
||||
// MAP_WITH_FILE ctor
|
||||
SingleChunkColumn(const File& file,
|
||||
size_t size,
|
||||
const FieldMeta& field_meta)
|
||||
: SingleChunkColumnBase(file, size, field_meta) {
|
||||
}
|
||||
|
||||
// MAP_WITH_MANAGER ctor
|
||||
SingleChunkColumn(size_t reserve,
|
||||
const DataType& data_type,
|
||||
storage::MmapChunkManagerPtr mcm,
|
||||
storage::MmapChunkDescriptorPtr descriptor,
|
||||
bool nullable)
|
||||
: SingleChunkColumnBase(reserve, data_type, mcm, descriptor, nullable) {
|
||||
}
|
||||
|
||||
~SingleChunkColumn() override = default;
|
||||
|
||||
SpanBase
|
||||
Span() const override {
|
||||
return SpanBase(
|
||||
data_, valid_data_.data(), num_rows_, data_cap_size_ / num_rows_);
|
||||
}
|
||||
};
|
||||
|
||||
class SingleChunkSparseFloatColumn : public SingleChunkColumnBase {
|
||||
public:
|
||||
// MAP_WITH_ANONYMOUS ctor
|
||||
SingleChunkSparseFloatColumn(const FieldMeta& field_meta)
|
||||
: SingleChunkColumnBase(0, field_meta) {
|
||||
}
|
||||
// MAP_WITH_FILE ctor
|
||||
SingleChunkSparseFloatColumn(const File& file,
|
||||
size_t size,
|
||||
const FieldMeta& field_meta,
|
||||
std::vector<uint64_t>&& indices = {})
|
||||
: SingleChunkColumnBase(file, size, field_meta) {
|
||||
AssertInfo(!indices.empty(),
|
||||
"SparseFloatColumn indices should not be empty.");
|
||||
num_rows_ = indices.size();
|
||||
// so that indices[num_rows_] - indices[num_rows_ - 1] is the byte size of
|
||||
// the last row.
|
||||
indices.push_back(data_size_);
|
||||
dim_ = 0;
|
||||
for (size_t i = 0; i < num_rows_; i++) {
|
||||
auto vec_size = indices[i + 1] - indices[i];
|
||||
AssertInfo(
|
||||
vec_size % knowhere::sparse::SparseRow<float>::element_size() ==
|
||||
0,
|
||||
"Incorrect sparse vector byte size: {}",
|
||||
vec_size);
|
||||
vec_.emplace_back(
|
||||
vec_size / knowhere::sparse::SparseRow<float>::element_size(),
|
||||
(uint8_t*)(data_) + indices[i],
|
||||
false);
|
||||
dim_ = std::max(dim_, vec_.back().dim());
|
||||
}
|
||||
}
|
||||
// MAP_WITH_MANAGER ctor
|
||||
SingleChunkSparseFloatColumn(storage::MmapChunkManagerPtr mcm,
|
||||
storage::MmapChunkDescriptorPtr descriptor)
|
||||
: SingleChunkColumnBase(
|
||||
0, DataType::VECTOR_SPARSE_FLOAT, mcm, descriptor, false) {
|
||||
}
|
||||
|
||||
~SingleChunkSparseFloatColumn() override = default;
|
||||
|
||||
// returned pointer points at a list of knowhere::sparse::SparseRow<float>
|
||||
const char*
|
||||
Data(int chunk_id) const override {
|
||||
return static_cast<const char*>(static_cast<const void*>(vec_.data()));
|
||||
}
|
||||
|
||||
SpanBase
|
||||
Span() const override {
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"SparseFloatColumn::Span() not supported");
|
||||
}
|
||||
|
||||
void
|
||||
AppendBatch(const FieldDataPtr data) override {
|
||||
AssertInfo(
|
||||
mapping_type_ != MappingType::MAP_WITH_FILE,
|
||||
"SparseFloatColumn::AppendBatch not supported for MAP_WITH_FILE");
|
||||
|
||||
size_t required_size = data_size_ + data->DataSize();
|
||||
if (required_size > data_cap_size_) {
|
||||
ExpandData(required_size * 2);
|
||||
// after expanding, the address of each row in vec_ become invalid.
|
||||
// the number of elements of each row is still correct, update the
|
||||
// address of each row to the new data_.
|
||||
size_t bytes = 0;
|
||||
for (size_t i = 0; i < num_rows_; i++) {
|
||||
auto count = vec_[i].size();
|
||||
auto row_bytes = vec_[i].data_byte_size();
|
||||
// destroy the old object and placement new a new one
|
||||
vec_[i].~SparseRow<float>();
|
||||
new (&vec_[i]) knowhere::sparse::SparseRow<float>(
|
||||
count, (uint8_t*)(data_) + bytes, false);
|
||||
bytes += row_bytes;
|
||||
}
|
||||
}
|
||||
dim_ = std::max(
|
||||
dim_,
|
||||
std::static_pointer_cast<FieldDataSparseVectorImpl>(data)->Dim());
|
||||
|
||||
auto ptr = static_cast<const knowhere::sparse::SparseRow<float>*>(
|
||||
data->Data());
|
||||
|
||||
for (size_t i = 0; i < data->Length(); ++i) {
|
||||
auto row_bytes = ptr[i].data_byte_size();
|
||||
std::memcpy(data_ + data_size_, ptr[i].data(), row_bytes);
|
||||
vec_.emplace_back(
|
||||
ptr[i].size(), (uint8_t*)(data_) + data_size_, false);
|
||||
data_size_ += row_bytes;
|
||||
}
|
||||
num_rows_ += data->Length();
|
||||
}
|
||||
|
||||
void
|
||||
Append(const char* data, size_t size) override {
|
||||
PanicInfo(
|
||||
ErrorCode::Unsupported,
|
||||
"SparseFloatColumn::Append not supported, use AppendBatch instead");
|
||||
}
|
||||
|
||||
void
|
||||
Append(const char* data, const bool valid_data, size_t size) override {
|
||||
PanicInfo(
|
||||
ErrorCode::Unsupported,
|
||||
"SparseFloatColumn::Append not supported, use AppendBatch instead");
|
||||
}
|
||||
|
||||
int64_t
|
||||
Dim() const {
|
||||
return dim_;
|
||||
}
|
||||
|
||||
private:
|
||||
int64_t dim_ = 0;
|
||||
std::vector<knowhere::sparse::SparseRow<float>> vec_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class SingleChunkVariableColumn : public SingleChunkColumnBase {
|
||||
public:
|
||||
using ViewType =
|
||||
std::conditional_t<std::is_same_v<T, std::string>, std::string_view, T>;
|
||||
|
||||
// MAP_WITH_ANONYMOUS ctor
|
||||
SingleChunkVariableColumn(size_t reserve_rows,
|
||||
const FieldMeta& field_meta,
|
||||
size_t block_size)
|
||||
: SingleChunkColumnBase(reserve_rows, field_meta),
|
||||
block_size_(block_size) {
|
||||
}
|
||||
|
||||
// MAP_WITH_FILE ctor
|
||||
SingleChunkVariableColumn(const File& file,
|
||||
size_t size,
|
||||
const FieldMeta& field_meta,
|
||||
size_t block_size)
|
||||
: SingleChunkColumnBase(file, size, field_meta),
|
||||
block_size_(block_size) {
|
||||
}
|
||||
|
||||
~SingleChunkVariableColumn() override = default;
|
||||
|
||||
SpanBase
|
||||
Span() const override {
|
||||
PanicInfo(ErrorCode::NotImplemented,
|
||||
"span() interface is not implemented for variable column");
|
||||
}
|
||||
|
||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
StringViews() const override {
|
||||
std::vector<std::string_view> res;
|
||||
res.reserve(num_rows_);
|
||||
char* pos = data_;
|
||||
for (size_t i = 0; i < num_rows_; ++i) {
|
||||
uint32_t size;
|
||||
size = *reinterpret_cast<uint32_t*>(pos);
|
||||
pos += sizeof(uint32_t);
|
||||
res.emplace_back(pos, size);
|
||||
pos += size;
|
||||
}
|
||||
return std::make_pair(res, valid_data_);
|
||||
}
|
||||
|
||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
ViewsByOffsets(const FixedVector<int32_t>& offsets) const override {
|
||||
std::vector<std::string_view> res;
|
||||
FixedVector<bool> valid;
|
||||
res.reserve(offsets.size());
|
||||
valid.reserve(offsets.size());
|
||||
for (int offset : offsets) {
|
||||
res.emplace_back(RawAt(offset));
|
||||
valid.emplace_back(IsValid(offset));
|
||||
}
|
||||
return {res, valid};
|
||||
}
|
||||
|
||||
[[nodiscard]] std::vector<ViewType>
|
||||
Views() const {
|
||||
std::vector<ViewType> res;
|
||||
res.reserve(num_rows_);
|
||||
char* pos = data_;
|
||||
for (size_t i = 0; i < num_rows_; ++i) {
|
||||
uint32_t size;
|
||||
size = *reinterpret_cast<uint32_t*>(pos);
|
||||
pos += sizeof(uint32_t);
|
||||
res.emplace_back(ViewType(pos, size));
|
||||
pos += size;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
BufferView
|
||||
GetBatchBuffer(int64_t start_offset, int64_t length) override {
|
||||
if (start_offset < 0 || start_offset > num_rows_ ||
|
||||
start_offset + length > num_rows_) {
|
||||
PanicInfo(ErrorCode::OutOfRange, "index out of range");
|
||||
}
|
||||
|
||||
char* pos = data_ + indices_[start_offset / block_size_];
|
||||
for (size_t j = 0; j < start_offset % block_size_; j++) {
|
||||
uint32_t size;
|
||||
size = *reinterpret_cast<uint32_t*>(pos);
|
||||
pos += sizeof(uint32_t) + size;
|
||||
}
|
||||
|
||||
BufferView res;
|
||||
res.data_ = std::pair<char*, size_t>{pos, 0};
|
||||
return res;
|
||||
}
|
||||
|
||||
ViewType
|
||||
operator[](const int i) const {
|
||||
if (i < 0 || i > num_rows_) {
|
||||
PanicInfo(ErrorCode::OutOfRange, "index out of range");
|
||||
}
|
||||
size_t batch_id = i / block_size_;
|
||||
size_t offset = i % block_size_;
|
||||
|
||||
// located in batch start location
|
||||
char* pos = data_ + indices_[batch_id];
|
||||
for (size_t j = 0; j < offset; j++) {
|
||||
uint32_t size;
|
||||
size = *reinterpret_cast<uint32_t*>(pos);
|
||||
pos += sizeof(uint32_t) + size;
|
||||
}
|
||||
|
||||
uint32_t size;
|
||||
size = *reinterpret_cast<uint32_t*>(pos);
|
||||
return ViewType(pos + sizeof(uint32_t), size);
|
||||
}
|
||||
|
||||
int
|
||||
binary_search_string(std::string_view target) {
|
||||
int left = 0;
|
||||
int right = num_rows_ - 1; // `right` should be num_rows_ - 1
|
||||
int result =
|
||||
-1; // Initialize result to store the first occurrence index
|
||||
|
||||
while (left <= right) {
|
||||
int mid = left + (right - left) / 2;
|
||||
std::string_view midString = this->RawAt(mid);
|
||||
if (midString == target) {
|
||||
result = mid; // Store the index of match
|
||||
right = mid - 1; // Continue searching in the left half
|
||||
} else if (midString < target) {
|
||||
// midString < target
|
||||
left = mid + 1;
|
||||
} else {
|
||||
// midString > target
|
||||
right = mid - 1;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string_view
|
||||
RawAt(const size_t i) const {
|
||||
return std::string_view((*this)[i]);
|
||||
}
|
||||
|
||||
void
|
||||
Append(FieldDataPtr chunk) {
|
||||
for (auto i = 0; i < chunk->get_num_rows(); i++) {
|
||||
indices_.emplace_back(data_size_);
|
||||
auto data = static_cast<const T*>(chunk->RawValue(i));
|
||||
data_size_ += sizeof(uint32_t) + data->size();
|
||||
if (nullable_) {
|
||||
valid_data_.push_back(chunk->is_valid(i));
|
||||
}
|
||||
}
|
||||
load_buf_.emplace(std::move(chunk));
|
||||
}
|
||||
|
||||
void
|
||||
Seal(std::vector<uint64_t> indices = {}) {
|
||||
if (!indices.empty()) {
|
||||
indices_ = std::move(indices);
|
||||
}
|
||||
|
||||
num_rows_ = indices_.size();
|
||||
|
||||
// for variable length column in memory mode only
|
||||
if (data_ == nullptr) {
|
||||
size_t total_data_size = data_size_;
|
||||
data_size_ = 0;
|
||||
ExpandData(total_data_size);
|
||||
|
||||
while (!load_buf_.empty()) {
|
||||
auto chunk = std::move(load_buf_.front());
|
||||
load_buf_.pop();
|
||||
|
||||
// data_ as: |size|data|size|data......
|
||||
for (auto i = 0; i < chunk->get_num_rows(); i++) {
|
||||
auto current_size = (uint32_t)chunk->DataSize(i);
|
||||
std::memcpy(
|
||||
data_ + data_size_, ¤t_size, sizeof(uint32_t));
|
||||
data_size_ += sizeof(uint32_t);
|
||||
auto data = static_cast<const T*>(chunk->RawValue(i));
|
||||
std::memcpy(
|
||||
data_ + data_size_, data->c_str(), data->size());
|
||||
data_size_ += data->size();
|
||||
}
|
||||
if (nullable_) {
|
||||
for (size_t i = 0; i < chunk->get_num_rows(); i++) {
|
||||
valid_data_.push_back(chunk->is_valid(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
shrink_indice();
|
||||
}
|
||||
|
||||
protected:
|
||||
void
|
||||
shrink_indice() {
|
||||
std::vector<uint64_t> tmp_indices;
|
||||
tmp_indices.reserve((indices_.size() + block_size_ - 1) / block_size_);
|
||||
|
||||
for (size_t i = 0; i < indices_.size();) {
|
||||
tmp_indices.push_back(indices_[i]);
|
||||
i += block_size_;
|
||||
}
|
||||
|
||||
indices_.swap(tmp_indices);
|
||||
}
|
||||
|
||||
private:
|
||||
// loading states
|
||||
std::queue<FieldDataPtr> load_buf_{};
|
||||
// raw data index, record indices located 0, block_size_, 2 * block_size_, 3 * block_size_
|
||||
size_t block_size_;
|
||||
std::vector<uint64_t> indices_{};
|
||||
};
|
||||
|
||||
class SingleChunkArrayColumn : public SingleChunkColumnBase {
|
||||
public:
|
||||
// MAP_WITH_ANONYMOUS ctor
|
||||
SingleChunkArrayColumn(size_t reserve_rows, const FieldMeta& field_meta)
|
||||
: SingleChunkColumnBase(reserve_rows, field_meta),
|
||||
element_type_(field_meta.get_element_type()) {
|
||||
}
|
||||
|
||||
// MAP_WITH_FILE ctor
|
||||
SingleChunkArrayColumn(const File& file,
|
||||
size_t size,
|
||||
const FieldMeta& field_meta)
|
||||
: SingleChunkColumnBase(file, size, field_meta),
|
||||
element_type_(field_meta.get_element_type()) {
|
||||
}
|
||||
|
||||
~SingleChunkArrayColumn() override = default;
|
||||
|
||||
SpanBase
|
||||
Span() const override {
|
||||
return SpanBase(views_.data(),
|
||||
valid_data_.data(),
|
||||
views_.size(),
|
||||
sizeof(ArrayView));
|
||||
}
|
||||
|
||||
[[nodiscard]] const std::vector<ArrayView>&
|
||||
Views() const {
|
||||
return views_;
|
||||
}
|
||||
|
||||
ArrayView
|
||||
operator[](const int i) const {
|
||||
return views_[i];
|
||||
}
|
||||
|
||||
ScalarArray
|
||||
RawAt(const int i) const {
|
||||
return views_[i].output_data();
|
||||
}
|
||||
|
||||
void
|
||||
Append(const Array& array, bool valid_data = false) {
|
||||
indices_.emplace_back(data_size_);
|
||||
lens_.emplace_back(array.length());
|
||||
if (IsVariableDataType(array.get_element_type())) {
|
||||
element_indices_.emplace_back(
|
||||
array.get_offsets_data(),
|
||||
array.get_offsets_data() + array.length());
|
||||
} else {
|
||||
element_indices_.emplace_back();
|
||||
}
|
||||
|
||||
if (nullable_) {
|
||||
return SingleChunkColumnBase::Append(
|
||||
static_cast<const char*>(array.data()),
|
||||
valid_data,
|
||||
array.byte_size());
|
||||
}
|
||||
SingleChunkColumnBase::Append(static_cast<const char*>(array.data()),
|
||||
array.byte_size());
|
||||
}
|
||||
|
||||
void
|
||||
Seal(std::vector<uint64_t>&& indices = {},
|
||||
std::vector<std::vector<uint32_t>>&& element_indices = {}) {
|
||||
if (!indices.empty()) {
|
||||
indices_ = std::move(indices);
|
||||
element_indices_ = std::move(element_indices);
|
||||
lens_.reserve(element_indices_.size());
|
||||
for (auto& ele_idices : element_indices_) {
|
||||
lens_.emplace_back(ele_idices.size());
|
||||
}
|
||||
}
|
||||
num_rows_ = indices_.size();
|
||||
ConstructViews();
|
||||
}
|
||||
|
||||
std::pair<std::vector<ArrayView>, FixedVector<bool>>
|
||||
ArrayViews() const override {
|
||||
return {Views(), valid_data_};
|
||||
}
|
||||
|
||||
protected:
|
||||
void
|
||||
ConstructViews() {
|
||||
views_.reserve(indices_.size());
|
||||
auto last = indices_.size() - 1;
|
||||
for (size_t i = 0; i < last; i++) {
|
||||
views_.emplace_back(data_ + indices_[i],
|
||||
lens_[i],
|
||||
indices_[i + 1] - indices_[i],
|
||||
element_type_,
|
||||
element_indices_[i].data());
|
||||
}
|
||||
views_.emplace_back(data_ + indices_.back(),
|
||||
lens_[last],
|
||||
data_size_ - indices_.back(),
|
||||
element_type_,
|
||||
element_indices_[last].data());
|
||||
lens_.clear();
|
||||
indices_.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<uint64_t> indices_{};
|
||||
std::vector<std::vector<uint32_t>> element_indices_{};
|
||||
std::vector<int> lens_{};
|
||||
// Compatible with current Span type
|
||||
std::vector<ArrayView> views_{};
|
||||
DataType element_type_;
|
||||
};
|
||||
} // namespace milvus
|
||||
@ -27,52 +27,17 @@ namespace milvus {
|
||||
|
||||
struct FieldDataInfo {
|
||||
FieldDataInfo() {
|
||||
channel = std::make_shared<FieldDataChannel>();
|
||||
arrow_reader_channel = std::make_shared<ArrowReaderChannel>();
|
||||
}
|
||||
|
||||
FieldDataInfo(int64_t field_id,
|
||||
size_t row_count,
|
||||
std::string mmap_dir_path = "",
|
||||
bool growing = true)
|
||||
std::string mmap_dir_path = "")
|
||||
: field_id(field_id),
|
||||
row_count(row_count),
|
||||
mmap_dir_path(std::move(mmap_dir_path)) {
|
||||
if (growing) {
|
||||
channel = std::make_shared<FieldDataChannel>();
|
||||
} else {
|
||||
arrow_reader_channel = std::make_shared<ArrowReaderChannel>();
|
||||
}
|
||||
}
|
||||
|
||||
FieldDataInfo(int64_t field_id,
|
||||
size_t row_count,
|
||||
FieldDataChannelPtr channel)
|
||||
: field_id(field_id),
|
||||
row_count(row_count),
|
||||
channel(std::move(channel)) {
|
||||
}
|
||||
|
||||
FieldDataInfo(int64_t field_id,
|
||||
size_t row_count,
|
||||
std::string mmap_dir_path,
|
||||
FieldDataChannelPtr channel)
|
||||
: field_id(field_id),
|
||||
row_count(row_count),
|
||||
mmap_dir_path(std::move(mmap_dir_path)),
|
||||
channel(std::move(channel)) {
|
||||
}
|
||||
|
||||
FieldDataInfo(int64_t field_id,
|
||||
size_t row_count,
|
||||
const std::vector<FieldDataPtr>& batch)
|
||||
: field_id(field_id), row_count(row_count) {
|
||||
channel = std::make_shared<FieldDataChannel>();
|
||||
for (auto& data : batch) {
|
||||
channel->push(data);
|
||||
}
|
||||
channel->close();
|
||||
}
|
||||
|
||||
FieldDataInfo(
|
||||
int64_t field_id,
|
||||
@ -86,24 +51,9 @@ struct FieldDataInfo {
|
||||
arrow_reader_channel->close();
|
||||
}
|
||||
|
||||
FieldDataInfo(int64_t field_id,
|
||||
size_t row_count,
|
||||
std::string mmap_dir_path,
|
||||
const std::vector<FieldDataPtr>& batch)
|
||||
: field_id(field_id),
|
||||
row_count(row_count),
|
||||
mmap_dir_path(std::move(mmap_dir_path)) {
|
||||
channel = std::make_shared<FieldDataChannel>();
|
||||
for (auto& data : batch) {
|
||||
channel->push(data);
|
||||
}
|
||||
channel->close();
|
||||
}
|
||||
|
||||
int64_t field_id;
|
||||
size_t row_count;
|
||||
std::string mmap_dir_path;
|
||||
FieldDataChannelPtr channel;
|
||||
std::shared_ptr<ArrowReaderChannel> arrow_reader_channel;
|
||||
};
|
||||
} // namespace milvus
|
||||
|
||||
@ -235,8 +235,8 @@ ChunkedSegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& load_info) {
|
||||
std::stol(b.substr(b.find_last_of('/') + 1));
|
||||
});
|
||||
|
||||
auto field_data_info = FieldDataInfo(
|
||||
field_id.get(), num_rows, load_info.mmap_dir_path, false);
|
||||
auto field_data_info =
|
||||
FieldDataInfo(field_id.get(), num_rows, load_info.mmap_dir_path);
|
||||
LOG_INFO("segment {} loads field {} with num_rows {}",
|
||||
this->get_segment_id(),
|
||||
field_id.get(),
|
||||
|
||||
@ -437,4 +437,20 @@ class ChunkedSegmentSealedImpl : public SegmentSealed {
|
||||
json_key_indexes_;
|
||||
};
|
||||
|
||||
inline SegmentSealedUPtr
|
||||
CreateSealedSegment(
|
||||
SchemaPtr schema,
|
||||
IndexMetaPtr index_meta = nullptr,
|
||||
int64_t segment_id = 0,
|
||||
const SegcoreConfig& segcore_config = SegcoreConfig::default_config(),
|
||||
bool TEST_skip_index_for_retrieve = false,
|
||||
bool is_sorted_by_pk = false) {
|
||||
return std::make_unique<ChunkedSegmentSealedImpl>(
|
||||
schema,
|
||||
index_meta,
|
||||
segcore_config,
|
||||
segment_id,
|
||||
TEST_skip_index_for_retrieve,
|
||||
is_sorted_by_pk);
|
||||
}
|
||||
} // namespace milvus::segcore
|
||||
|
||||
@ -383,39 +383,6 @@ struct InsertRecord {
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
insert_pks(milvus::DataType data_type,
|
||||
const std::shared_ptr<SingleChunkColumnBase>& data) {
|
||||
std::lock_guard lck(shared_mutex_);
|
||||
int64_t offset = 0;
|
||||
switch (data_type) {
|
||||
case DataType::INT64: {
|
||||
auto column =
|
||||
std::dynamic_pointer_cast<SingleChunkColumn>(data);
|
||||
auto pks = reinterpret_cast<const int64_t*>(column->Data(0));
|
||||
for (int i = 0; i < column->NumRows(); ++i) {
|
||||
pk2offset_->insert(pks[i], offset++);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
auto column = std::dynamic_pointer_cast<
|
||||
SingleChunkVariableColumn<std::string>>(data);
|
||||
auto pks = column->Views();
|
||||
|
||||
for (int i = 0; i < column->NumRows(); ++i) {
|
||||
pk2offset_->insert(std::string(pks[i]), offset++);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo(DataTypeInvalid,
|
||||
fmt::format("unsupported primary key data type",
|
||||
data_type));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
insert_pks(const std::vector<FieldDataPtr>& field_datas) {
|
||||
std::lock_guard lck(shared_mutex_);
|
||||
|
||||
@ -138,14 +138,13 @@ SegmentChunkReader::GetChunkDataAccessor<std::string>(
|
||||
current_chunk_size =
|
||||
segment_->chunk_size(field_id, current_chunk_id);
|
||||
}
|
||||
auto chunk_data = chunk_info.first;
|
||||
auto chunk_valid_data = chunk_info.second;
|
||||
auto& chunk_data = chunk_info.first;
|
||||
auto& chunk_valid_data = chunk_info.second;
|
||||
if (current_chunk_pos < chunk_valid_data.size() &&
|
||||
!chunk_valid_data[current_chunk_pos]) {
|
||||
current_chunk_pos++;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
return std::string(chunk_data[current_chunk_pos++]);
|
||||
};
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,461 +0,0 @@
|
||||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <tbb/concurrent_priority_queue.h>
|
||||
#include <tbb/concurrent_vector.h>
|
||||
|
||||
#include <deque>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "ConcurrentVector.h"
|
||||
#include "DeletedRecord.h"
|
||||
#include "SealedIndexingRecord.h"
|
||||
#include "SegmentSealed.h"
|
||||
#include "TimestampIndex.h"
|
||||
#include "common/EasyAssert.h"
|
||||
#include "google/protobuf/message_lite.h"
|
||||
#include "mmap/Column.h"
|
||||
#include "index/ScalarIndex.h"
|
||||
#include "segcore/ChunkedSegmentSealedImpl.h"
|
||||
#include "sys/mman.h"
|
||||
#include "common/Types.h"
|
||||
#include "common/IndexMeta.h"
|
||||
#include "index/TextMatchIndex.h"
|
||||
#include "index/JsonKeyStatsInvertedIndex.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
class SegmentSealedImpl : public SegmentSealed {
|
||||
public:
|
||||
explicit SegmentSealedImpl(SchemaPtr schema,
|
||||
IndexMetaPtr index_meta,
|
||||
const SegcoreConfig& segcore_config,
|
||||
int64_t segment_id,
|
||||
bool TEST_skip_index_for_retrieve = false,
|
||||
bool is_sorted_by_pk = false);
|
||||
~SegmentSealedImpl() override;
|
||||
void
|
||||
LoadIndex(const LoadIndexInfo& info) override;
|
||||
void
|
||||
LoadFieldData(const LoadFieldDataInfo& info) override;
|
||||
void
|
||||
LoadDeletedRecord(const LoadDeletedRecordInfo& info) override;
|
||||
void
|
||||
LoadSegmentMeta(
|
||||
const milvus::proto::segcore::LoadSegmentMeta& segment_meta) override;
|
||||
void
|
||||
DropIndex(const FieldId field_id) override;
|
||||
void
|
||||
DropFieldData(const FieldId field_id) override;
|
||||
bool
|
||||
HasIndex(FieldId field_id) const override;
|
||||
bool
|
||||
HasFieldData(FieldId field_id) const override;
|
||||
|
||||
bool
|
||||
Contain(const PkType& pk) const override {
|
||||
return insert_record_.contain(pk);
|
||||
}
|
||||
|
||||
void
|
||||
LoadFieldData(FieldId field_id, FieldDataInfo& data) override;
|
||||
void
|
||||
MapFieldData(const FieldId field_id, FieldDataInfo& data) override;
|
||||
void
|
||||
AddFieldDataInfoForSealed(
|
||||
const LoadFieldDataInfo& field_data_info) override;
|
||||
|
||||
int64_t
|
||||
get_segment_id() const override {
|
||||
return id_;
|
||||
}
|
||||
|
||||
bool
|
||||
HasRawData(int64_t field_id) const override;
|
||||
|
||||
DataType
|
||||
GetFieldDataType(FieldId fieldId) const override;
|
||||
|
||||
void
|
||||
RemoveFieldFile(const FieldId field_id) override;
|
||||
|
||||
void
|
||||
CreateTextIndex(FieldId field_id) override;
|
||||
|
||||
void
|
||||
LoadTextIndex(FieldId field_id,
|
||||
std::unique_ptr<index::TextMatchIndex> index) override;
|
||||
|
||||
void
|
||||
LoadJsonKeyIndex(
|
||||
FieldId field_id,
|
||||
std::unique_ptr<index::JsonKeyStatsInvertedIndex> index) override;
|
||||
|
||||
index::JsonKeyStatsInvertedIndex*
|
||||
GetJsonKeyIndex(FieldId field_id) const override;
|
||||
|
||||
std::pair<std::string_view, bool>
|
||||
GetJsonData(FieldId field_id, size_t offset) const override;
|
||||
|
||||
public:
|
||||
size_t
|
||||
GetMemoryUsageInBytes() const override {
|
||||
return stats_.mem_size.load() + deleted_record_.mem_size();
|
||||
}
|
||||
|
||||
int64_t
|
||||
get_row_count() const override;
|
||||
|
||||
int64_t
|
||||
get_deleted_count() const override;
|
||||
|
||||
const Schema&
|
||||
get_schema() const override;
|
||||
|
||||
std::vector<SegOffset>
|
||||
search_pk(const PkType& pk, Timestamp timestamp) const override;
|
||||
|
||||
std::vector<SegOffset>
|
||||
search_pk(const PkType& pk, int64_t insert_barrier) const override;
|
||||
|
||||
template <typename Condition>
|
||||
std::vector<SegOffset>
|
||||
search_sorted_pk(const PkType& pk, Condition condition) const;
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
get_vector(FieldId field_id,
|
||||
const int64_t* ids,
|
||||
int64_t count) const override;
|
||||
|
||||
bool
|
||||
is_nullable(FieldId field_id) const override {
|
||||
auto it = fields_.find(field_id);
|
||||
AssertInfo(it != fields_.end(),
|
||||
"Cannot find field with field_id: " +
|
||||
std::to_string(field_id.get()));
|
||||
return it->second->IsNullable();
|
||||
};
|
||||
|
||||
InsertRecord<true>&
|
||||
get_insert_record() override {
|
||||
return insert_record_;
|
||||
}
|
||||
|
||||
public:
|
||||
int64_t
|
||||
num_chunk_index(FieldId field_id) const override;
|
||||
|
||||
// count of chunk that has raw data
|
||||
int64_t
|
||||
num_chunk_data(FieldId field_id) const override;
|
||||
|
||||
int64_t
|
||||
num_chunk(FieldId field_id) const override;
|
||||
|
||||
// return size_per_chunk for each chunk, renaming against confusion
|
||||
int64_t
|
||||
size_per_chunk() const override;
|
||||
|
||||
int64_t
|
||||
chunk_size(FieldId field_id, int64_t chunk_id) const override {
|
||||
PanicInfo(ErrorCode::Unsupported, "Not implemented");
|
||||
}
|
||||
bool
|
||||
is_chunked() const override {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::pair<int64_t, int64_t>
|
||||
get_chunk_by_offset(FieldId field_id, int64_t offset) const override {
|
||||
if (fields_.find(field_id) == fields_.end()) {
|
||||
PanicInfo(
|
||||
ErrorCode::FieldIDInvalid,
|
||||
"Failed to get chunk offset towards a non-existing field:{}",
|
||||
field_id.get());
|
||||
}
|
||||
// for sealed segment, chunk id is always zero and input offset is the target offset
|
||||
return std::make_pair(0, offset);
|
||||
}
|
||||
|
||||
int64_t
|
||||
num_rows_until_chunk(FieldId field_id, int64_t chunk_id) const override {
|
||||
PanicInfo(ErrorCode::Unsupported, "Not implemented");
|
||||
}
|
||||
std::string
|
||||
debug() const override;
|
||||
|
||||
SegcoreError
|
||||
Delete(int64_t reserved_offset,
|
||||
int64_t size,
|
||||
const IdArray* pks,
|
||||
const Timestamp* timestamps) override;
|
||||
|
||||
std::pair<std::vector<OffsetMap::OffsetType>, bool>
|
||||
find_first(int64_t limit, const BitsetType& bitset) const override;
|
||||
|
||||
// Calculate: output[i] = Vec[seg_offset[i]]
|
||||
// where Vec is determined from field_offset
|
||||
std::unique_ptr<DataArray>
|
||||
bulk_subscript(FieldId field_id,
|
||||
const int64_t* seg_offsets,
|
||||
int64_t count) const override;
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
bulk_subscript(
|
||||
FieldId field_id,
|
||||
const int64_t* seg_offsets,
|
||||
int64_t count,
|
||||
const std::vector<std::string>& dynamic_field_names) const override;
|
||||
|
||||
bool
|
||||
is_mmap_field(FieldId id) const override;
|
||||
|
||||
void
|
||||
ClearData() override;
|
||||
|
||||
bool
|
||||
is_field_exist(FieldId field_id) const override {
|
||||
return schema_->get_fields().find(field_id) !=
|
||||
schema_->get_fields().end();
|
||||
}
|
||||
|
||||
protected:
|
||||
// blob and row_count
|
||||
SpanBase
|
||||
chunk_data_impl(FieldId field_id, int64_t chunk_id) const override;
|
||||
|
||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
chunk_string_view_impl(
|
||||
FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
std::optional<std::pair<int64_t, int64_t>> offset_len) const override;
|
||||
|
||||
std::pair<std::vector<ArrayView>, FixedVector<bool>>
|
||||
chunk_array_view_impl(
|
||||
FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
std::optional<std::pair<int64_t, int64_t>> offset_len) const override;
|
||||
|
||||
std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
chunk_view_by_offsets(FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
const FixedVector<int32_t>& offsets) const override;
|
||||
|
||||
std::pair<BufferView, FixedVector<bool>>
|
||||
get_chunk_buffer(FieldId field_id,
|
||||
int64_t chunk_id,
|
||||
int64_t start_offset,
|
||||
int64_t length) const override;
|
||||
|
||||
const index::IndexBase*
|
||||
chunk_index_impl(FieldId field_id, int64_t chunk_id) const override;
|
||||
|
||||
// Calculate: output[i] = Vec[seg_offset[i]],
|
||||
// where Vec is determined from field_offset
|
||||
void
|
||||
bulk_subscript(SystemFieldType system_type,
|
||||
const int64_t* seg_offsets,
|
||||
int64_t count,
|
||||
void* output) const override;
|
||||
|
||||
void
|
||||
check_search(const query::Plan* plan) const override;
|
||||
|
||||
int64_t
|
||||
get_active_count(Timestamp ts) const override;
|
||||
|
||||
const ConcurrentVector<Timestamp>&
|
||||
get_timestamps() const override {
|
||||
return insert_record_.timestamps_;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename S, typename T = S>
|
||||
static void
|
||||
bulk_subscript_impl(const void* src_raw,
|
||||
const int64_t* seg_offsets,
|
||||
int64_t count,
|
||||
T* dst_raw);
|
||||
|
||||
template <typename S, typename T = S>
|
||||
static void
|
||||
bulk_subscript_impl(const SingleChunkColumnBase* field,
|
||||
const int64_t* seg_offsets,
|
||||
int64_t count,
|
||||
void* dst_raw);
|
||||
|
||||
template <typename S, typename T = S>
|
||||
static void
|
||||
bulk_subscript_ptr_impl(const SingleChunkColumnBase* field,
|
||||
const int64_t* seg_offsets,
|
||||
int64_t count,
|
||||
google::protobuf::RepeatedPtrField<T>* dst_raw);
|
||||
|
||||
template <typename T>
|
||||
static void
|
||||
bulk_subscript_array_impl(const SingleChunkColumnBase* column,
|
||||
const int64_t* seg_offsets,
|
||||
int64_t count,
|
||||
google::protobuf::RepeatedPtrField<T>* dst);
|
||||
|
||||
static void
|
||||
bulk_subscript_impl(int64_t element_sizeof,
|
||||
const void* src_raw,
|
||||
const int64_t* seg_offsets,
|
||||
int64_t count,
|
||||
void* dst_raw);
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
fill_with_empty(FieldId field_id, int64_t count) const;
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
get_raw_data(FieldId field_id,
|
||||
const FieldMeta& field_meta,
|
||||
const int64_t* seg_offsets,
|
||||
int64_t count) const;
|
||||
|
||||
void
|
||||
update_row_count(int64_t row_count) {
|
||||
// if (row_count_opt_.has_value()) {
|
||||
// AssertInfo(row_count_opt_.value() == row_count, "load data has different row count from other columns");
|
||||
// } else {
|
||||
num_rows_ = row_count;
|
||||
// }
|
||||
deleted_record_.set_sealed_row_count(row_count);
|
||||
}
|
||||
|
||||
void
|
||||
mask_with_timestamps(BitsetTypeView& bitset_chunk,
|
||||
Timestamp timestamp) const override;
|
||||
|
||||
void
|
||||
vector_search(SearchInfo& search_info,
|
||||
const void* query_data,
|
||||
int64_t query_count,
|
||||
Timestamp timestamp,
|
||||
const BitsetView& bitset,
|
||||
SearchResult& output) const override;
|
||||
|
||||
void
|
||||
mask_with_delete(BitsetTypeView& bitset,
|
||||
int64_t ins_barrier,
|
||||
Timestamp timestamp) const override;
|
||||
|
||||
bool
|
||||
is_system_field_ready() const {
|
||||
return system_ready_count_ == 2;
|
||||
}
|
||||
|
||||
std::pair<std::unique_ptr<IdArray>, std::vector<SegOffset>>
|
||||
search_ids(const IdArray& id_array, Timestamp timestamp) const override;
|
||||
|
||||
std::tuple<std::string, int64_t>
|
||||
GetFieldDataPath(FieldId field_id, int64_t offset) const;
|
||||
|
||||
void
|
||||
LoadVecIndex(const LoadIndexInfo& info);
|
||||
|
||||
void
|
||||
LoadScalarIndex(const LoadIndexInfo& info);
|
||||
|
||||
void
|
||||
WarmupChunkCache(const FieldId field_id, bool mmap_enabled) override;
|
||||
|
||||
bool
|
||||
generate_interim_index(const FieldId field_id);
|
||||
|
||||
private:
|
||||
// mmap descriptor, used in chunk cache
|
||||
storage::MmapChunkDescriptorPtr mmap_descriptor_ = nullptr;
|
||||
// segment loading state
|
||||
BitsetType field_data_ready_bitset_;
|
||||
BitsetType index_ready_bitset_;
|
||||
BitsetType binlog_index_bitset_;
|
||||
std::atomic<int> system_ready_count_ = 0;
|
||||
// segment data
|
||||
|
||||
// TODO: generate index for scalar
|
||||
std::optional<int64_t> num_rows_;
|
||||
|
||||
// scalar field index
|
||||
std::unordered_map<FieldId, index::IndexBasePtr> scalar_indexings_;
|
||||
// vector field index
|
||||
SealedIndexingRecord vector_indexings_;
|
||||
|
||||
// inserted fields data and row_ids, timestamps
|
||||
InsertRecord<true> insert_record_;
|
||||
|
||||
// deleted pks
|
||||
mutable DeletedRecord<true> deleted_record_;
|
||||
|
||||
LoadFieldDataInfo field_data_info_;
|
||||
|
||||
SchemaPtr schema_;
|
||||
int64_t id_;
|
||||
std::unordered_map<FieldId, std::shared_ptr<SingleChunkColumnBase>> fields_;
|
||||
std::unordered_set<FieldId> mmap_fields_;
|
||||
|
||||
// only useful in binlog
|
||||
IndexMetaPtr col_index_meta_;
|
||||
SegcoreConfig segcore_config_;
|
||||
std::unordered_map<FieldId, std::unique_ptr<VecIndexConfig>>
|
||||
vec_binlog_config_;
|
||||
|
||||
SegmentStats stats_{};
|
||||
|
||||
// for sparse vector unit test only! Once a type of sparse index that
|
||||
// doesn't has raw data is added, this should be removed.
|
||||
bool TEST_skip_index_for_retrieve_ = false;
|
||||
|
||||
// whether the segment is sorted by the pk
|
||||
bool is_sorted_by_pk_ = false;
|
||||
|
||||
// used for json expr optimization
|
||||
std::unordered_map<FieldId,
|
||||
std::unique_ptr<index::JsonKeyStatsInvertedIndex>>
|
||||
json_key_indexes_;
|
||||
};
|
||||
|
||||
inline SegmentSealedUPtr
|
||||
CreateSealedSegment(
|
||||
SchemaPtr schema,
|
||||
IndexMetaPtr index_meta = nullptr,
|
||||
int64_t segment_id = 0,
|
||||
const SegcoreConfig& segcore_config = SegcoreConfig::default_config(),
|
||||
bool TEST_skip_index_for_retrieve = false,
|
||||
bool is_sorted_by_pk = false,
|
||||
bool is_multi_chunk = false) {
|
||||
if (!is_multi_chunk) {
|
||||
return std::make_unique<SegmentSealedImpl>(schema,
|
||||
index_meta,
|
||||
segcore_config,
|
||||
segment_id,
|
||||
TEST_skip_index_for_retrieve,
|
||||
is_sorted_by_pk);
|
||||
} else {
|
||||
return std::make_unique<ChunkedSegmentSealedImpl>(
|
||||
schema,
|
||||
index_meta,
|
||||
segcore_config,
|
||||
segment_id,
|
||||
TEST_skip_index_for_retrieve,
|
||||
is_sorted_by_pk);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace milvus::segcore
|
||||
@ -28,12 +28,14 @@
|
||||
#include "segcore/Collection.h"
|
||||
#include "segcore/SegcoreConfig.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
#include "segcore/Utils.h"
|
||||
#include "storage/Event.h"
|
||||
#include "storage/Util.h"
|
||||
#include "futures/Future.h"
|
||||
#include "futures/Executor.h"
|
||||
#include "segcore/SegmentSealed.h"
|
||||
#include "segcore/ChunkedSegmentSealedImpl.h"
|
||||
#include "mmap/Types.h"
|
||||
|
||||
////////////////////////////// common interfaces //////////////////////////////
|
||||
CStatus
|
||||
@ -61,18 +63,7 @@ NewSegment(CCollection collection,
|
||||
segment_id,
|
||||
milvus::segcore::SegcoreConfig::default_config(),
|
||||
false,
|
||||
is_sorted_by_pk,
|
||||
false);
|
||||
break;
|
||||
case ChunkedSealed:
|
||||
segment = milvus::segcore::CreateSealedSegment(
|
||||
col->get_schema(),
|
||||
col->get_index_meta(),
|
||||
segment_id,
|
||||
milvus::segcore::SegcoreConfig::default_config(),
|
||||
false,
|
||||
is_sorted_by_pk,
|
||||
true);
|
||||
is_sorted_by_pk);
|
||||
break;
|
||||
|
||||
default:
|
||||
@ -389,12 +380,13 @@ LoadFieldRawData(CSegmentInterface c_segment,
|
||||
auto field_data =
|
||||
milvus::storage::CreateFieldData(data_type, false, dim);
|
||||
field_data->FillFieldData(data, row_count);
|
||||
milvus::FieldDataChannelPtr channel =
|
||||
std::make_shared<milvus::FieldDataChannel>();
|
||||
channel->push(field_data);
|
||||
channel->close();
|
||||
auto field_data_info = milvus::FieldDataInfo(
|
||||
field_id, static_cast<size_t>(row_count), channel);
|
||||
auto arrow_data_wrapper =
|
||||
milvus::storage::ConvertFieldDataToArrowDataWrapper(field_data);
|
||||
auto field_data_info = milvus::FieldDataInfo{
|
||||
field_id,
|
||||
static_cast<size_t>(row_count),
|
||||
std::vector<std::shared_ptr<milvus::ArrowDataWrapper>>{
|
||||
arrow_data_wrapper}};
|
||||
segment->LoadFieldData(milvus::FieldId(field_id), field_data_info);
|
||||
return milvus::SuccessCStatus();
|
||||
} catch (std::exception& e) {
|
||||
|
||||
@ -150,94 +150,6 @@ ChunkCache::Read(const std::string& filepath,
|
||||
return column;
|
||||
}
|
||||
|
||||
std::shared_ptr<ColumnBase>
|
||||
ChunkCache::Read(const std::string& filepath,
|
||||
const MmapChunkDescriptorPtr& descriptor,
|
||||
const FieldMeta& field_meta,
|
||||
bool mmap_enabled,
|
||||
bool mmap_rss_not_need) {
|
||||
// use rlock to get future
|
||||
{
|
||||
std::shared_lock lck(mutex_);
|
||||
auto it = columns_.find(filepath);
|
||||
if (it != columns_.end()) {
|
||||
lck.unlock();
|
||||
auto result = it->second.second.get();
|
||||
AssertInfo(result, "unexpected null column, file={}", filepath);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// lock for mutation
|
||||
std::unique_lock lck(mutex_);
|
||||
// double check no-futurn
|
||||
auto it = columns_.find(filepath);
|
||||
if (it != columns_.end()) {
|
||||
lck.unlock();
|
||||
auto result = it->second.second.get();
|
||||
AssertInfo(result, "unexpected null column, file={}", filepath);
|
||||
return result;
|
||||
}
|
||||
|
||||
std::promise<std::shared_ptr<ColumnBase>> p;
|
||||
std::shared_future<std::shared_ptr<ColumnBase>> f = p.get_future();
|
||||
columns_.emplace(filepath, std::make_pair(std::move(p), f));
|
||||
lck.unlock();
|
||||
|
||||
// release lock and perform download and decode
|
||||
// other thread request same path shall get the future.
|
||||
std::unique_ptr<DataCodec> field_data;
|
||||
std::shared_ptr<ColumnBase> column;
|
||||
bool allocate_success = false;
|
||||
ErrorCode err_code = Success;
|
||||
std::string err_msg = "";
|
||||
try {
|
||||
field_data = DownloadAndDecodeRemoteFile(cm_.get(), filepath);
|
||||
column = ConvertToColumn(
|
||||
field_data->GetFieldData(), descriptor, field_meta, mmap_enabled);
|
||||
if (mmap_enabled && mmap_rss_not_need) {
|
||||
auto ok = madvise(reinterpret_cast<void*>(
|
||||
const_cast<char*>(column->MmappedData())),
|
||||
column->DataByteSize(),
|
||||
ReadAheadPolicy_Map["dontneed"]);
|
||||
if (ok != 0) {
|
||||
LOG_WARN(
|
||||
"failed to madvise to the data file {}, addr {}, size {}, "
|
||||
"err: "
|
||||
"{}",
|
||||
filepath,
|
||||
static_cast<const void*>(column->MmappedData()),
|
||||
column->DataByteSize(),
|
||||
strerror(errno));
|
||||
}
|
||||
}
|
||||
allocate_success = true;
|
||||
} catch (const SegcoreError& e) {
|
||||
err_code = e.get_error_code();
|
||||
err_msg = fmt::format("failed to read for chunkCache, seg_core_err:{}",
|
||||
e.what());
|
||||
}
|
||||
std::unique_lock mmap_lck(mutex_);
|
||||
it = columns_.find(filepath);
|
||||
if (it != columns_.end()) {
|
||||
// check pair exists then set value
|
||||
it->second.first.set_value(column);
|
||||
if (allocate_success) {
|
||||
AssertInfo(column, "unexpected null column, file={}", filepath);
|
||||
}
|
||||
} else {
|
||||
PanicInfo(UnexpectedError,
|
||||
"Wrong code, the thread to download for "
|
||||
"cache should get the "
|
||||
"target entry");
|
||||
}
|
||||
if (err_code != Success) {
|
||||
columns_.erase(filepath);
|
||||
throw SegcoreError(err_code, err_msg);
|
||||
}
|
||||
return column;
|
||||
}
|
||||
|
||||
void
|
||||
ChunkCache::Remove(const std::string& filepath) {
|
||||
std::unique_lock lck(mutex_);
|
||||
@ -267,42 +179,6 @@ ChunkCache::Prefetch(const std::string& filepath) {
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<ColumnBase>
|
||||
ChunkCache::ConvertToColumn(const FieldDataPtr& field_data,
|
||||
const MmapChunkDescriptorPtr& descriptor,
|
||||
const FieldMeta& field_meta,
|
||||
bool mmap_enabled) {
|
||||
auto data_type = field_data->get_data_type();
|
||||
|
||||
std::shared_ptr<ColumnBase> column{};
|
||||
|
||||
if (IsSparseFloatVectorDataType(data_type)) {
|
||||
if (mmap_enabled) {
|
||||
column = std::make_shared<SingleChunkSparseFloatColumn>(mcm_,
|
||||
descriptor);
|
||||
} else {
|
||||
column = std::make_shared<SingleChunkSparseFloatColumn>(field_meta);
|
||||
}
|
||||
} else if (IsVariableDataType(data_type)) {
|
||||
AssertInfo(
|
||||
false, "TODO: unimplemented for variable data type: {}", data_type);
|
||||
} else {
|
||||
if (mmap_enabled) {
|
||||
column =
|
||||
std::make_shared<SingleChunkColumn>(field_data->Size(),
|
||||
data_type,
|
||||
mcm_,
|
||||
descriptor,
|
||||
field_data->IsNullable());
|
||||
} else {
|
||||
column = std::make_shared<SingleChunkColumn>(
|
||||
field_data->get_num_rows(), field_meta);
|
||||
}
|
||||
}
|
||||
column->AppendBatch(field_data);
|
||||
return column;
|
||||
}
|
||||
|
||||
// TODO(sunby): use mmap chunk manager to create chunk
|
||||
std::string
|
||||
ChunkCache::CachePath(const std::string& filepath) {
|
||||
|
||||
@ -52,13 +52,6 @@ class ChunkCache {
|
||||
bool mmap_enabled,
|
||||
bool mmap_rss_not_need = false);
|
||||
|
||||
std::shared_ptr<ColumnBase>
|
||||
Read(const std::string& filepath,
|
||||
const MmapChunkDescriptorPtr& descriptor,
|
||||
const FieldMeta& field_meta,
|
||||
bool mmap_enabled,
|
||||
bool mmap_rss_not_need = false);
|
||||
|
||||
void
|
||||
Remove(const std::string& filepath);
|
||||
|
||||
@ -69,12 +62,6 @@ class ChunkCache {
|
||||
std::string
|
||||
CachePath(const std::string& filepath);
|
||||
|
||||
std::shared_ptr<ColumnBase>
|
||||
ConvertToColumn(const FieldDataPtr& field_data,
|
||||
const MmapChunkDescriptorPtr& descriptor,
|
||||
const FieldMeta& field_meta,
|
||||
bool mmap_enabled);
|
||||
|
||||
private:
|
||||
using ColumnTable = std::unordered_map<
|
||||
std::string,
|
||||
|
||||
@ -25,6 +25,7 @@
|
||||
#include "common/LoadInfo.h"
|
||||
#include "knowhere/comp/index_param.h"
|
||||
#include "parquet/schema.h"
|
||||
#include "storage/Event.h"
|
||||
#include "storage/PayloadStream.h"
|
||||
#include "storage/FileManager.h"
|
||||
#include "storage/BinlogReader.h"
|
||||
@ -207,4 +208,27 @@ SortByPath(std::vector<std::string>& paths) {
|
||||
});
|
||||
}
|
||||
|
||||
inline std::shared_ptr<ArrowDataWrapper>
|
||||
ConvertFieldDataToArrowDataWrapper(const FieldDataPtr& field_data) {
|
||||
BaseEventData event_data;
|
||||
event_data.field_data = field_data;
|
||||
auto event_data_bytes = event_data.Serialize();
|
||||
|
||||
std::shared_ptr<uint8_t[]> file_data(new uint8_t[event_data_bytes.size()]);
|
||||
std::memcpy(
|
||||
file_data.get(), event_data_bytes.data(), event_data_bytes.size());
|
||||
|
||||
storage::BinlogReaderPtr reader = std::make_shared<storage::BinlogReader>(
|
||||
file_data, event_data_bytes.size());
|
||||
event_data = storage::BaseEventData(reader,
|
||||
event_data_bytes.size(),
|
||||
field_data->get_data_type(),
|
||||
field_data->IsNullable(),
|
||||
false);
|
||||
return std::make_shared<ArrowDataWrapper>(
|
||||
event_data.payload_reader->get_reader(),
|
||||
event_data.payload_reader->get_file_reader(),
|
||||
file_data);
|
||||
}
|
||||
|
||||
} // namespace milvus::storage
|
||||
|
||||
@ -15,7 +15,7 @@
|
||||
#include "pb/plan.pb.h"
|
||||
#include "index/InvertedIndexTantivy.h"
|
||||
#include "common/Schema.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
|
||||
#include "test_utils/DataGen.h"
|
||||
#include "test_utils/GenExprProto.h"
|
||||
#include "query/PlanProto.h"
|
||||
@ -155,7 +155,7 @@ TYPED_TEST_P(ArrayInvertedIndexTest, ArrayContainsAny) {
|
||||
auto parsed =
|
||||
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, typed_expr);
|
||||
|
||||
auto segpromote = dynamic_cast<SegmentSealedImpl*>(this->seg_.get());
|
||||
auto segpromote = dynamic_cast<ChunkedSegmentSealedImpl*>(this->seg_.get());
|
||||
BitsetType final;
|
||||
final = ExecuteQueryExpr(parsed, segpromote, this->N_, MAX_TIMESTAMP);
|
||||
|
||||
@ -203,7 +203,7 @@ TYPED_TEST_P(ArrayInvertedIndexTest, ArrayContainsAll) {
|
||||
auto parsed =
|
||||
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, typed_expr);
|
||||
|
||||
auto segpromote = dynamic_cast<SegmentSealedImpl*>(this->seg_.get());
|
||||
auto segpromote = dynamic_cast<ChunkedSegmentSealedImpl*>(this->seg_.get());
|
||||
BitsetType final;
|
||||
final = ExecuteQueryExpr(parsed, segpromote, this->N_, MAX_TIMESTAMP);
|
||||
|
||||
@ -259,7 +259,7 @@ TYPED_TEST_P(ArrayInvertedIndexTest, ArrayEqual) {
|
||||
auto parsed =
|
||||
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, typed_expr);
|
||||
|
||||
auto segpromote = dynamic_cast<SegmentSealedImpl*>(this->seg_.get());
|
||||
auto segpromote = dynamic_cast<ChunkedSegmentSealedImpl*>(this->seg_.get());
|
||||
BitsetType final;
|
||||
final = ExecuteQueryExpr(parsed, segpromote, this->N_, MAX_TIMESTAMP);
|
||||
|
||||
|
||||
@ -21,7 +21,7 @@
|
||||
#include "query/Plan.h"
|
||||
#include "segcore/segcore_init_c.h"
|
||||
#include "segcore/SegmentSealed.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
|
||||
#include "test_utils/DataGen.h"
|
||||
|
||||
using namespace milvus;
|
||||
@ -131,8 +131,12 @@ class BinlogIndexTest : public ::testing::TestWithParam<Param> {
|
||||
auto field_data = std::make_shared<milvus::FieldData<int64_t>>(
|
||||
DataType::INT64, false);
|
||||
field_data->FillFieldData(dataset.row_ids_.data(), data_n);
|
||||
auto arrow_data_wrapper =
|
||||
storage::ConvertFieldDataToArrowDataWrapper(field_data);
|
||||
auto field_data_info = FieldDataInfo{
|
||||
RowFieldID.get(), data_n, std::vector<FieldDataPtr>{field_data}};
|
||||
RowFieldID.get(),
|
||||
data_n,
|
||||
std::vector<std::shared_ptr<ArrowDataWrapper>>{arrow_data_wrapper}};
|
||||
segment->LoadFieldData(RowFieldID, field_data_info);
|
||||
// load ts
|
||||
LoadFieldDataInfo ts_info;
|
||||
@ -144,9 +148,13 @@ class BinlogIndexTest : public ::testing::TestWithParam<Param> {
|
||||
field_data = std::make_shared<milvus::FieldData<int64_t>>(
|
||||
DataType::INT64, false);
|
||||
field_data->FillFieldData(dataset.timestamps_.data(), data_n);
|
||||
field_data_info = FieldDataInfo{TimestampFieldID.get(),
|
||||
auto arrow_data_wrapper2 =
|
||||
storage::ConvertFieldDataToArrowDataWrapper(field_data);
|
||||
field_data_info =
|
||||
FieldDataInfo{TimestampFieldID.get(),
|
||||
data_n,
|
||||
std::vector<FieldDataPtr>{field_data}};
|
||||
std::vector<std::shared_ptr<ArrowDataWrapper>>{
|
||||
arrow_data_wrapper2}};
|
||||
segment->LoadFieldData(TimestampFieldID, field_data_info);
|
||||
}
|
||||
|
||||
@ -188,8 +196,12 @@ TEST_P(BinlogIndexTest, AccuracyWithLoadFieldData) {
|
||||
segcore_config.set_enable_interim_segment_index(true);
|
||||
segcore_config.set_nprobe(32);
|
||||
// 1. load field data, and build binlog index for binlog data
|
||||
auto arrow_data_wrapper =
|
||||
storage::ConvertFieldDataToArrowDataWrapper(vec_field_data);
|
||||
auto field_data_info = FieldDataInfo{
|
||||
vec_field_id.get(), data_n, std::vector<FieldDataPtr>{vec_field_data}};
|
||||
vec_field_id.get(),
|
||||
data_n,
|
||||
std::vector<std::shared_ptr<ArrowDataWrapper>>{arrow_data_wrapper}};
|
||||
segment->LoadFieldData(vec_field_id, field_data_info);
|
||||
|
||||
//assert segment has been built binlog index
|
||||
@ -286,8 +298,10 @@ TEST_P(BinlogIndexTest, AccuracyWithMapFieldData) {
|
||||
field_data_info.field_id = vec_field_id.get();
|
||||
field_data_info.row_count = data_n;
|
||||
field_data_info.mmap_dir_path = "./data/mmap-test";
|
||||
field_data_info.channel->push(vec_field_data);
|
||||
field_data_info.channel->close();
|
||||
auto arrow_data_wrapper =
|
||||
storage::ConvertFieldDataToArrowDataWrapper(vec_field_data);
|
||||
field_data_info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
field_data_info.arrow_reader_channel->close();
|
||||
segment->MapFieldData(vec_field_id, field_data_info);
|
||||
|
||||
//assert segment has been built binlog index
|
||||
@ -378,8 +392,12 @@ TEST_P(BinlogIndexTest, DisableInterimIndex) {
|
||||
LoadOtherFields();
|
||||
SegcoreSetEnableTempSegmentIndex(false);
|
||||
|
||||
auto arrow_data_wrapper =
|
||||
storage::ConvertFieldDataToArrowDataWrapper(vec_field_data);
|
||||
auto field_data_info = FieldDataInfo{
|
||||
vec_field_id.get(), data_n, std::vector<FieldDataPtr>{vec_field_data}};
|
||||
vec_field_id.get(),
|
||||
data_n,
|
||||
std::vector<std::shared_ptr<ArrowDataWrapper>>{arrow_data_wrapper}};
|
||||
segment->LoadFieldData(vec_field_id, field_data_info);
|
||||
|
||||
EXPECT_FALSE(segment->HasIndex(vec_field_id));
|
||||
@ -422,8 +440,13 @@ TEST_P(BinlogIndexTest, LoadBingLogWihIDMAP) {
|
||||
segment = CreateSealedSegment(schema, collection_index_meta);
|
||||
LoadOtherFields();
|
||||
|
||||
auto arrow_data_wrapper =
|
||||
storage::ConvertFieldDataToArrowDataWrapper(vec_field_data);
|
||||
|
||||
auto field_data_info = FieldDataInfo{
|
||||
vec_field_id.get(), data_n, std::vector<FieldDataPtr>{vec_field_data}};
|
||||
vec_field_id.get(),
|
||||
data_n,
|
||||
std::vector<std::shared_ptr<ArrowDataWrapper>>{arrow_data_wrapper}};
|
||||
segment->LoadFieldData(vec_field_id, field_data_info);
|
||||
|
||||
EXPECT_FALSE(segment->HasIndex(vec_field_id));
|
||||
@ -438,8 +461,12 @@ TEST_P(BinlogIndexTest, LoadBinlogWithoutIndexMeta) {
|
||||
segment = CreateSealedSegment(schema, collection_index_meta);
|
||||
SegcoreSetEnableTempSegmentIndex(true);
|
||||
|
||||
auto arrow_data_wrapper =
|
||||
storage::ConvertFieldDataToArrowDataWrapper(vec_field_data);
|
||||
auto field_data_info = FieldDataInfo{
|
||||
vec_field_id.get(), data_n, std::vector<FieldDataPtr>{vec_field_data}};
|
||||
vec_field_id.get(),
|
||||
data_n,
|
||||
std::vector<std::shared_ptr<ArrowDataWrapper>>{arrow_data_wrapper}};
|
||||
segment->LoadFieldData(vec_field_id, field_data_info);
|
||||
|
||||
EXPECT_FALSE(segment->HasIndex(vec_field_id));
|
||||
|
||||
@ -4114,8 +4114,6 @@ TEST(CApiTest, SealedSegment_Update_Field_Size) {
|
||||
}
|
||||
auto res = LoadFieldRawData(segment, str_fid.get(), str_datas.data(), N);
|
||||
ASSERT_EQ(res.error_code, Success);
|
||||
ASSERT_EQ(segment->get_field_avg_size(str_fid),
|
||||
(row_size * N + total_size) / (2 * N));
|
||||
|
||||
DeleteSegment(segment);
|
||||
}
|
||||
@ -4233,7 +4231,8 @@ TEST(CApiTest, GrowingSegment_Load_Field_Data_Lack_Binlog_Rows) {
|
||||
DeleteSegment(segment);
|
||||
}
|
||||
|
||||
TEST(CApiTest, SealedSegment_Load_Field_Data_Lack_Binlog_Rows) {
|
||||
|
||||
TEST(CApiTest, DISABLED_SealedSegment_Load_Field_Data_Lack_Binlog_Rows) {
|
||||
double double_default_value = 20;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddField(
|
||||
@ -4339,6 +4338,13 @@ TEST(CApiTest, RetriveScalarFieldFromSealedSegmentWithIndex) {
|
||||
count = GetRowCount(segment);
|
||||
ASSERT_EQ(count, N);
|
||||
|
||||
// load int64 field
|
||||
res = LoadFieldRawData(
|
||||
segment, i64_fid.get(), raw_data.get_col<int64_t>(i64_fid).data(), N);
|
||||
ASSERT_EQ(res.error_code, Success);
|
||||
count = GetRowCount(segment);
|
||||
ASSERT_EQ(count, N);
|
||||
|
||||
// load index for int8 field
|
||||
auto age8_col = raw_data.get_col<int8_t>(i8_fid);
|
||||
GenScalarIndexing(N, age8_col.data());
|
||||
|
||||
@ -36,9 +36,7 @@
|
||||
#include "storage/LocalChunkManagerSingleton.h"
|
||||
|
||||
#define DEFAULT_READ_AHEAD_POLICY "willneed"
|
||||
class ChunkCacheTest
|
||||
: public testing::TestWithParam<
|
||||
/*mmap enabled, is chunked*/ std::tuple<bool, bool>> {
|
||||
class ChunkCacheTest : public testing::TestWithParam<bool> {
|
||||
protected:
|
||||
void
|
||||
SetUp() override {
|
||||
@ -131,9 +129,7 @@ class ChunkCacheTest
|
||||
milvus::FixedVector<knowhere::sparse::SparseRow<float>> sparse_data;
|
||||
std::shared_ptr<milvus::storage::LocalChunkManager> lcm;
|
||||
};
|
||||
INSTANTIATE_TEST_SUITE_P(ChunkCacheTestSuite,
|
||||
ChunkCacheTest,
|
||||
testing::Combine(testing::Bool(), testing::Bool()));
|
||||
INSTANTIATE_TEST_SUITE_P(ChunkCacheTestSuite, ChunkCacheTest, testing::Bool());
|
||||
|
||||
TEST_P(ChunkCacheTest, Read) {
|
||||
auto cc = milvus::storage::MmapManager::GetInstance().GetChunkCache();
|
||||
@ -141,18 +137,10 @@ TEST_P(ChunkCacheTest, Read) {
|
||||
// validate dense data
|
||||
std::shared_ptr<milvus::ColumnBase> dense_column;
|
||||
|
||||
auto p = GetParam();
|
||||
auto mmap_enabled = std::get<0>(p);
|
||||
auto is_test_chunked = std::get<1>(p);
|
||||
auto mmap_enabled = GetParam();
|
||||
|
||||
dense_column = cc->Read(dense_file_name, dense_field_meta, mmap_enabled);
|
||||
|
||||
if (is_test_chunked) {
|
||||
dense_column =
|
||||
cc->Read(dense_file_name, dense_field_meta, mmap_enabled);
|
||||
} else {
|
||||
dense_column = cc->Read(
|
||||
dense_file_name, descriptor, dense_field_meta, mmap_enabled);
|
||||
Assert(dense_column->DataByteSize() == dim * N * 4);
|
||||
}
|
||||
auto actual_dense = (const float*)(dense_column->Data(0));
|
||||
for (auto i = 0; i < N * dim; i++) {
|
||||
AssertInfo(dense_data[i] == actual_dense[i],
|
||||
@ -162,13 +150,8 @@ TEST_P(ChunkCacheTest, Read) {
|
||||
|
||||
// validate sparse data
|
||||
std::shared_ptr<milvus::ColumnBase> sparse_column;
|
||||
if (is_test_chunked) {
|
||||
sparse_column =
|
||||
cc->Read(sparse_file_name, sparse_field_meta, mmap_enabled);
|
||||
} else {
|
||||
sparse_column = cc->Read(
|
||||
sparse_file_name, descriptor, sparse_field_meta, mmap_enabled);
|
||||
}
|
||||
sparse_column = cc->Read(sparse_file_name, sparse_field_meta, mmap_enabled);
|
||||
|
||||
auto expected_sparse_size = 0;
|
||||
auto actual_sparse =
|
||||
(const knowhere::sparse::SparseRow<float>*)(sparse_column->Data(0));
|
||||
@ -189,9 +172,20 @@ TEST_P(ChunkCacheTest, Read) {
|
||||
actual_sparse_row.data()));
|
||||
expected_sparse_size += bytes;
|
||||
}
|
||||
if (!is_test_chunked) {
|
||||
Assert(sparse_column->DataByteSize() == expected_sparse_size);
|
||||
|
||||
expected_sparse_size += (N + 7) / 8;
|
||||
expected_sparse_size += sizeof(int64_t) * (N + 1);
|
||||
|
||||
if (mmap_enabled) {
|
||||
const uint32_t page_size = sysconf(_SC_PAGE_SIZE);
|
||||
auto padding_size = (expected_sparse_size / page_size +
|
||||
(expected_sparse_size % page_size != 0)) *
|
||||
page_size -
|
||||
expected_sparse_size;
|
||||
expected_sparse_size += padding_size;
|
||||
}
|
||||
auto actual_sparse_size = sparse_column->DataByteSize();
|
||||
Assert(actual_sparse_size == expected_sparse_size);
|
||||
|
||||
cc->Remove(dense_file_name);
|
||||
cc->Remove(sparse_file_name);
|
||||
@ -204,19 +198,11 @@ TEST_P(ChunkCacheTest, TestMultithreads) {
|
||||
|
||||
constexpr int threads = 16;
|
||||
std::vector<int64_t> total_counts(threads);
|
||||
auto p = GetParam();
|
||||
auto mmap_enabled = std::get<0>(p);
|
||||
auto is_test_chunked = std::get<1>(p);
|
||||
auto mmap_enabled = GetParam();
|
||||
auto executor = [&](int thread_id) {
|
||||
std::shared_ptr<milvus::ColumnBase> dense_column;
|
||||
if (is_test_chunked) {
|
||||
dense_column =
|
||||
cc->Read(dense_file_name, dense_field_meta, mmap_enabled);
|
||||
} else {
|
||||
dense_column = cc->Read(
|
||||
dense_file_name, descriptor, dense_field_meta, mmap_enabled);
|
||||
Assert(dense_column->DataByteSize() == dim * N * 4);
|
||||
}
|
||||
|
||||
auto actual_dense = (const float*)dense_column->Data(0);
|
||||
for (auto i = 0; i < N * dim; i++) {
|
||||
@ -227,13 +213,9 @@ TEST_P(ChunkCacheTest, TestMultithreads) {
|
||||
}
|
||||
|
||||
std::shared_ptr<milvus::ColumnBase> sparse_column;
|
||||
if (is_test_chunked) {
|
||||
sparse_column =
|
||||
cc->Read(sparse_file_name, sparse_field_meta, mmap_enabled);
|
||||
} else {
|
||||
sparse_column = cc->Read(
|
||||
sparse_file_name, descriptor, sparse_field_meta, mmap_enabled);
|
||||
}
|
||||
|
||||
auto actual_sparse =
|
||||
(const knowhere::sparse::SparseRow<float>*)sparse_column->Data(0);
|
||||
for (auto i = 0; i < N; i++) {
|
||||
|
||||
@ -34,7 +34,7 @@
|
||||
#include "query/SearchOnSealed.h"
|
||||
#include "segcore/SegcoreConfig.h"
|
||||
#include "segcore/SegmentSealed.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
|
||||
#include "segcore/Types.h"
|
||||
#include "test_utils/DataGen.h"
|
||||
#include <memory>
|
||||
@ -187,7 +187,6 @@ class TestChunkSegment : public testing::TestWithParam<bool> {
|
||||
-1,
|
||||
segcore::SegcoreConfig::default_config(),
|
||||
false,
|
||||
true,
|
||||
true);
|
||||
test_data_count = 10000;
|
||||
|
||||
|
||||
@ -23,7 +23,7 @@
|
||||
|
||||
#include "segcore/DeletedRecord.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
#include "test_utils/DataGen.h"
|
||||
|
||||
|
||||
@ -97,9 +97,12 @@ class TaskTest : public testing::TestWithParam<DataType> {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper =
|
||||
storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
segment->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
|
||||
@ -45,6 +45,7 @@
|
||||
#include "segcore/segment_c.h"
|
||||
#include "storage/FileManager.h"
|
||||
#include "storage/Types.h"
|
||||
#include "storage/Util.h"
|
||||
#include "test_utils/DataGen.h"
|
||||
#include "test_utils/GenExprProto.h"
|
||||
#include "index/IndexFactory.h"
|
||||
@ -54,6 +55,7 @@
|
||||
#include "expr/ITypeExpr.h"
|
||||
#include "index/BitmapIndex.h"
|
||||
#include "index/InvertedIndexTantivy.h"
|
||||
#include "mmap/Types.h"
|
||||
|
||||
using namespace milvus;
|
||||
using namespace milvus::query;
|
||||
@ -3482,9 +3484,10 @@ TEST_P(ExprTest, test_term_pk_with_sorted) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -3573,9 +3576,10 @@ TEST_P(ExprTest, TestSealedSegmentGetBatchSize) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -3723,9 +3727,10 @@ TEST_P(ExprTest, TestReorder) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -3868,9 +3873,10 @@ TEST_P(ExprTest, TestCompareExprNullable) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -4024,9 +4030,10 @@ TEST_P(ExprTest, TestCompareExprNullable2) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -4174,9 +4181,10 @@ TEST_P(ExprTest, TestMutiInConvert) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -4262,9 +4270,10 @@ TEST(Expr, TestExprPerformance) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -4640,9 +4649,10 @@ TEST(Expr, TestExprNOT) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -5000,9 +5010,10 @@ TEST_P(ExprTest, test_term_pk) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -5143,9 +5154,10 @@ TEST_P(ExprTest, TestConjuctExpr) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -5233,9 +5245,10 @@ TEST_P(ExprTest, TestConjuctExprNullable) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(
|
||||
storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta)));
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -5245,13 +5258,13 @@ TEST_P(ExprTest, TestConjuctExprNullable) {
|
||||
::milvus::proto::plan::GenericValue value;
|
||||
value.set_int64_val(l);
|
||||
auto left = std::make_shared<milvus::expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(int64_fid, DataType::INT64),
|
||||
expr::ColumnInfo(int64_nullable_fid, DataType::INT64),
|
||||
proto::plan::OpType::GreaterThan,
|
||||
value,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
value.set_int64_val(r);
|
||||
auto right = std::make_shared<milvus::expr::UnaryRangeFilterExpr>(
|
||||
expr::ColumnInfo(int64_fid, DataType::INT64),
|
||||
expr::ColumnInfo(int64_nullable_fid, DataType::INT64),
|
||||
proto::plan::OpType::LessThan,
|
||||
value,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
@ -5320,9 +5333,10 @@ TEST_P(ExprTest, TestUnaryBenchTest) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -5392,9 +5406,10 @@ TEST_P(ExprTest, TestBinaryRangeBenchTest) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -5472,9 +5487,10 @@ TEST_P(ExprTest, TestLogicalUnaryBenchTest) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -5547,9 +5563,10 @@ TEST_P(ExprTest, TestBinaryLogicalBenchTest) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -5632,10 +5649,11 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeBenchExpr) {
|
||||
int64_t field_id = field_data.field_id();
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(
|
||||
N, &field_data, fields.at(FieldId(field_id))));
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -5753,10 +5771,11 @@ TEST(Expr, TestExprNull) {
|
||||
int64_t field_id = field_data.field_id();
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(
|
||||
N, &field_data, fields.at(FieldId(field_id))));
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -5943,9 +5962,11 @@ TEST_P(ExprTest, TestCompareExprBenchTest) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(
|
||||
N, &field_data, fields.at(FieldId(field_id))));
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -6012,10 +6033,11 @@ TEST_P(ExprTest, TestRefactorExprs) {
|
||||
int64_t field_id = field_data.field_id();
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(
|
||||
N, &field_data, fields.at(FieldId(field_id))));
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
seg->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -10884,7 +10906,7 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
|
||||
load_index_info.index = std::move(age_double_index);
|
||||
seg->LoadIndex(load_index_info);
|
||||
|
||||
auto seg_promote = dynamic_cast<SegmentSealedImpl*>(seg.get());
|
||||
auto seg_promote = dynamic_cast<ChunkedSegmentSealedImpl*>(seg.get());
|
||||
query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
|
||||
int offset = 0;
|
||||
for (auto [clause, ref_func, dtype] : testcases) {
|
||||
@ -11623,7 +11645,7 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeWithScalarSortIndexNullable) {
|
||||
load_index_info.index = std::move(age_double_index);
|
||||
seg->LoadIndex(load_index_info);
|
||||
|
||||
auto seg_promote = dynamic_cast<SegmentSealedImpl*>(seg.get());
|
||||
auto seg_promote = dynamic_cast<ChunkedSegmentSealedImpl*>(seg.get());
|
||||
query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
|
||||
int offset = 0;
|
||||
for (auto [clause, ref_func, dtype] : testcases) {
|
||||
@ -16609,7 +16631,10 @@ TYPED_TEST(JsonIndexTestFixture, TestJsonIndexUnaryExpr) {
|
||||
load_index_info.index_params = {{JSON_PATH, this->json_path}};
|
||||
seg->LoadIndex(load_index_info);
|
||||
|
||||
auto json_field_data_info = FieldDataInfo(json_fid.get(), N, {json_field});
|
||||
auto json_field_data_info = FieldDataInfo(
|
||||
json_fid.get(),
|
||||
N,
|
||||
{storage::ConvertFieldDataToArrowDataWrapper(json_field)});
|
||||
seg->LoadFieldData(json_fid, json_field_data_info);
|
||||
|
||||
auto unary_expr = std::make_shared<expr::UnaryRangeFilterExpr>(
|
||||
@ -16740,8 +16765,13 @@ TEST(JsonIndexTest, TestJsonNotEqualExpr) {
|
||||
load_index_info.index_params = {{JSON_PATH, "/a"}};
|
||||
seg->LoadIndex(load_index_info);
|
||||
|
||||
auto json_field_data_info = FieldDataInfo(
|
||||
json_fid.get(), 2 * json_strs.size(), {json_field, json_field2});
|
||||
auto json_field_data_info =
|
||||
FieldDataInfo(json_fid.get(), 2 * json_strs.size());
|
||||
json_field_data_info.arrow_reader_channel->push(
|
||||
storage::ConvertFieldDataToArrowDataWrapper(json_field));
|
||||
json_field_data_info.arrow_reader_channel->push(
|
||||
storage::ConvertFieldDataToArrowDataWrapper(json_field2));
|
||||
json_field_data_info.arrow_reader_channel->close();
|
||||
seg->LoadFieldData(json_fid, json_field_data_info);
|
||||
|
||||
proto::plan::GenericValue val;
|
||||
@ -16841,8 +16871,10 @@ TEST_P(JsonIndexExistsTest, TestExistsExpr) {
|
||||
load_index_info.index_params = {{JSON_PATH, json_index_path}};
|
||||
seg->LoadIndex(load_index_info);
|
||||
|
||||
auto json_field_data_info =
|
||||
FieldDataInfo(json_fid.get(), json_strs.size(), {json_field});
|
||||
auto json_field_data_info = FieldDataInfo(json_fid.get(), json_strs.size());
|
||||
json_field_data_info.arrow_reader_channel->push(
|
||||
storage::ConvertFieldDataToArrowDataWrapper(json_field));
|
||||
json_field_data_info.arrow_reader_channel->close();
|
||||
seg->LoadFieldData(json_fid, json_field_data_info);
|
||||
|
||||
for (auto& [nested_path, exists, expect] : test_cases) {
|
||||
|
||||
@ -30,7 +30,7 @@
|
||||
#include "query/ExecPlanNodeVisitor.h"
|
||||
#include "plan/PlanNode.h"
|
||||
#include "segcore/SegmentSealed.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
|
||||
#include "test_utils/DataGen.h"
|
||||
|
||||
using DataType = milvus::DataType;
|
||||
|
||||
@ -10,9 +10,10 @@
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include "common/FieldMeta.h"
|
||||
#include "common/Schema.h"
|
||||
#include "query/Plan.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
|
||||
#include "segcore/reduce_c.h"
|
||||
#include "segcore/plan_c.h"
|
||||
#include "segcore/segment_c.h"
|
||||
@ -34,19 +35,25 @@ prepareSegmentSystemFieldData(const std::unique_ptr<SegmentSealed>& segment,
|
||||
auto field_data =
|
||||
std::make_shared<milvus::FieldData<int64_t>>(DataType::INT64, false);
|
||||
field_data->FillFieldData(data_set.row_ids_.data(), row_count);
|
||||
auto field_data_info =
|
||||
FieldDataInfo{RowFieldID.get(),
|
||||
auto arrow_data_wrapper =
|
||||
storage::ConvertFieldDataToArrowDataWrapper(field_data);
|
||||
auto field_data_info = FieldDataInfo{
|
||||
RowFieldID.get(),
|
||||
row_count,
|
||||
std::vector<milvus::FieldDataPtr>{field_data}};
|
||||
std::vector<std::shared_ptr<ArrowDataWrapper>>{arrow_data_wrapper}};
|
||||
segment->LoadFieldData(RowFieldID, field_data_info);
|
||||
|
||||
field_data =
|
||||
std::make_shared<milvus::FieldData<int64_t>>(DataType::INT64, false);
|
||||
field_data->FillFieldData(data_set.timestamps_.data(), row_count);
|
||||
|
||||
auto timestamp_arrow_data_wrapper =
|
||||
storage::ConvertFieldDataToArrowDataWrapper(field_data);
|
||||
field_data_info =
|
||||
FieldDataInfo{TimestampFieldID.get(),
|
||||
row_count,
|
||||
std::vector<milvus::FieldDataPtr>{field_data}};
|
||||
std::vector<std::shared_ptr<ArrowDataWrapper>>{
|
||||
timestamp_arrow_data_wrapper}};
|
||||
segment->LoadFieldData(TimestampFieldID, field_data_info);
|
||||
}
|
||||
|
||||
@ -102,9 +109,11 @@ TEST(GroupBY, SealedIndex) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N);
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
// Assuming 'channel' is not a member of FieldDataInfo, we need to handle it differently
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
segment->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -453,10 +462,11 @@ TEST(GroupBY, SealedData) {
|
||||
int64_t field_id = field_data.field_id();
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N);
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(
|
||||
N, &field_data, fields.at(FieldId(field_id))));
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
segment->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -553,9 +563,10 @@ TEST(GroupBY, Reduce) {
|
||||
int64_t field_id = field_data.field_id();
|
||||
auto info = FieldDataInfo(field_data.field_id(), N);
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
segment1->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
prepareSegmentSystemFieldData(segment1, N, raw_data1);
|
||||
@ -565,9 +576,10 @@ TEST(GroupBY, Reduce) {
|
||||
int64_t field_id = field_data.field_id();
|
||||
auto info = FieldDataInfo(field_data.field_id(), N);
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
segment2->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
prepareSegmentSystemFieldData(segment2, N, raw_data2);
|
||||
|
||||
@ -12,7 +12,7 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include "common/Schema.h"
|
||||
#include "query/Plan.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
|
||||
#include "segcore/reduce_c.h"
|
||||
#include "segcore/plan_c.h"
|
||||
#include "segcore/segment_c.h"
|
||||
@ -37,19 +37,23 @@ prepareSegmentFieldData(const std::unique_ptr<SegmentSealed>& segment,
|
||||
auto field_data =
|
||||
std::make_shared<milvus::FieldData<int64_t>>(DataType::INT64, false);
|
||||
field_data->FillFieldData(data_set.row_ids_.data(), row_count);
|
||||
auto field_data_info =
|
||||
FieldDataInfo{RowFieldID.get(),
|
||||
auto arrow_data_wrapper =
|
||||
storage::ConvertFieldDataToArrowDataWrapper(field_data);
|
||||
auto field_data_info = FieldDataInfo{
|
||||
RowFieldID.get(),
|
||||
row_count,
|
||||
std::vector<milvus::FieldDataPtr>{field_data}};
|
||||
std::vector<std::shared_ptr<ArrowDataWrapper>>{arrow_data_wrapper}};
|
||||
segment->LoadFieldData(RowFieldID, field_data_info);
|
||||
|
||||
field_data =
|
||||
std::make_shared<milvus::FieldData<int64_t>>(DataType::INT64, false);
|
||||
field_data->FillFieldData(data_set.timestamps_.data(), row_count);
|
||||
field_data_info =
|
||||
FieldDataInfo{TimestampFieldID.get(),
|
||||
auto ts_arrow_data_wrapper =
|
||||
storage::ConvertFieldDataToArrowDataWrapper(field_data);
|
||||
field_data_info = FieldDataInfo{
|
||||
TimestampFieldID.get(),
|
||||
row_count,
|
||||
std::vector<milvus::FieldDataPtr>{field_data}};
|
||||
std::vector<std::shared_ptr<ArrowDataWrapper>>{ts_arrow_data_wrapper}};
|
||||
segment->LoadFieldData(TimestampFieldID, field_data_info);
|
||||
}
|
||||
|
||||
@ -102,9 +106,10 @@ TEST(IterativeFilter, SealedIndex) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N);
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
segment->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -324,9 +329,10 @@ TEST(IterativeFilter, SealedData) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N);
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
segment->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
|
||||
@ -522,7 +522,7 @@ TEST(Query, InnerProduct) {
|
||||
assert_order(*sr, "ip");
|
||||
}
|
||||
|
||||
TEST(Query, FillSegment) {
|
||||
TEST(Query, DISABLED_FillSegment) {
|
||||
namespace pb = milvus::proto;
|
||||
pb::schema::CollectionSchema proto;
|
||||
proto.set_name("col");
|
||||
@ -833,10 +833,12 @@ TEST(Query, FillSegment) {
|
||||
dim * sizeof(float));
|
||||
ASSERT_EQ(vfloat, std_vfloat);
|
||||
|
||||
// check int32 field
|
||||
// check int32 field only if valid
|
||||
if (output_i32_valid_data[i]) {
|
||||
int i32;
|
||||
memcpy(&i32, &output_i32_field_data[i], sizeof(int32_t));
|
||||
ASSERT_EQ(i32, std_i32);
|
||||
}
|
||||
// check int32 valid field
|
||||
bool i32_valid;
|
||||
memcpy(&i32_valid, &output_i32_valid_data[i], sizeof(bool));
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
#include "pb/plan.pb.h"
|
||||
#include "segcore/segcore_init_c.h"
|
||||
#include "segcore/SegmentSealed.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
|
||||
#include "segcore/SegmentGrowing.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
#include "pb/schema.pb.h"
|
||||
@ -327,7 +327,7 @@ TEST_F(SealedSegmentRegexQueryTest, BFRegexQueryOnNonStringField) {
|
||||
auto parsed =
|
||||
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, typed_expr);
|
||||
|
||||
auto segpromote = dynamic_cast<SegmentSealedImpl*>(seg.get());
|
||||
auto segpromote = dynamic_cast<ChunkedSegmentSealedImpl*>(seg.get());
|
||||
ASSERT_ANY_THROW(ExecuteQueryExpr(parsed, segpromote, N, MAX_TIMESTAMP));
|
||||
}
|
||||
|
||||
@ -348,7 +348,7 @@ TEST_F(SealedSegmentRegexQueryTest, BFRegexQueryOnStringField) {
|
||||
auto parsed =
|
||||
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, typed_expr);
|
||||
|
||||
auto segpromote = dynamic_cast<SegmentSealedImpl*>(seg.get());
|
||||
auto segpromote = dynamic_cast<ChunkedSegmentSealedImpl*>(seg.get());
|
||||
BitsetType final;
|
||||
final = ExecuteQueryExpr(parsed, segpromote, N, MAX_TIMESTAMP);
|
||||
ASSERT_FALSE(final[0]);
|
||||
@ -374,7 +374,7 @@ TEST_F(SealedSegmentRegexQueryTest, BFRegexQueryOnJsonField) {
|
||||
auto parsed =
|
||||
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, typed_expr);
|
||||
|
||||
auto segpromote = dynamic_cast<SegmentSealedImpl*>(seg.get());
|
||||
auto segpromote = dynamic_cast<ChunkedSegmentSealedImpl*>(seg.get());
|
||||
BitsetType final;
|
||||
final = ExecuteQueryExpr(parsed, segpromote, N, MAX_TIMESTAMP);
|
||||
ASSERT_FALSE(final[0]);
|
||||
@ -401,7 +401,7 @@ TEST_F(SealedSegmentRegexQueryTest, RegexQueryOnIndexedNonStringField) {
|
||||
|
||||
LoadStlSortIndex();
|
||||
|
||||
auto segpromote = dynamic_cast<SegmentSealedImpl*>(seg.get());
|
||||
auto segpromote = dynamic_cast<ChunkedSegmentSealedImpl*>(seg.get());
|
||||
query::ExecPlanNodeVisitor visitor(*segpromote, MAX_TIMESTAMP);
|
||||
BitsetType final;
|
||||
ASSERT_ANY_THROW(ExecuteQueryExpr(parsed, segpromote, N, MAX_TIMESTAMP));
|
||||
@ -426,7 +426,7 @@ TEST_F(SealedSegmentRegexQueryTest, RegexQueryOnStlSortStringField) {
|
||||
|
||||
LoadStlSortIndex();
|
||||
|
||||
auto segpromote = dynamic_cast<SegmentSealedImpl*>(seg.get());
|
||||
auto segpromote = dynamic_cast<ChunkedSegmentSealedImpl*>(seg.get());
|
||||
BitsetType final;
|
||||
final = ExecuteQueryExpr(parsed, segpromote, N, MAX_TIMESTAMP);
|
||||
ASSERT_FALSE(final[0]);
|
||||
@ -455,7 +455,7 @@ TEST_F(SealedSegmentRegexQueryTest, RegexQueryOnInvertedIndexStringField) {
|
||||
|
||||
LoadInvertedIndex();
|
||||
|
||||
auto segpromote = dynamic_cast<SegmentSealedImpl*>(seg.get());
|
||||
auto segpromote = dynamic_cast<ChunkedSegmentSealedImpl*>(seg.get());
|
||||
BitsetType final;
|
||||
final = ExecuteQueryExpr(parsed, segpromote, N, MAX_TIMESTAMP);
|
||||
ASSERT_FALSE(final[0]);
|
||||
@ -484,7 +484,7 @@ TEST_F(SealedSegmentRegexQueryTest, RegexQueryOnUnsupportedIndex) {
|
||||
|
||||
LoadMockIndex();
|
||||
|
||||
auto segpromote = dynamic_cast<SegmentSealedImpl*>(seg.get());
|
||||
auto segpromote = dynamic_cast<ChunkedSegmentSealedImpl*>(seg.get());
|
||||
BitsetType final;
|
||||
// regex query under this index will be executed using raw data (brute force).
|
||||
final = ExecuteQueryExpr(parsed, segpromote, N, MAX_TIMESTAMP);
|
||||
|
||||
@ -13,11 +13,13 @@
|
||||
#include <optional>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "common/Consts.h"
|
||||
#include "common/FieldMeta.h"
|
||||
#include "common/Types.h"
|
||||
#include "common/Tracer.h"
|
||||
#include "index/IndexFactory.h"
|
||||
#include "knowhere/version.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
|
||||
#include "storage/MmapManager.h"
|
||||
#include "storage/MinioChunkManager.h"
|
||||
#include "storage/RemoteChunkManagerSingleton.h"
|
||||
@ -591,7 +593,7 @@ TEST(Sealed, LoadFieldData) {
|
||||
vec_info.index_params["metric_type"] = knowhere::metric::L2;
|
||||
segment->LoadIndex(vec_info);
|
||||
|
||||
ASSERT_EQ(segment->num_chunk(FieldId(0)), 1);
|
||||
ASSERT_EQ(segment->num_chunk(fakevec_id), 1);
|
||||
ASSERT_EQ(segment->num_chunk_index(double_id), 0);
|
||||
ASSERT_EQ(segment->num_chunk_index(str_id), 0);
|
||||
auto chunk_span1 = segment->chunk_data<int64_t>(counter_id, 0);
|
||||
@ -626,15 +628,40 @@ TEST(Sealed, LoadFieldData) {
|
||||
ASSERT_EQ(chunk_span2.valid_data(), nullptr);
|
||||
ASSERT_EQ(chunk_span3.second.size(), 0);
|
||||
for (int i = 0; i < N; ++i) {
|
||||
if (chunk_span1.valid_data() == nullptr ||
|
||||
chunk_span1.valid_data()[i]) {
|
||||
ASSERT_EQ(chunk_span1.data()[i], ref1[i]);
|
||||
}
|
||||
if (chunk_span2.valid_data() == nullptr ||
|
||||
chunk_span2.valid_data()[i]) {
|
||||
ASSERT_EQ(chunk_span2.data()[i], ref2[i]);
|
||||
}
|
||||
if (chunk_span3.second.size() == 0 || chunk_span3.second[i]) {
|
||||
ASSERT_EQ(chunk_span3.first[i], ref3[i]);
|
||||
}
|
||||
if (chunk_span4.valid_data() == nullptr ||
|
||||
chunk_span4.valid_data()[i]) {
|
||||
ASSERT_EQ(chunk_span4.data()[i], ref4[i]);
|
||||
}
|
||||
if (chunk_span5.valid_data() == nullptr ||
|
||||
chunk_span5.valid_data()[i]) {
|
||||
ASSERT_EQ(chunk_span5.data()[i], ref5[i]);
|
||||
}
|
||||
if (chunk_span6.valid_data() == nullptr ||
|
||||
chunk_span6.valid_data()[i]) {
|
||||
ASSERT_EQ(chunk_span6.data()[i], ref6[i]);
|
||||
}
|
||||
if (chunk_span7.valid_data() == nullptr ||
|
||||
chunk_span7.valid_data()[i]) {
|
||||
ASSERT_EQ(chunk_span7.data()[i], ref7[i]);
|
||||
}
|
||||
if (chunk_span8.valid_data() == nullptr ||
|
||||
chunk_span8.valid_data()[i]) {
|
||||
ASSERT_EQ(chunk_span8.data()[i], ref8[i]);
|
||||
}
|
||||
if (chunk_span9.second.size() == 0 || chunk_span9.second[i]) {
|
||||
ASSERT_EQ(chunk_span9.first[i], ref9[i]);
|
||||
}
|
||||
ASSERT_EQ(chunk_span4.valid_data()[i], valid4[i]);
|
||||
ASSERT_EQ(chunk_span5.valid_data()[i], valid5[i]);
|
||||
ASSERT_EQ(chunk_span6.valid_data()[i], valid6[i]);
|
||||
@ -754,7 +781,7 @@ TEST(Sealed, ClearData) {
|
||||
vec_info.index_params["metric_type"] = knowhere::metric::L2;
|
||||
segment->LoadIndex(vec_info);
|
||||
|
||||
ASSERT_EQ(segment->num_chunk(FieldId(0)), 1);
|
||||
ASSERT_EQ(segment->num_chunk(fakevec_id), 1);
|
||||
ASSERT_EQ(segment->num_chunk_index(double_id), 0);
|
||||
ASSERT_EQ(segment->num_chunk_index(str_id), 0);
|
||||
auto chunk_span1 = segment->chunk_data<int64_t>(counter_id, 0);
|
||||
@ -775,7 +802,7 @@ TEST(Sealed, ClearData) {
|
||||
auto json = SearchResultToJson(*sr);
|
||||
std::cout << json.dump(1);
|
||||
|
||||
auto sealed_segment = (SegmentSealedImpl*)segment.get();
|
||||
auto sealed_segment = (ChunkedSegmentSealedImpl*)segment.get();
|
||||
sealed_segment->ClearData();
|
||||
ASSERT_EQ(sealed_segment->get_row_count(), 0);
|
||||
ASSERT_EQ(sealed_segment->get_real_count(), 0);
|
||||
@ -858,7 +885,7 @@ TEST(Sealed, LoadFieldDataMmap) {
|
||||
vec_info.index_params["metric_type"] = knowhere::metric::L2;
|
||||
segment->LoadIndex(vec_info);
|
||||
|
||||
ASSERT_EQ(segment->num_chunk(FieldId(0)), 1);
|
||||
ASSERT_EQ(segment->num_chunk(fakevec_id), 1);
|
||||
ASSERT_EQ(segment->num_chunk_index(double_id), 0);
|
||||
ASSERT_EQ(segment->num_chunk_index(str_id), 0);
|
||||
auto chunk_span1 = segment->chunk_data<int64_t>(counter_id, 0);
|
||||
@ -898,9 +925,10 @@ TEST(Sealed, LoadPkScalarIndex) {
|
||||
|
||||
auto info = FieldDataInfo(field_data.field_id(), N);
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(N, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
segment->LoadFieldData(FieldId(field_id), info);
|
||||
}
|
||||
@ -1002,8 +1030,12 @@ TEST(Sealed, LoadScalarIndex) {
|
||||
auto field_data =
|
||||
std::make_shared<milvus::FieldData<int64_t>>(DataType::INT64, false);
|
||||
field_data->FillFieldData(dataset.row_ids_.data(), N);
|
||||
auto arrow_data_wrapper =
|
||||
storage::ConvertFieldDataToArrowDataWrapper(field_data);
|
||||
auto field_data_info = FieldDataInfo{
|
||||
RowFieldID.get(), N, std::vector<FieldDataPtr>{field_data}};
|
||||
RowFieldID.get(),
|
||||
N,
|
||||
std::vector<std::shared_ptr<ArrowDataWrapper>>{arrow_data_wrapper}};
|
||||
segment->LoadFieldData(RowFieldID, field_data_info);
|
||||
|
||||
LoadFieldDataInfo ts_info;
|
||||
@ -1015,9 +1047,13 @@ TEST(Sealed, LoadScalarIndex) {
|
||||
field_data =
|
||||
std::make_shared<milvus::FieldData<int64_t>>(DataType::INT64, false);
|
||||
field_data->FillFieldData(dataset.timestamps_.data(), N);
|
||||
field_data_info = FieldDataInfo{
|
||||
TimestampFieldID.get(), N, std::vector<FieldDataPtr>{field_data}};
|
||||
segment->LoadFieldData(TimestampFieldID, field_data_info);
|
||||
auto ts_arrow_data_wrapper =
|
||||
storage::ConvertFieldDataToArrowDataWrapper(field_data);
|
||||
auto ts_field_data_info = FieldDataInfo{
|
||||
TimestampFieldID.get(),
|
||||
N,
|
||||
std::vector<std::shared_ptr<ArrowDataWrapper>>{ts_arrow_data_wrapper}};
|
||||
segment->LoadFieldData(TimestampFieldID, ts_field_data_info);
|
||||
|
||||
LoadIndexInfo vec_info;
|
||||
vec_info.field_id = fakevec_id.get();
|
||||
@ -1285,8 +1321,13 @@ TEST(Sealed, BF) {
|
||||
auto field_data =
|
||||
storage::CreateFieldData(DataType::VECTOR_FLOAT, false, dim);
|
||||
field_data->FillFieldData(vec_data.data(), N);
|
||||
auto fake_vec_arrow_data_wrapper =
|
||||
storage::ConvertFieldDataToArrowDataWrapper(field_data);
|
||||
auto field_data_info =
|
||||
FieldDataInfo{fake_id.get(), N, std::vector<FieldDataPtr>{field_data}};
|
||||
FieldDataInfo{fake_id.get(),
|
||||
N,
|
||||
std::vector<std::shared_ptr<ArrowDataWrapper>>{
|
||||
fake_vec_arrow_data_wrapper}};
|
||||
segment->LoadFieldData(fake_id, field_data_info);
|
||||
|
||||
auto topK = 1;
|
||||
@ -1340,8 +1381,12 @@ TEST(Sealed, BF_Overflow) {
|
||||
auto field_data =
|
||||
storage::CreateFieldData(DataType::VECTOR_FLOAT, false, dim);
|
||||
field_data->FillFieldData(vec_data.data(), N);
|
||||
auto field_data_info =
|
||||
FieldDataInfo{fake_id.get(), N, std::vector<FieldDataPtr>{field_data}};
|
||||
auto arrow_data_wrapper =
|
||||
storage::ConvertFieldDataToArrowDataWrapper(field_data);
|
||||
auto field_data_info = FieldDataInfo{
|
||||
fake_id.get(),
|
||||
N,
|
||||
std::vector<std::shared_ptr<ArrowDataWrapper>>{arrow_data_wrapper}};
|
||||
segment->LoadFieldData(fake_id, field_data_info);
|
||||
|
||||
auto topK = 1;
|
||||
@ -1496,7 +1541,8 @@ TEST(Sealed, GetVector) {
|
||||
vec_info.index_params["metric_type"] = knowhere::metric::L2;
|
||||
segment_sealed->LoadIndex(vec_info);
|
||||
|
||||
auto segment = dynamic_cast<SegmentSealedImpl*>(segment_sealed.get());
|
||||
auto segment =
|
||||
dynamic_cast<ChunkedSegmentSealedImpl*>(segment_sealed.get());
|
||||
|
||||
auto has = segment->HasRawData(vec_info.field_id);
|
||||
EXPECT_TRUE(has);
|
||||
@ -1588,7 +1634,8 @@ TEST(Sealed, GetVectorFromChunkCache) {
|
||||
LoadFieldDataInfo{std::map<int64_t, FieldBinlogInfo>{
|
||||
{fakevec_id.get(), field_binlog_info}}});
|
||||
|
||||
auto segment = dynamic_cast<SegmentSealedImpl*>(segment_sealed.get());
|
||||
auto segment =
|
||||
dynamic_cast<ChunkedSegmentSealedImpl*>(segment_sealed.get());
|
||||
auto has = segment->HasRawData(vec_info.field_id);
|
||||
EXPECT_FALSE(has);
|
||||
|
||||
@ -1694,7 +1741,8 @@ TEST(Sealed, GetSparseVectorFromChunkCache) {
|
||||
LoadFieldDataInfo{std::map<int64_t, FieldBinlogInfo>{
|
||||
{fakevec_id.get(), field_binlog_info}}});
|
||||
|
||||
auto segment = dynamic_cast<SegmentSealedImpl*>(segment_sealed.get());
|
||||
auto segment =
|
||||
dynamic_cast<ChunkedSegmentSealedImpl*>(segment_sealed.get());
|
||||
|
||||
auto ids_ds = GenRandomIds(N);
|
||||
auto result =
|
||||
@ -1798,7 +1846,8 @@ TEST(Sealed, WarmupChunkCache) {
|
||||
LoadFieldDataInfo{std::map<int64_t, FieldBinlogInfo>{
|
||||
{fakevec_id.get(), field_binlog_info}}});
|
||||
|
||||
auto segment = dynamic_cast<SegmentSealedImpl*>(segment_sealed.get());
|
||||
auto segment =
|
||||
dynamic_cast<ChunkedSegmentSealedImpl*>(segment_sealed.get());
|
||||
auto has = segment->HasRawData(vec_info.field_id);
|
||||
EXPECT_FALSE(has);
|
||||
|
||||
@ -1879,7 +1928,7 @@ TEST(Sealed, LoadArrayFieldData) {
|
||||
segment->Search(plan.get(), ph_group.get(), 1L << 63);
|
||||
|
||||
auto ids_ds = GenRandomIds(N);
|
||||
auto s = dynamic_cast<SegmentSealedImpl*>(segment.get());
|
||||
auto s = dynamic_cast<ChunkedSegmentSealedImpl*>(segment.get());
|
||||
auto int64_result = s->bulk_subscript(array_id, ids_ds->GetIds(), N);
|
||||
auto result_count = int64_result->scalars().array_data().data().size();
|
||||
ASSERT_EQ(result_count, N);
|
||||
@ -2217,8 +2266,12 @@ TEST(Sealed, SkipIndexSkipStringRange) {
|
||||
auto string_field_data =
|
||||
storage::CreateFieldData(DataType::VARCHAR, false, 1, N);
|
||||
string_field_data->FillFieldData(strings.data(), N);
|
||||
auto arrow_data_wrapper =
|
||||
storage::ConvertFieldDataToArrowDataWrapper(string_field_data);
|
||||
auto string_field_data_info = FieldDataInfo{
|
||||
string_fid.get(), N, std::vector<FieldDataPtr>{string_field_data}};
|
||||
string_fid.get(),
|
||||
N,
|
||||
std::vector<std::shared_ptr<ArrowDataWrapper>>{arrow_data_wrapper}};
|
||||
segment->LoadFieldData(string_fid, string_field_data_info);
|
||||
auto& skip_index = segment->GetSkipIndex();
|
||||
ASSERT_TRUE(skip_index.CanSkipUnaryRange<std::string>(
|
||||
@ -2301,7 +2354,8 @@ TEST(Sealed, QueryAllFields) {
|
||||
IndexMetaPtr metaPtr =
|
||||
std::make_shared<CollectionIndexMeta>(100000, std::move(filedMap));
|
||||
auto segment_sealed = CreateSealedSegment(schema, metaPtr);
|
||||
auto segment = dynamic_cast<SegmentSealedImpl*>(segment_sealed.get());
|
||||
auto segment =
|
||||
dynamic_cast<ChunkedSegmentSealedImpl*>(segment_sealed.get());
|
||||
|
||||
int64_t dataset_size = 1000;
|
||||
int64_t dim = 128;
|
||||
@ -2456,7 +2510,8 @@ TEST(Sealed, QueryAllNullableFields) {
|
||||
IndexMetaPtr metaPtr =
|
||||
std::make_shared<CollectionIndexMeta>(100000, std::move(filedMap));
|
||||
auto segment_sealed = CreateSealedSegment(schema, metaPtr);
|
||||
auto segment = dynamic_cast<SegmentSealedImpl*>(segment_sealed.get());
|
||||
auto segment =
|
||||
dynamic_cast<ChunkedSegmentSealedImpl*>(segment_sealed.get());
|
||||
|
||||
int64_t dataset_size = 1000;
|
||||
int64_t dim = 128;
|
||||
@ -2576,7 +2631,8 @@ TEST(Sealed, SearchSortedPk) {
|
||||
schema->set_primary_field_id(varchar_pk_field);
|
||||
auto segment_sealed = CreateSealedSegment(
|
||||
schema, nullptr, 999, SegcoreConfig::default_config(), false, true);
|
||||
auto segment = dynamic_cast<SegmentSealedImpl*>(segment_sealed.get());
|
||||
auto segment =
|
||||
dynamic_cast<ChunkedSegmentSealedImpl*>(segment_sealed.get());
|
||||
|
||||
int64_t dataset_size = 1000;
|
||||
auto dataset = DataGen(schema, dataset_size, 42, 0, 10);
|
||||
|
||||
@ -30,12 +30,19 @@
|
||||
#include "index/VectorMemIndex.h"
|
||||
#include "segcore/Collection.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
|
||||
#include "segcore/Utils.h"
|
||||
#include "knowhere/comp/index_param.h"
|
||||
|
||||
#include "PbHelper.h"
|
||||
#include "segcore/collection_c.h"
|
||||
#include "segcore/SegmentSealed.h"
|
||||
#include "common/Types.h"
|
||||
#include "storage/BinlogReader.h"
|
||||
#include "storage/Event.h"
|
||||
#include "storage/PayloadWriter.h"
|
||||
#include "segcore/ChunkedSegmentSealedImpl.h"
|
||||
#include "storage/Util.h"
|
||||
|
||||
using boost::algorithm::starts_with;
|
||||
|
||||
@ -1148,20 +1155,28 @@ SealedLoadFieldData(const GeneratedData& dataset,
|
||||
auto field_data = std::make_shared<milvus::FieldData<int64_t>>(
|
||||
DataType::INT64, false);
|
||||
field_data->FillFieldData(dataset.row_ids_.data(), row_count);
|
||||
auto field_data_info =
|
||||
FieldDataInfo(RowFieldID.get(),
|
||||
|
||||
auto arrow_data_wrapper =
|
||||
storage::ConvertFieldDataToArrowDataWrapper(field_data);
|
||||
|
||||
auto field_data_info = FieldDataInfo(
|
||||
RowFieldID.get(),
|
||||
row_count,
|
||||
std::vector<milvus::FieldDataPtr>{field_data});
|
||||
std::vector<std::shared_ptr<milvus::ArrowDataWrapper>>{
|
||||
arrow_data_wrapper});
|
||||
seg.LoadFieldData(RowFieldID, field_data_info);
|
||||
}
|
||||
{
|
||||
auto field_data = std::make_shared<milvus::FieldData<int64_t>>(
|
||||
DataType::INT64, false);
|
||||
field_data->FillFieldData(dataset.timestamps_.data(), row_count);
|
||||
auto field_data_info =
|
||||
FieldDataInfo(TimestampFieldID.get(),
|
||||
auto arrow_data_wrapper =
|
||||
storage::ConvertFieldDataToArrowDataWrapper(field_data);
|
||||
auto field_data_info = FieldDataInfo(
|
||||
TimestampFieldID.get(),
|
||||
row_count,
|
||||
std::vector<milvus::FieldDataPtr>{field_data});
|
||||
std::vector<std::shared_ptr<milvus::ArrowDataWrapper>>{
|
||||
arrow_data_wrapper});
|
||||
seg.LoadFieldData(TimestampFieldID, field_data_info);
|
||||
}
|
||||
for (auto& iter : dataset.schema_->get_fields()) {
|
||||
@ -1183,9 +1198,11 @@ SealedLoadFieldData(const GeneratedData& dataset,
|
||||
info.field_id = field_data.field_id();
|
||||
info.row_count = row_count;
|
||||
auto field_meta = fields.at(FieldId(field_id));
|
||||
info.channel->push(
|
||||
auto arrow_data_wrapper = storage::ConvertFieldDataToArrowDataWrapper(
|
||||
CreateFieldDataFromDataArray(row_count, &field_data, field_meta));
|
||||
info.channel->close();
|
||||
|
||||
info.arrow_reader_channel->push(arrow_data_wrapper);
|
||||
info.arrow_reader_channel->close();
|
||||
|
||||
if (with_mmap) {
|
||||
seg.MapFieldData(FieldId(field_id), info);
|
||||
|
||||
@ -347,7 +347,6 @@ func NewSegment(ctx context.Context,
|
||||
SegmentID: loadInfo.GetSegmentID(),
|
||||
SegmentType: segmentType,
|
||||
IsSorted: loadInfo.GetIsSorted(),
|
||||
EnableChunked: paramtable.Get().QueryNodeCfg.MultipleChunkedEnable.GetAsBool(),
|
||||
})
|
||||
return nil, err
|
||||
}).Await(); err != nil {
|
||||
|
||||
@ -42,7 +42,6 @@ type CreateCSegmentRequest struct {
|
||||
SegmentID int64
|
||||
SegmentType SegmentType
|
||||
IsSorted bool
|
||||
EnableChunked bool
|
||||
}
|
||||
|
||||
func (req *CreateCSegmentRequest) getCSegmentType() C.SegmentType {
|
||||
@ -51,10 +50,6 @@ func (req *CreateCSegmentRequest) getCSegmentType() C.SegmentType {
|
||||
case SegmentTypeGrowing:
|
||||
segmentType = C.Growing
|
||||
case SegmentTypeSealed:
|
||||
if req.EnableChunked {
|
||||
segmentType = C.ChunkedSealed
|
||||
break
|
||||
}
|
||||
segmentType = C.Sealed
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid segment type: %d", req.SegmentType))
|
||||
|
||||
@ -2683,7 +2683,7 @@ type queryNodeConfig struct {
|
||||
InterimIndexNProbe ParamItem `refreshable:"false"`
|
||||
InterimIndexMemExpandRate ParamItem `refreshable:"false"`
|
||||
InterimIndexBuildParallelRate ParamItem `refreshable:"false"`
|
||||
MultipleChunkedEnable ParamItem `refreshable:"false"`
|
||||
MultipleChunkedEnable ParamItem `refreshable:"false"` // Deprecated
|
||||
|
||||
KnowhereScoreConsistency ParamItem `refreshable:"false"`
|
||||
|
||||
@ -2907,7 +2907,7 @@ This defaults to true, indicating that Milvus creates temporary index for growin
|
||||
Key: "queryNode.segcore.multipleChunkedEnable",
|
||||
Version: "2.0.0",
|
||||
DefaultValue: "true",
|
||||
Doc: "Enable multiple chunked search",
|
||||
Doc: "Deprecated. Enable multiple chunked search",
|
||||
Export: true,
|
||||
}
|
||||
p.MultipleChunkedEnable.Init(base.mgr)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user