mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 01:28:27 +08:00
enhance: Utilize search_batch_pks for search_ids of PkTerm (#43751)
Related to #43660 --------- Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
parent
b8fe8aed53
commit
b6199acb05
@ -34,9 +34,13 @@ PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (expr_->op_ == proto::plan::JSONContainsExpr_JSONOp_ContainsAll) {
|
if (expr_->op_ == proto::plan::JSONContainsExpr_JSONOp_ContainsAll) {
|
||||||
result = std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, true), TargetBitmap(real_batch_size, true));
|
result = std::make_shared<ColumnVector>(
|
||||||
|
TargetBitmap(real_batch_size, true),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
} else {
|
} else {
|
||||||
result = std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false), TargetBitmap(real_batch_size, true));
|
result = std::make_shared<ColumnVector>(
|
||||||
|
TargetBitmap(real_batch_size, false),
|
||||||
|
TargetBitmap(real_batch_size, true));
|
||||||
}
|
}
|
||||||
MoveCursor();
|
MoveCursor();
|
||||||
return;
|
return;
|
||||||
|
|||||||
@ -191,8 +191,7 @@ PhyTermFilterExpr::InitPkCacheOffset() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto [uids, seg_offsets] =
|
auto seg_offsets = segment_->search_ids(*id_array, query_timestamp_);
|
||||||
segment_->search_ids(*id_array, query_timestamp_);
|
|
||||||
cached_bits_.resize(active_count_, false);
|
cached_bits_.resize(active_count_, false);
|
||||||
for (const auto& offset : seg_offsets) {
|
for (const auto& offset : seg_offsets) {
|
||||||
auto _offset = (int64_t)offset.get();
|
auto _offset = (int64_t)offset.get();
|
||||||
@ -540,8 +539,7 @@ PhyTermFilterExpr::ExecJsonInVariableByKeyIndex() {
|
|||||||
if (!arg_inited_) {
|
if (!arg_inited_) {
|
||||||
arg_set_ = std::make_shared<SetElement<ValueType>>(expr_->vals_);
|
arg_set_ = std::make_shared<SetElement<ValueType>>(expr_->vals_);
|
||||||
if constexpr (std::is_same_v<GetType, double>) {
|
if constexpr (std::is_same_v<GetType, double>) {
|
||||||
arg_set_float_ =
|
arg_set_float_ = std::make_shared<SetElement<float>>(expr_->vals_);
|
||||||
std::make_shared<SetElement<float>>(expr_->vals_);
|
|
||||||
}
|
}
|
||||||
arg_inited_ = true;
|
arg_inited_ = true;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -916,17 +916,18 @@ ChunkedSegmentSealedImpl::search_pk(const PkType& pk,
|
|||||||
void
|
void
|
||||||
ChunkedSegmentSealedImpl::search_batch_pks(
|
ChunkedSegmentSealedImpl::search_batch_pks(
|
||||||
const std::vector<PkType>& pks,
|
const std::vector<PkType>& pks,
|
||||||
const Timestamp* timestamps,
|
const std::function<Timestamp(const size_t idx)>& get_timestamp,
|
||||||
bool include_same_ts,
|
bool include_same_ts,
|
||||||
const std::function<void(const SegOffset offset, const Timestamp ts)>&
|
const std::function<void(const SegOffset offset, const Timestamp ts)>&
|
||||||
callback) const {
|
callback) const {
|
||||||
// handle unsorted case
|
// handle unsorted case
|
||||||
if (!is_sorted_by_pk_) {
|
if (!is_sorted_by_pk_) {
|
||||||
for (size_t i = 0; i < pks.size(); i++) {
|
for (size_t i = 0; i < pks.size(); i++) {
|
||||||
auto offsets = insert_record_.search_pk(
|
auto timestamp = get_timestamp(i);
|
||||||
pks[i], timestamps[i], include_same_ts);
|
auto offsets =
|
||||||
|
insert_record_.search_pk(pks[i], timestamp, include_same_ts);
|
||||||
for (auto offset : offsets) {
|
for (auto offset : offsets) {
|
||||||
callback(offset, timestamps[i]);
|
callback(offset, timestamp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@ -956,7 +957,7 @@ ChunkedSegmentSealedImpl::search_batch_pks(
|
|||||||
for (size_t j = 0; j < pks.size(); j++) {
|
for (size_t j = 0; j < pks.size(); j++) {
|
||||||
// get int64 pks
|
// get int64 pks
|
||||||
auto target = std::get<int64_t>(pks[j]);
|
auto target = std::get<int64_t>(pks[j]);
|
||||||
auto timestamp = timestamps[j];
|
auto timestamp = get_timestamp(j);
|
||||||
auto it = std::lower_bound(
|
auto it = std::lower_bound(
|
||||||
src,
|
src,
|
||||||
src + chunk_row_num,
|
src + chunk_row_num,
|
||||||
@ -988,7 +989,7 @@ ChunkedSegmentSealedImpl::search_batch_pks(
|
|||||||
for (size_t j = 0; j < pks.size(); ++j) {
|
for (size_t j = 0; j < pks.size(); ++j) {
|
||||||
// get varchar pks
|
// get varchar pks
|
||||||
auto& target = std::get<std::string>(pks[j]);
|
auto& target = std::get<std::string>(pks[j]);
|
||||||
auto timestamp = timestamps[j];
|
auto timestamp = get_timestamp(j);
|
||||||
auto offset = string_chunk->binary_search_string(target);
|
auto offset = string_chunk->binary_search_string(target);
|
||||||
for (; offset != -1 && offset < string_chunk->RowNums() &&
|
for (; offset != -1 && offset < string_chunk->RowNums() &&
|
||||||
string_chunk->operator[](offset) == target;
|
string_chunk->operator[](offset) == target;
|
||||||
@ -1142,7 +1143,11 @@ ChunkedSegmentSealedImpl::ChunkedSegmentSealedImpl(
|
|||||||
const Timestamp* timestamps,
|
const Timestamp* timestamps,
|
||||||
std::function<void(const SegOffset offset, const Timestamp ts)>
|
std::function<void(const SegOffset offset, const Timestamp ts)>
|
||||||
callback) {
|
callback) {
|
||||||
this->search_batch_pks(pks, timestamps, false, callback);
|
this->search_batch_pks(
|
||||||
|
pks,
|
||||||
|
[&](const size_t idx) { return timestamps[idx]; },
|
||||||
|
false,
|
||||||
|
callback);
|
||||||
},
|
},
|
||||||
segment_id) {
|
segment_id) {
|
||||||
auto mcm = storage::MmapManager::GetInstance().GetMmapChunkManager();
|
auto mcm = storage::MmapManager::GetInstance().GetMmapChunkManager();
|
||||||
@ -1752,7 +1757,7 @@ ChunkedSegmentSealedImpl::GetFieldDataType(milvus::FieldId field_id) const {
|
|||||||
return field_meta.get_data_type();
|
return field_meta.get_data_type();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<std::unique_ptr<IdArray>, std::vector<SegOffset>>
|
std::vector<SegOffset>
|
||||||
ChunkedSegmentSealedImpl::search_ids(const IdArray& id_array,
|
ChunkedSegmentSealedImpl::search_ids(const IdArray& id_array,
|
||||||
Timestamp timestamp) const {
|
Timestamp timestamp) const {
|
||||||
auto field_id = schema_->get_primary_field_id().value_or(FieldId(-1));
|
auto field_id = schema_->get_primary_field_id().value_or(FieldId(-1));
|
||||||
@ -1763,37 +1768,16 @@ ChunkedSegmentSealedImpl::search_ids(const IdArray& id_array,
|
|||||||
std::vector<PkType> pks(ids_size);
|
std::vector<PkType> pks(ids_size);
|
||||||
ParsePksFromIDs(pks, data_type, id_array);
|
ParsePksFromIDs(pks, data_type, id_array);
|
||||||
|
|
||||||
auto res_id_arr = std::make_unique<IdArray>();
|
|
||||||
std::vector<SegOffset> res_offsets;
|
std::vector<SegOffset> res_offsets;
|
||||||
res_offsets.reserve(pks.size());
|
res_offsets.reserve(pks.size());
|
||||||
for (auto& pk : pks) {
|
this->search_batch_pks(
|
||||||
std::vector<SegOffset> pk_offsets;
|
pks,
|
||||||
if (!is_sorted_by_pk_) {
|
[=](const size_t idx) { return timestamp; },
|
||||||
pk_offsets = insert_record_.search_pk(pk, timestamp);
|
true,
|
||||||
} else {
|
[&](const SegOffset offset, const Timestamp ts) {
|
||||||
pk_offsets = search_pk(pk, timestamp);
|
|
||||||
}
|
|
||||||
for (auto offset : pk_offsets) {
|
|
||||||
switch (data_type) {
|
|
||||||
case DataType::INT64: {
|
|
||||||
res_id_arr->mutable_int_id()->add_data(
|
|
||||||
std::get<int64_t>(pk));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case DataType::VARCHAR: {
|
|
||||||
res_id_arr->mutable_str_id()->add_data(
|
|
||||||
std::get<std::string>(std::move(pk)));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default: {
|
|
||||||
ThrowInfo(DataTypeInvalid,
|
|
||||||
fmt::format("unsupported type {}", data_type));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
res_offsets.push_back(offset);
|
res_offsets.push_back(offset);
|
||||||
}
|
});
|
||||||
}
|
return std::move(res_offsets);
|
||||||
return {std::move(res_id_arr), std::move(res_offsets)};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SegcoreError
|
SegcoreError
|
||||||
|
|||||||
@ -207,7 +207,7 @@ class ChunkedSegmentSealedImpl : public SegmentSealed {
|
|||||||
void
|
void
|
||||||
search_batch_pks(
|
search_batch_pks(
|
||||||
const std::vector<PkType>& pks,
|
const std::vector<PkType>& pks,
|
||||||
const Timestamp* timestamps,
|
const std::function<Timestamp(const size_t idx)>& get_timestamp,
|
||||||
bool include_same_ts,
|
bool include_same_ts,
|
||||||
const std::function<void(const SegOffset offset, const Timestamp ts)>&
|
const std::function<void(const SegOffset offset, const Timestamp ts)>&
|
||||||
callback) const;
|
callback) const;
|
||||||
@ -410,7 +410,7 @@ class ChunkedSegmentSealedImpl : public SegmentSealed {
|
|||||||
return system_ready_count_ == 1;
|
return system_ready_count_ == 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<std::unique_ptr<IdArray>, std::vector<SegOffset>>
|
std::vector<SegOffset>
|
||||||
search_ids(const IdArray& id_array, Timestamp timestamp) const override;
|
search_ids(const IdArray& id_array, Timestamp timestamp) const override;
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|||||||
@ -118,7 +118,9 @@ class DeletedRecord {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
search_pk_func_(
|
search_pk_func_(
|
||||||
pks, timestamps, [&](SegOffset offset, Timestamp delete_ts) {
|
pks,
|
||||||
|
timestamps,
|
||||||
|
[&](const SegOffset offset, const Timestamp delete_ts) {
|
||||||
auto row_id = offset.get();
|
auto row_id = offset.get();
|
||||||
// if already deleted, no need to add new record
|
// if already deleted, no need to add new record
|
||||||
if (deleted_mask_.size() > row_id && deleted_mask_[row_id]) {
|
if (deleted_mask_.size() > row_id && deleted_mask_[row_id]) {
|
||||||
|
|||||||
@ -1106,7 +1106,7 @@ SegmentGrowingImpl::bulk_subscript(SystemFieldType system_type,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<std::unique_ptr<IdArray>, std::vector<SegOffset>>
|
std::vector<SegOffset>
|
||||||
SegmentGrowingImpl::search_ids(const IdArray& id_array,
|
SegmentGrowingImpl::search_ids(const IdArray& id_array,
|
||||||
Timestamp timestamp) const {
|
Timestamp timestamp) const {
|
||||||
auto field_id = schema_->get_primary_field_id().value_or(FieldId(-1));
|
auto field_id = schema_->get_primary_field_id().value_or(FieldId(-1));
|
||||||
@ -1117,32 +1117,15 @@ SegmentGrowingImpl::search_ids(const IdArray& id_array,
|
|||||||
std::vector<PkType> pks(ids_size);
|
std::vector<PkType> pks(ids_size);
|
||||||
ParsePksFromIDs(pks, data_type, id_array);
|
ParsePksFromIDs(pks, data_type, id_array);
|
||||||
|
|
||||||
auto res_id_arr = std::make_unique<IdArray>();
|
|
||||||
std::vector<SegOffset> res_offsets;
|
std::vector<SegOffset> res_offsets;
|
||||||
res_offsets.reserve(pks.size());
|
res_offsets.reserve(pks.size());
|
||||||
for (auto& pk : pks) {
|
for (auto& pk : pks) {
|
||||||
auto segOffsets = insert_record_.search_pk(pk, timestamp);
|
auto segOffsets = insert_record_.search_pk(pk, timestamp);
|
||||||
for (auto offset : segOffsets) {
|
for (auto offset : segOffsets) {
|
||||||
switch (data_type) {
|
|
||||||
case DataType::INT64: {
|
|
||||||
res_id_arr->mutable_int_id()->add_data(
|
|
||||||
std::get<int64_t>(pk));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case DataType::VARCHAR: {
|
|
||||||
res_id_arr->mutable_str_id()->add_data(
|
|
||||||
std::get<std::string>(std::move(pk)));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default: {
|
|
||||||
ThrowInfo(DataTypeInvalid,
|
|
||||||
fmt::format("unsupported type {}", data_type));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
res_offsets.push_back(offset);
|
res_offsets.push_back(offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return {std::move(res_id_arr), std::move(res_offsets)};
|
return std::move(res_offsets);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string
|
std::string
|
||||||
|
|||||||
@ -340,7 +340,7 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||||||
int64_t ins_barrier,
|
int64_t ins_barrier,
|
||||||
Timestamp timestamp) const override;
|
Timestamp timestamp) const override;
|
||||||
|
|
||||||
std::pair<std::unique_ptr<IdArray>, std::vector<SegOffset>>
|
std::vector<SegOffset>
|
||||||
search_ids(const IdArray& id_array, Timestamp timestamp) const override;
|
search_ids(const IdArray& id_array, Timestamp timestamp) const override;
|
||||||
|
|
||||||
bool
|
bool
|
||||||
|
|||||||
@ -440,7 +440,14 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||||||
virtual int64_t
|
virtual int64_t
|
||||||
get_active_count(Timestamp ts) const = 0;
|
get_active_count(Timestamp ts) const = 0;
|
||||||
|
|
||||||
virtual std::pair<std::unique_ptr<IdArray>, std::vector<SegOffset>>
|
/**
|
||||||
|
* search offset by possible pk values and mvcc timestamp
|
||||||
|
*
|
||||||
|
* @param id_array possible pk values
|
||||||
|
* @param timestamp mvcc timestamp
|
||||||
|
* @return all the hit entries in vector of offsets
|
||||||
|
*/
|
||||||
|
virtual std::vector<SegOffset>
|
||||||
search_ids(const IdArray& id_array, Timestamp timestamp) const = 0;
|
search_ids(const IdArray& id_array, Timestamp timestamp) const = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@ -45,7 +45,8 @@ TEST(DeleteMVCC, common_case) {
|
|||||||
[&insert_record](
|
[&insert_record](
|
||||||
const std::vector<PkType>& pks,
|
const std::vector<PkType>& pks,
|
||||||
const Timestamp* timestamps,
|
const Timestamp* timestamps,
|
||||||
std::function<void(SegOffset offset, Timestamp ts)> cb) {
|
std::function<void(const SegOffset offset, const Timestamp ts)>
|
||||||
|
cb) {
|
||||||
for (size_t i = 0; i < pks.size(); ++i) {
|
for (size_t i = 0; i < pks.size(); ++i) {
|
||||||
auto timestamp = timestamps[i];
|
auto timestamp = timestamps[i];
|
||||||
auto offsets = insert_record.search_pk(pks[i], timestamp);
|
auto offsets = insert_record.search_pk(pks[i], timestamp);
|
||||||
@ -170,7 +171,8 @@ TEST(DeleteMVCC, delete_exist_duplicate_pks) {
|
|||||||
[&insert_record](
|
[&insert_record](
|
||||||
const std::vector<PkType>& pks,
|
const std::vector<PkType>& pks,
|
||||||
const Timestamp* timestamps,
|
const Timestamp* timestamps,
|
||||||
std::function<void(SegOffset offset, Timestamp ts)> cb) {
|
std::function<void(const SegOffset offset, const Timestamp ts)>
|
||||||
|
cb) {
|
||||||
for (size_t i = 0; i < pks.size(); ++i) {
|
for (size_t i = 0; i < pks.size(); ++i) {
|
||||||
auto timestamp = timestamps[i];
|
auto timestamp = timestamps[i];
|
||||||
auto offsets = insert_record.search_pk(pks[i], timestamp);
|
auto offsets = insert_record.search_pk(pks[i], timestamp);
|
||||||
@ -294,7 +296,8 @@ TEST(DeleteMVCC, snapshot) {
|
|||||||
[&insert_record](
|
[&insert_record](
|
||||||
const std::vector<PkType>& pks,
|
const std::vector<PkType>& pks,
|
||||||
const Timestamp* timestamps,
|
const Timestamp* timestamps,
|
||||||
std::function<void(SegOffset offset, Timestamp ts)> cb) {
|
std::function<void(const SegOffset offset, const Timestamp ts)>
|
||||||
|
cb) {
|
||||||
for (size_t i = 0; i < pks.size(); ++i) {
|
for (size_t i = 0; i < pks.size(); ++i) {
|
||||||
auto timestamp = timestamps[i];
|
auto timestamp = timestamps[i];
|
||||||
auto offsets = insert_record.search_pk(pks[i], timestamp);
|
auto offsets = insert_record.search_pk(pks[i], timestamp);
|
||||||
@ -351,7 +354,8 @@ TEST(DeleteMVCC, insert_after_snapshot) {
|
|||||||
[&insert_record](
|
[&insert_record](
|
||||||
const std::vector<PkType>& pks,
|
const std::vector<PkType>& pks,
|
||||||
const Timestamp* timestamps,
|
const Timestamp* timestamps,
|
||||||
std::function<void(SegOffset offset, Timestamp ts)> cb) {
|
std::function<void(const SegOffset offset, const Timestamp ts)>
|
||||||
|
cb) {
|
||||||
for (size_t i = 0; i < pks.size(); ++i) {
|
for (size_t i = 0; i < pks.size(); ++i) {
|
||||||
auto timestamp = timestamps[i];
|
auto timestamp = timestamps[i];
|
||||||
auto offsets = insert_record.search_pk(pks[i], timestamp);
|
auto offsets = insert_record.search_pk(pks[i], timestamp);
|
||||||
@ -455,7 +459,8 @@ TEST(DeleteMVCC, perform) {
|
|||||||
[&insert_record](
|
[&insert_record](
|
||||||
const std::vector<PkType>& pks,
|
const std::vector<PkType>& pks,
|
||||||
const Timestamp* timestamps,
|
const Timestamp* timestamps,
|
||||||
std::function<void(SegOffset offset, Timestamp ts)> cb) {
|
std::function<void(const SegOffset offset, const Timestamp ts)>
|
||||||
|
cb) {
|
||||||
for (size_t i = 0; i < pks.size(); ++i) {
|
for (size_t i = 0; i < pks.size(); ++i) {
|
||||||
auto timestamp = timestamps[i];
|
auto timestamp = timestamps[i];
|
||||||
auto offsets = insert_record.search_pk(pks[i], timestamp);
|
auto offsets = insert_record.search_pk(pks[i], timestamp);
|
||||||
|
|||||||
@ -93,7 +93,8 @@ TEST(Util, GetDeleteBitmap) {
|
|||||||
[&insert_record](
|
[&insert_record](
|
||||||
const std::vector<PkType>& pks,
|
const std::vector<PkType>& pks,
|
||||||
const Timestamp* timestamps,
|
const Timestamp* timestamps,
|
||||||
std::function<void(SegOffset offset, Timestamp ts)> cb) {
|
std::function<void(const SegOffset offset, const Timestamp ts)>
|
||||||
|
cb) {
|
||||||
for (size_t i = 0; i < pks.size(); ++i) {
|
for (size_t i = 0; i < pks.size(); ++i) {
|
||||||
auto timestamp = timestamps[i];
|
auto timestamp = timestamps[i];
|
||||||
auto offsets = insert_record.search_pk(pks[i], timestamp);
|
auto offsets = insert_record.search_pk(pks[i], timestamp);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user