fix: Fix the bug when offsets is nullptr in the bulk API (#43127)

issue: https://github.com/milvus-io/milvus/issues/42978

---------

Signed-off-by: sunby <sunbingyi1992@gmail.com>
This commit is contained in:
Bingyi Sun 2025-07-15 17:54:50 +08:00 committed by GitHub
parent 858115bfb8
commit ccfaa7bee8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 38 additions and 76 deletions

View File

@ -347,27 +347,17 @@ class ChunkedVariableColumn : public ChunkedColumnBase {
ErrorCode::Unsupported,
"RawJsonAt only supported for ChunkedVariableColumn<Json>");
}
if (offsets == nullptr) {
auto ca = SemiInlineGet(slot_->PinAllCells());
for (int64_t i = 0; i < num_rows_; i++) {
auto [chunk_id, offset_in_chunk] = GetChunkIDByOffset(i);
auto chunk = ca->get_cell_of(chunk_id);
auto valid = nullable_ ? chunk->isValid(offset_in_chunk) : true;
auto str_view = static_cast<StringChunk*>(chunk)->operator[](
offset_in_chunk);
fn(Json(str_view.data(), str_view.size()), i, valid);
}
} else {
auto [cids, offsets_in_chunk] = ToChunkIdAndOffset(offsets, count);
auto ca = SemiInlineGet(slot_->PinCells(cids));
for (int64_t i = 0; i < count; i++) {
auto chunk = ca->get_cell_of(cids[i]);
auto valid =
nullable_ ? chunk->isValid(offsets_in_chunk[i]) : true;
auto str_view = static_cast<StringChunk*>(chunk)->operator[](
offsets_in_chunk[i]);
fn(Json(str_view.data(), str_view.size()), i, valid);
}
if (count == 0) {
return;
}
auto [cids, offsets_in_chunk] = ToChunkIdAndOffset(offsets, count);
auto ca = SemiInlineGet(slot_->PinCells(cids));
for (int64_t i = 0; i < count; i++) {
auto chunk = ca->get_cell_of(cids[i]);
auto valid = nullable_ ? chunk->isValid(offsets_in_chunk[i]) : true;
auto str_view = static_cast<StringChunk*>(chunk)->operator[](
offsets_in_chunk[i]);
fn(Json(str_view.data(), str_view.size()), i, valid);
}
}
};

View File

@ -172,27 +172,16 @@ class ProxyChunkColumn : public ChunkedColumnInterface {
}
}
// nullable:
if (offsets == nullptr) {
int64_t current_offset = 0;
for (cid_t cid = 0; cid < num_chunks(); ++cid) {
auto group_chunk = group_->GetGroupChunk(cid);
auto chunk = group_chunk.get()->GetChunk(field_id_);
auto chunk_rows = chunk->RowNums();
for (int64_t i = 0; i < chunk_rows; ++i) {
auto valid = chunk->isValid(i);
fn(valid, current_offset + i);
}
current_offset += chunk_rows;
}
} else {
auto [cids, offsets_in_chunk] = ToChunkIdAndOffset(offsets, count);
auto ca = group_->GetGroupChunks(cids);
for (int64_t i = 0; i < count; i++) {
auto* group_chunk = ca->get_cell_of(cids[i]);
auto chunk = group_chunk->GetChunk(field_id_);
auto valid = chunk->isValid(offsets_in_chunk[i]);
fn(valid, i);
}
if (count == 0) {
return;
}
auto [cids, offsets_in_chunk] = ToChunkIdAndOffset(offsets, count);
auto ca = group_->GetGroupChunks(cids);
for (int64_t i = 0; i < count; i++) {
auto* group_chunk = ca->get_cell_of(cids[i]);
auto chunk = group_chunk->GetChunk(field_id_);
auto valid = chunk->isValid(offsets_in_chunk[i]);
fn(valid, i);
}
}
@ -370,8 +359,7 @@ class ProxyChunkColumn : public ChunkedColumnInterface {
auto chunk = group_chunk->GetChunk(field_id_);
auto valid = chunk->isValid(offsets_in_chunk[i]);
auto value = static_cast<StringChunk*>(chunk.get())
->
operator[](offsets_in_chunk[i]);
->operator[](offsets_in_chunk[i]);
fn(value, i, valid);
}
}
@ -387,35 +375,19 @@ class ProxyChunkColumn : public ChunkedColumnInterface {
ErrorCode::Unsupported,
"RawJsonAt only supported for ProxyChunkColumn of Json type");
}
if (offsets == nullptr) {
int64_t current_offset = 0;
for (cid_t cid = 0; cid < num_chunks(); ++cid) {
auto group_chunk = group_->GetGroupChunk(cid);
auto chunk = group_chunk.get()->GetChunk(field_id_);
auto chunk_rows = chunk->RowNums();
for (int64_t i = 0; i < chunk_rows; ++i) {
auto valid = chunk->isValid(i);
auto str_view =
static_cast<StringChunk*>(chunk.get())->operator[](i);
fn(Json(str_view.data(), str_view.size()),
current_offset + i,
valid);
}
current_offset += chunk_rows;
}
} else {
auto [cids, offsets_in_chunk] = ToChunkIdAndOffset(offsets, count);
auto ca = group_->GetGroupChunks(cids);
if (count == 0) {
return;
}
auto [cids, offsets_in_chunk] = ToChunkIdAndOffset(offsets, count);
auto ca = group_->GetGroupChunks(cids);
for (int64_t i = 0; i < count; i++) {
auto* group_chunk = ca->get_cell_of(cids[i]);
auto chunk = group_chunk->GetChunk(field_id_);
auto valid = chunk->isValid(offsets_in_chunk[i]);
auto str_view = static_cast<StringChunk*>(chunk.get())
->
operator[](offsets_in_chunk[i]);
fn(Json(str_view.data(), str_view.size()), i, valid);
}
for (int64_t i = 0; i < count; i++) {
auto* group_chunk = ca->get_cell_of(cids[i]);
auto chunk = group_chunk->GetChunk(field_id_);
auto valid = chunk->isValid(offsets_in_chunk[i]);
auto str_view = static_cast<StringChunk*>(chunk.get())
->operator[](offsets_in_chunk[i]);
fn(Json(str_view.data(), str_view.size()), i, valid);
}
}

View File

@ -45,8 +45,8 @@ class ChunkedColumnInterface {
// Other Bulk* methods could also support nullptr offsets, but that support has not been added at this moment.
virtual void
BulkIsValid(std::function<void(bool, size_t)> fn,
const int64_t* offsets = nullptr,
int64_t count = 0) const = 0;
const int64_t* offsets,
int64_t count) const = 0;
// Check if the column can contain null values
virtual bool
@ -124,8 +124,8 @@ class ChunkedColumnInterface {
virtual void
BulkRawJsonAt(std::function<void(Json, size_t, bool)> fn,
const int64_t* offsets = nullptr,
int64_t count = 0) const {
const int64_t* offsets,
int64_t count) const {
PanicInfo(
ErrorCode::Unsupported,
"RawJsonAt only supported for ChunkColumnInterface of Json type");