diff --git a/CHANGELOG.md b/CHANGELOG.md index 17a4f18608..85f1a6f15a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ Please mark all change in change log and use the issue from GitHub ## Bug - \#2366 Reduce SQL execution times for collection contains lot of partitions - \#2378 Duplicate data after server restart +- \#2395 Fix large nq cudaMalloc error - \#2399 The nlist set by the user may not take effect - \#2403 MySQL max_idle_time is 10 by default diff --git a/core/src/index/knowhere/knowhere/index/vector_index/gpu/IndexGPUIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/gpu/IndexGPUIVF.cpp index 860d32d57a..171749d324 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/gpu/IndexGPUIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/gpu/IndexGPUIVF.cpp @@ -145,7 +145,14 @@ GPUIVF::QueryImpl(int64_t n, const float* data, int64_t k, float* distances, int if (device_index) { device_index->nprobe = config[IndexParams::nprobe]; ResScope rs(res_, gpu_id_); - device_index->search(n, (float*)data, k, distances, labels, bitset_); + + // Search the n queries in blocks of at most block_size (2048) per search() call instead of all at once, to avoid the malloc issue with large query counts; each block reads from data + i * dim and writes to distances/labels + i * k + size_t block_size = 2048; + size_t dim = device_index->d; + for (size_t i = 0; i < n; i += block_size) { + size_t search_size = (n - i > block_size) ? block_size : (n - i); + device_index->search(search_size, (float*)data + i * dim, k, distances + i * k, labels + i * k, bitset_); + } } else { KNOWHERE_THROW_MSG("Not a GpuIndexIVF type."); }