#include #include #include #include #include #include #include #include "tantivy-binding.h" #include "tantivy-wrapper.h" using namespace milvus::tantivy; template void run() { std::cout << "run " << typeid(T).name() << std::endl; auto path = "/tmp/inverted-index/test-binding/"; boost::filesystem::remove_all(path); boost::filesystem::create_directories(path); if (tantivy_index_exist(path)) { auto w = TantivyIndexWrapper(path); auto cnt = w.count(); std::cout << "index already exist, open it, count: " << cnt << std::endl; return; } auto w = TantivyIndexWrapper("test_field_name", guess_data_type(), path); T arr[] = {1, 2, 3, 4, 5, 6}; auto l = sizeof(arr) / sizeof(T); w.add_data(arr, l); w.finish(); assert(w.count() == l); { auto hits = w.term_query(2); hits.debug(); } { auto hits = w.lower_bound_range_query(1, false); hits.debug(); } { auto hits = w.upper_bound_range_query(4, false); hits.debug(); } { auto hits = w.range_query(2, 4, false, false); hits.debug(); } } template <> void run() { std::cout << "run bool" << std::endl; auto path = "/tmp/inverted-index/test-binding/"; boost::filesystem::remove_all(path); boost::filesystem::create_directories(path); if (tantivy_index_exist(path)) { auto w = TantivyIndexWrapper(path); auto cnt = w.count(); std::cout << "index already exist, open it, count: " << cnt << std::endl; return; } auto w = TantivyIndexWrapper("test_field_name", TantivyDataType::Bool, path); bool arr[] = {true, false, false, true, false, true}; auto l = sizeof(arr) / sizeof(bool); w.add_data(arr, l); w.finish(); assert(w.count() == l); { auto hits = w.term_query(true); hits.debug(); } } template <> void run() { std::cout << "run string" << std::endl; auto path = "/tmp/inverted-index/test-binding/"; boost::filesystem::remove_all(path); boost::filesystem::create_directories(path); if (tantivy_index_exist(path)) { auto w = TantivyIndexWrapper(path); auto cnt = w.count(); std::cout << "index already exist, open it, count: " << cnt << std::endl; return; } auto w = TantivyIndexWrapper("test_field_name", TantivyDataType::Keyword, path); std::vector arr = {"a", "b", "aaa", "abbb"}; auto l = arr.size(); w.add_data(arr.data(), l); w.finish(); assert(w.count() == l); { auto hits = w.term_query("a"); hits.debug(); } { auto hits = w.lower_bound_range_query("aa", true); hits.debug(); } { auto hits = w.upper_bound_range_query("ab", true); hits.debug(); } { auto hits = w.range_query("aa", "ab", true, true); hits.debug(); } { auto hits = w.prefix_query("a"); hits.debug(); } { auto hits = w.regex_query("a(.|\n)*"); hits.debug(); } } void test_32717() { using T = int16_t; auto path = "/tmp/inverted-index/test-binding/"; boost::filesystem::remove_all(path); boost::filesystem::create_directories(path); if (tantivy_index_exist(path)) { auto w = TantivyIndexWrapper(path); auto cnt = w.count(); std::cout << "index already exist, open it, count: " << cnt << std::endl; return; } auto w = TantivyIndexWrapper("test_field_name", guess_data_type(), path); std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution dis(1, 1000); std::vector arr; std::map> inverted; size_t l = 1000000; for (size_t i = 0; i < l; i++) { auto n = static_cast(dis(gen)); arr.push_back(n); if (inverted.find(n) == inverted.end()) { inverted[n] = std::set(); } inverted[n].insert(i); } w.add_data(arr.data(), l); w.finish(); assert(w.count() == l); for (int16_t term = 1; term < 1000; term += 10) { auto hits = w.term_query(term); for (size_t i = 0; i < hits.array_.len; i++) { assert(arr[hits.array_.array[i]] == term); } } } int main(int argc, char* argv[]) { test_32717(); run(); run(); run(); run(); run(); run(); run(); run(); return 0; }