diff --git a/cpp/src/storage/id_generators.cpp b/cpp/src/storage/id_generators.cpp
new file mode 100644
--- /dev/null
+++ b/cpp/src/storage/id_generators.cpp
@@ -0,0 +1,42 @@
+#include <cassert>
+#include <chrono>
+#include <numeric>
+
+#include "id_generators.h"
+
+namespace vecengine {
+
+namespace {
+
+// Microseconds elapsed since the system_clock epoch, as an unsigned ID base.
+IDNumber micros_since_epoch() {
+    const auto elapsed = std::chrono::system_clock::now().time_since_epoch();
+    return static_cast<IDNumber>(
+        std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count());
+}
+
+} // namespace
+
+IDGenerator::~IDGenerator() {}
+
+// Returns the current microsecond timestamp scaled by MAX_IDS_PER_MICRO.
+// NOTE: two calls within the same microsecond yield the same value; nothing
+// here guards against that.
+IDNumber SimpleIDGenerator::getNextIDNumber() {
+    return micros_since_epoch() * MAX_IDS_PER_MICRO;
+}
+
+// Returns n consecutive IDs starting at the scaled current timestamp.
+// n must be below MAX_IDS_PER_MICRO so the batch stays inside the ID range
+// set aside for one microsecond. The result is returned by value; an
+// IDNumbers&& return would refer to a local destroyed on return.
+IDNumbers SimpleIDGenerator::getNextIDNumbers(size_t n) {
+    assert(n < MAX_IDS_PER_MICRO);
+    const IDNumber first_id = micros_since_epoch() * MAX_IDS_PER_MICRO;
+
+    IDNumbers ids(n);
+    std::iota(ids.begin(), ids.end(), first_id);
+    return ids;
+}
+
+} // namespace vecengine
diff --git a/cpp/src/storage/id_generators.h b/cpp/src/storage/id_generators.h
new file mode 100644
--- /dev/null
+++ b/cpp/src/storage/id_generators.h
@@ -0,0 +1,44 @@
+#ifndef UTILS_ID_GENERATORS_H_
+#define UTILS_ID_GENERATORS_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+namespace vecengine {
+
+// Plain typedefs rather than macros, so the names are real types scoped to
+// this namespace.
+typedef std::uint64_t IDNumber;
+typedef IDNumber* IDNumberPtr;
+typedef std::vector<IDNumber> IDNumbers;
+
+// Interface for objects that hand out IDs.
+class IDGenerator {
+public:
+    // Returns one new ID.
+    virtual IDNumber getNextIDNumber() = 0;
+    // Returns n_ new IDs. Returned by value so the caller owns the result.
+    virtual IDNumbers getNextIDNumbers(size_t n_) = 0;
+
+    virtual ~IDGenerator();
+
+}; // IDGenerator
+
+
+// Generator whose IDs are derived from the system clock in microseconds.
+class SimpleIDGenerator : public IDGenerator {
+public:
+    virtual IDNumber getNextIDNumber() override;
+    virtual IDNumbers getNextIDNumbers(size_t n_) override;
+
+private:
+    // Number of IDs set aside for each microsecond tick.
+    static constexpr IDNumber MAX_IDS_PER_MICRO = 1000;
+
+}; // SimpleIDGenerator
+
+
+} // namespace vecengine
+
+#endif // UTILS_ID_GENERATORS_H_
diff --git a/cpp/src/storage/memvectors.cpp b/cpp/src/storage/memvectors.cpp
new file mode 100644
--- /dev/null
+++ b/cpp/src/storage/memvectors.cpp
@@ -0,0 +1,99 @@
+#include <faiss/IndexFlat.h>
+#include <faiss/MetaIndexes.h>
+#include <faiss/index_io.h>
+
+#include <vector>
+
+#include "memvectors.h"
+
+
+namespace vecengine {
+
+namespace {
+
+// ID type faiss expects, taken from Index::ntotal so this file does not
+// depend on where a given faiss release declares idx_t.
+typedef decltype(faiss::Index::ntotal) FaissId;
+
+} // namespace
+
+// Creates an empty flat index of the given dimension wrapped in an ID map.
+// file_location_ is only stored here; serialize() writes the index to it.
+MemVectors::MemVectors(size_t dimension_, const std::string& file_location_) :
+    _file_location(file_location_),
+    _pIdGenerator(new SimpleIDGenerator()),
+    _dimension(dimension_),
+    _pInnerIndex(new faiss::IndexFlat(_dimension)),
+    _pIdMapIndex(new faiss::IndexIDMap(_pInnerIndex.get())) {
+}
+
+// Generates n new IDs, adds the n vectors to the index under those IDs and
+// returns the IDs.
+IDNumbers MemVectors::add(size_t n, const float* vectors) {
+    IDNumbers ids = _pIdGenerator->getNextIDNumbers(n);
+    // faiss takes its own signed ID type, so convert element-wise instead of
+    // reinterpreting the IDNumber buffer.
+    const std::vector<FaissId> faiss_ids(ids.begin(), ids.end());
+    _pIdMapIndex->add_with_ids(static_cast<FaissId>(n), vectors, faiss_ids.data());
+    return ids;
+}
+
+// Returns the ID-mapped index's ntotal counter.
+size_t MemVectors::total() const {
+    return static_cast<size_t>(_pIdMapIndex->ntotal);
+}
+
+// total() * dimension, i.e. a count of stored float components, not bytes.
+size_t MemVectors::approximate_size() const {
+    return total() * _dimension;
+}
+
+// Writes the ID-mapped index to the file location given at construction.
+void MemVectors::serialize() {
+    faiss::write_index(_pIdMapIndex.get(), _file_location.c_str());
+}
+
+// Defined here, where the faiss types are complete. Members are released in
+// reverse declaration order: the ID map first, then the index it wraps.
+MemVectors::~MemVectors() = default;
+
+/*
+ * MemManager
+ */
+
+// Returns the MemVectors registered for group_id_, or nullptr if none exists.
+MemVectors* MemManager::get_mem_by_group(const std::string& group_id_) {
+    auto memIt = _memMap.find(group_id_);
+    if (memIt != _memMap.end()) {
+        return &(memIt->second);
+    }
+    // PXU TODO:
+    // 1. Read Group meta info
+    // 2. Initialize MemVectors based on meta info
+    return nullptr;
+    /* GroupMetaInfo info; */
+    /* bool succ = env->getGroupMeta(group_id, &info); */
+    /* if (!succ) { */
+    /*     return nullptr; */
+    /* } */
+    /* MemVectors is non-copyable, so it has to be constructed in place: */
+    /* auto inserted = _memMap.emplace(std::piecewise_construct, */
+    /*     std::forward_as_tuple(group_id), */
+    /*     std::forward_as_tuple(info.dimension, info.next_file_location)); */
+    /* return &(inserted.first->second); */
+}
+
+// Adds n vectors to the group's MemVectors and returns their IDs; returns an
+// empty list when the group has no MemVectors. Takes no lock itself.
+IDNumbers MemManager::add_vectors_no_lock(const std::string& group_id_,
+                                          size_t n,
+                                          const float* vectors) {
+    MemVectors* mem = get_mem_by_group(group_id_);
+    if (mem == nullptr) {
+        return IDNumbers();
+    }
+    return mem->add(n, vectors);
+}
+
+
+} // namespace vecengine
diff --git a/cpp/src/storage/memvectors.h b/cpp/src/storage/memvectors.h
new file mode 100644
--- /dev/null
+++ b/cpp/src/storage/memvectors.h
@@ -0,0 +1,71 @@
+#ifndef STORAGE_VECENGINE_MEMVECTORS_H_
+#define STORAGE_VECENGINE_MEMVECTORS_H_
+
+#include <map>
+#include <memory>
+#include <string>
+#include "id_generators.h"
+
+// Forward declarations live inside the namespace; faiss declares these
+// types as structs.
+namespace faiss {
+struct Index;
+struct IndexIDMap;
+} // namespace faiss
+
+
+namespace vecengine {
+
+// In-memory vector buffer: a faiss ID-mapped index plus the ID generator
+// used to label added vectors and the file it is serialized to.
+class MemVectors {
+public:
+    explicit MemVectors(size_t dimension_, const std::string& file_location_);
+
+    // Owns its faiss indexes, so copying is disabled.
+    MemVectors(const MemVectors&) = delete;
+    MemVectors& operator=(const MemVectors&) = delete;
+
+    // Adds n vectors and returns the IDs assigned to them.
+    IDNumbers add(size_t n, const float* vectors);
+
+    size_t total() const;
+
+    size_t approximate_size() const;
+
+    void serialize();
+
+    ~MemVectors();
+
+private:
+    std::string _file_location;
+    std::unique_ptr<IDGenerator> _pIdGenerator;
+    size_t _dimension;
+    // Declared before _pIdMapIndex so the wrapping ID map is destroyed first.
+    std::unique_ptr<faiss::Index> _pInnerIndex;
+    std::unique_ptr<faiss::IndexIDMap> _pIdMapIndex;
+
+}; // MemVectors
+
+
+// Keeps one MemVectors per group id.
+class MemManager {
+public:
+    MemManager() = default;
+
+    // Returns the group's MemVectors, or nullptr if it has none.
+    MemVectors* get_mem_by_group(const std::string& group_id_);
+
+private:
+    IDNumbers add_vectors_no_lock(const std::string& group_id_,
+                                  size_t n,
+                                  const float* vectors);
+
+    typedef std::map<std::string, MemVectors> MemMap;
+    MemMap _memMap;
+}; // MemManager
+
+
+} // namespace vecengine
+
+#endif
diff --git a/cpp/src/storage/serializers.h b/cpp/src/storage/serializers.h
new file mode 100644
index 0000000000..5356691530
--- /dev/null
+++ b/cpp/src/storage/serializers.h
@@ -0,0 +1,11 @@
+#ifndef STORAGE_SERIALIZERS_H_
+#define STORAGE_SERIALIZERS_H_
+
+class Serializer {
+public:
+
+
+}; // Serializer
+
+
+#endif // STORAGE_SERIALIZERS_H_