diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 0088389a76..80d1a70c8b 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -16,6 +16,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-21 - Implement SDK interface part 2 - MS-26 - cmake. Add thirdparty packages - MS-31 - cmake: add prometheus +- MS-33 - cmake: add -j4 to make third party packages build faster ### Task @@ -23,4 +24,5 @@ Please mark all change in change log and use the ticket from JIRA. - MS-4 - Refactor the vecwise_engine code structure - MS-20 - Clean Code Part 1 - MS-30 - Use faiss v1.5.2 +- MS-32 - Fix thrift error - MS-34 - Fix prometheus-cpp thirdparty diff --git a/cpp/build.sh b/cpp/build.sh index abab7bdffa..66f06a617f 100755 --- a/cpp/build.sh +++ b/cpp/build.sh @@ -72,7 +72,7 @@ if [[ ${MAKE_CLEAN} = "ON" ]]; then make clean fi -make -j 1 || exit 1 +make -j 4 || exit 1 if [[ ${BUILD_TYPE} != "Debug" ]]; then strip src/vecwise_server diff --git a/cpp/cmake/ThirdPartyPackages.cmake b/cpp/cmake/ThirdPartyPackages.cmake index cd367262db..8167879376 100644 --- a/cpp/cmake/ThirdPartyPackages.cmake +++ b/cpp/cmake/ThirdPartyPackages.cmake @@ -152,14 +152,16 @@ if("${MAKE}" STREQUAL "") endif() endif() -# Using make -j in sub-make is fragile -# see discussion https://github.com/apache/MEGASEARCH/pull/2779 -if(${CMAKE_GENERATOR} MATCHES "Makefiles") - set(MAKE_BUILD_ARGS "") -else() - # limit the maximum number of jobs for ninja - set(MAKE_BUILD_ARGS "-j4") -endif() +set(MAKE_BUILD_ARGS "-j4") + +## Using make -j in sub-make is fragile +## see discussion https://github.com/apache/MEGASEARCH/pull/2779 +#if(${CMAKE_GENERATOR} MATCHES "Makefiles") +# set(MAKE_BUILD_ARGS "") +#else() +# # limit the maximum number of jobs for ninja +# set(MAKE_BUILD_ARGS "-j4") +#endif() # ---------------------------------------------------------------------- # Find pthreads @@ -537,6 +539,9 @@ macro(build_easyloggingpp) ${EP_LOG_OPTIONS} CMAKE_ARGS ${EASYLOGGINGPP_CMAKE_ARGS} + BUILD_COMMAND + ${MAKE} + ${MAKE_BUILD_ARGS} BUILD_BYPRODUCTS ${EASYLOGGINGPP_STATIC_LIB}) @@ -577,7 +582,8 @@ macro(build_openblas) BUILD_IN_SOURCE 1 BUILD_COMMAND - ${MAKE} ${MAKE_BUILD_ARGS} + ${MAKE} + ${MAKE_BUILD_ARGS} INSTALL_COMMAND ${MAKE} PREFIX=${OPENBLAS_PREFIX} @@ -623,6 +629,9 @@ macro(build_lapack) ${EP_LOG_OPTIONS} CMAKE_ARGS ${LAPACK_CMAKE_ARGS} + BUILD_COMMAND + ${MAKE} + ${MAKE_BUILD_ARGS} BUILD_BYPRODUCTS ${LAPACK_STATIC_LIB}) @@ -694,6 +703,9 @@ macro(build_faiss) # ${FAISS_PREFIX} # BUILD_COMMAND # ${MAKE} ${MAKE_BUILD_ARGS} + BUILD_COMMAND + ${MAKE} + ${MAKE_BUILD_ARGS} BUILD_IN_SOURCE 1 # INSTALL_DIR @@ -774,6 +786,9 @@ macro(build_gtest) ExternalProject_Add(googletest_ep URL ${GTEST_SOURCE_URL} + BUILD_COMMAND + ${MAKE} + ${MAKE_BUILD_ARGS} BUILD_BYPRODUCTS ${GTEST_STATIC_LIB} ${GTEST_MAIN_STATIC_LIB} @@ -936,27 +951,25 @@ macro(build_prometheus) "${PROMETHEUS_PUSH_STATIC_LIB}" "${PROMETHEUS_PULL_STATIC_LIB}") - #file(MAKE_DIRECTORY "${PROMETHEUS_PREFIX}/include") - + file(MAKE_DIRECTORY "${PROMETHEUS_PREFIX}/push/include") add_library(prometheus-cpp-push STATIC IMPORTED) set_target_properties(prometheus-cpp-push - PROPERTIES IMPORTED_LOCATION "${PROMETHEUS_PUSH_STATIC_LIB}") -# INTERFACE_INCLUDE_DIRECTORIES -# "${PROMETHEUS_PREFIX}/push/include") + PROPERTIES IMPORTED_LOCATION "${PROMETHEUS_PUSH_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${PROMETHEUS_PREFIX}/push/include") add_dependencies(prometheus-cpp-push prometheus_ep) + file(MAKE_DIRECTORY "${PROMETHEUS_PREFIX}/pull/include") add_library(prometheus-cpp-pull STATIC IMPORTED) set_target_properties(prometheus-cpp-pull - PROPERTIES IMPORTED_LOCATION "${PROMETHEUS_PULL_STATIC_LIB}") -# INTERFACE_INCLUDE_DIRECTORIES -# "${PROMETHEUS_PREFIX}/pull/include") + PROPERTIES IMPORTED_LOCATION "${PROMETHEUS_PULL_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${PROMETHEUS_PREFIX}/pull/include") add_dependencies(prometheus-cpp-pull prometheus_ep) + file(MAKE_DIRECTORY "${PROMETHEUS_PREFIX}/core/include") add_library(prometheus-cpp-core STATIC IMPORTED) set_target_properties(prometheus-cpp-core - PROPERTIES IMPORTED_LOCATION "${PROMETHEUS_CORE_STATIC_LIB}") -# INTERFACE_INCLUDE_DIRECTORIES -# "${PROMETHEUS_PREFIX}/core/include") + PROPERTIES IMPORTED_LOCATION "${PROMETHEUS_CORE_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${PROMETHEUS_PREFIX}/core/include") add_dependencies(prometheus-cpp-core prometheus_ep) endmacro() @@ -1051,6 +1064,9 @@ macro(build_snappy) externalproject_add(snappy_ep ${EP_LOG_OPTIONS} + BUILD_COMMAND + ${MAKE} + ${MAKE_BUILD_ARGS} BUILD_IN_SOURCE 1 INSTALL_DIR @@ -1360,6 +1376,9 @@ macro(build_thrift) BUILD_BYPRODUCTS "${THRIFT_STATIC_LIB}" "${THRIFT_COMPILER}" + BUILD_COMMAND + ${MAKE} + ${MAKE_BUILD_ARGS} CMAKE_ARGS ${THRIFT_CMAKE_ARGS} DEPENDS @@ -1401,6 +1420,9 @@ macro(build_yamlcpp) URL ${YAMLCPP_SOURCE_URL} ${EP_LOG_OPTIONS} + BUILD_COMMAND + ${MAKE} + ${MAKE_BUILD_ARGS} BUILD_BYPRODUCTS "${YAMLCPP_STATIC_LIB}" CMAKE_ARGS @@ -1448,6 +1470,9 @@ macro(build_zlib) URL ${ZLIB_SOURCE_URL} ${EP_LOG_OPTIONS} + BUILD_COMMAND + ${MAKE} + ${MAKE_BUILD_ARGS} BUILD_BYPRODUCTS "${ZLIB_STATIC_LIB}" CMAKE_ARGS @@ -1515,6 +1540,9 @@ macro(build_zstd) ${ZSTD_CMAKE_ARGS} SOURCE_SUBDIR "build/cmake" + BUILD_COMMAND + ${MAKE} + ${MAKE_BUILD_ARGS} INSTALL_DIR ${ZSTD_PREFIX} URL diff --git a/cpp/src/thrift/cpp_gen.sh b/cpp/src/thrift/cpp_gen.sh index df04e46c99..c3065b1f50 100755 --- a/cpp/src/thrift/cpp_gen.sh +++ b/cpp/src/thrift/cpp_gen.sh @@ -1,4 +1,4 @@ #!/bin/bash -../../third_party/build/bin/thrift -r --gen cpp ./megasearch.thrift +thrift -r --gen cpp ./megasearch.thrift diff --git a/cpp/src/thrift/megasearch.thrift b/cpp/src/thrift/megasearch.thrift index e85b77dc3d..800faf5db8 100644 --- a/cpp/src/thrift/megasearch.thrift +++ b/cpp/src/thrift/megasearch.thrift @@ -3,7 +3,6 @@ * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. ******************************************************************************/ -namespace cl megasearch.thrift namespace cpp megasearch.thrift namespace py megasearch.thrift namespace d megasearch.thrift diff --git a/cpp/src/thrift/py_gen.sh b/cpp/src/thrift/py_gen.sh deleted file mode 100755 index de661cf0c3..0000000000 --- a/cpp/src/thrift/py_gen.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -../../third_party/build/bin/thrift -r --gen py ./megasearch.thrift - diff --git a/python/.gitignore b/python/.gitignore deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md new file mode 100644 index 0000000000..3b58294706 --- /dev/null +++ b/python/CHANGELOG.md @@ -0,0 +1,15 @@ +# Changelog + +Please mark all change in change log and use the ticket from JIRA. + +## [Unreleased] + +### Bug + +### Improvement + +### New Feature + +- MS-10 - Add Python SDK APIs + +### Task diff --git a/python/sdk/.gitignore b/python/sdk/.gitignore new file mode 100644 index 0000000000..723ef36f4e --- /dev/null +++ b/python/sdk/.gitignore @@ -0,0 +1 @@ +.idea \ No newline at end of file diff --git a/python/sdk/ClientInterface.py b/python/sdk/ClientInterface.py new file mode 100644 index 0000000000..e59f28f826 --- /dev/null +++ b/python/sdk/ClientInterface.py @@ -0,0 +1,433 @@ +from enum import IntEnum +from sdk.exceptions import ConnectParamMissingError +from sdk.Status import Status + + +class IndexType(IntEnum): + RAW = 1 + IVFFLAT = 2 + + +class ColumnType(IntEnum): + INVALID = 1 + INT8 = 2 + INT16 = 3 + INT32 = 4 + INT64 = 5 + FLOAT32 = 6 + FLOAT64 = 7 + DATE = 8 + VECTOR = 9 + + +class ConnectParam(object): + """ + Connect API parameter + + :type ip_address: str + :param ip_address: Server IP address + + :type port: str, + :param port: Sever PORT + + """ + def __init__(self, ip_address, port): + + self.ip_address = ip_address + self.port = port + + +class Column(object): + """ + Table column description + + :type type: ColumnType + :param type: type of the column + + :type name: str + :param name: name of the column + + """ + def __init__(self, name=None, type=ColumnType.INVALID): + self.type = type + self.name = name + + +class VectorColumn(Column): + """ + Table vector column description + + :type dimension: int, int64 + :param dimension: vector dimension + + :type index_type: IndexType + :param index_type: IndexType + + :type store_raw_vector: bool + :param store_raw_vector: Is vector self stored in the table + + """ + def __init__(self, dimension=0, + index_type=IndexType.RAW, + store_raw_vector=False): + self.dimension = dimension + self.index_type = index_type + self.store_raw_vector = store_raw_vector + super(VectorColumn, self).__init__(type=ColumnType.VECTOR) + + +class TableSchema(object): + """ + Table Schema + + :type table_name: str + :param table_name: Table name + + :type vector_columns: list[VectorColumn] + :param vector_columns: vector column description + + :type attribute_columns: list[Column] + :param attribute_columns: Columns description + + :type partition_column_names: list[str] + :param partition_column_names: Partition column name + + """ + def __init__(self, table_name, vector_columns, + attribute_columns, partition_column_names): + self.table_name = table_name + self.vector_columns = vector_columns + self.attribute_columns = attribute_columns + self.partition_column_names = partition_column_names + + +class Range(object): + """ + Range information + + :type start: str + :param start: Range start value + + :type end: str + :param end: Range end value + + """ + def __init__(self, start, end): + self.start = start + self.end = end + + +class CreateTablePartitionParam(object): + """ + Create table partition parameters + + :type table_name: str + :param table_name: Table name, + VECTOR/FLOAT32/FLOAT64 ColumnType is not allowed for partition + + :type partition_name: str + :param partition_name: partition name, created partition name + + :type column_name_to_range: dict{str : Range} + :param column_name_to_range: Column name to PartitionRange dictionary + """ + def __init__(self, table_name, partition_name, **column_name_to_range): + self.table_name = table_name + self.partition_name = partition_name + self.column_name_to_range = column_name_to_range + + +class DeleteTablePartitionParam(object): + """ + Delete table partition parameters + + :type table_name: str + :param table_name: Table name + + :type partition_names: iterable, str + :param partition_names: Partition name array + + """ + def __init__(self, table_name, *partition_names): + self.table_name = table_name + self.partition_names = partition_names + + +class RowRecord(object): + """ + Record inserted + + :type column_name_to_vector: dict{str : list[float]} + :param column_name_to_vector: Column name to vector map + + :type column_name_to_value: dict{str: str} + :param column_name_to_value: Other attribute columns + """ + def __init__(self, column_name_to_vector, column_name_to_value): + self.column_name_to_vector = column_name_to_vector + self.column_name_to_value = column_name_to_value + + +class QueryRecord(object): + """ + Query record + + :type column_name_to_vector: dict{str : list[float]} + :param column_name_to_vector: Query vectors, column name to vector map + + :type selected_columns: list[str] + :param selected_columns: Output column array + + :type name_to_partition_ranges: dict{str : list[Range]} + :param name_to_partition_ranges: Range used to select partitions + + """ + def __init__(self, column_name_to_vector, selected_columns, **name_to_partition_ranges): + self.column_name_to_vector = column_name_to_vector + self.selected_columns = selected_columns + self.name_to_partition_ranges = name_to_partition_ranges + + +class QueryResult(object): + """ + Query result + + :type id: int + :param id: Output result + + :type score: float + :param score: Vector similarity 0 <= score <= 100 + + :type column_name_to_value: dict{str : str} + :param column_name_to_value: Other columns + + """ + def __init__(self, id, score, **column_name_to_value): + self.id = id + self.score = score + self.column_name_to_value = column_name_to_value + + +class TopKQueryResult(object): + """ + TopK query results + + :type query_results: list[QueryResult] + :param query_results: TopK query results + + """ + def __init__(self, query_results): + self.query_results = query_results + + +def _abstract(): + raise NotImplementedError('You need to override this function') + + +class Connection(object): + """SDK client class""" + + @staticmethod + def create(): + """Create a connection instance and return it + should be implemented + + :return connection: Connection + """ + _abstract() + + @staticmethod + def destroy(connection): + """Destroy the connection instance + should be implemented + + :type connection: Connection + :param connection: The connection instance to be destroyed + + :return bool, return True if destroy is successful + """ + _abstract() + + def connect(self, param=None, uri=None): + """ + Connect method should be called before any operations + Server will be connected after connect return OK + should be implemented + + :type param: ConnectParam + :param param: ConnectParam + + :type uri: str + :param uri: uri param + + :return: Status, indicate if connect is successful + """ + if (not param and not uri) or (param and uri): + raise ConnectParamMissingError('You need to parse exact one param') + _abstract() + + def connected(self): + """ + connected, connection status + should be implemented + + :return: Status, indicate if connect is successful + """ + _abstract() + + def disconnect(self): + """ + Disconnect, server will be disconnected after disconnect return OK + should be implemented + + :return: Status, indicate if connect is successful + """ + _abstract() + + def create_table(self, param): + """ + Create table + should be implemented + + :type param: TableSchema + :param param: provide table information to be created + + :return: Status, indicate if connect is successful + """ + _abstract() + + def delete_table(self, table_name): + """ + Delete table + should be implemented + + :type table_name: str + :param table_name: table_name of the deleting table + + :return: Status, indicate if connect is successful + """ + _abstract() + + def create_table_partition(self, param): + """ + Create table partition + should be implemented + + :type param: CreateTablePartitionParam + :param param: provide partition information + + :return: Status, indicate if table partition is created successfully + """ + _abstract() + + def delete_table_partition(self, param): + """ + Delete table partition + should be implemented + + :type param: DeleteTablePartitionParam + :param param: provide partition information to be deleted + :return: Status, indicate if partition is deleted successfully + """ + _abstract() + + def add_vector(self, table_name, records, ids): + """ + Add vectors to table + should be implemented + + :type table_name: str + :param table_name: table name been inserted + + :type records: list[RowRecord] + :param records: list of vectors been inserted + + :type ids: list[int] + :param ids: list of ids + + :return: Status, indicate if vectors inserted successfully + """ + _abstract() + + def search_vector(self, table_name, query_records, query_results, top_k): + """ + Query vectors in a table + should be implemented + + :type table_name: str + :param table_name: table name been queried + + :type query_records: list[QueryRecord] + :param query_records: all vectors going to be queried + + :type query_results: list[TopKQueryResult] + :param query_results: list of results + + :type top_k: int + :param top_k: how many similar vectors will be searched + + :return: Status, indicate if query is successful + """ + _abstract() + + def describe_table(self, table_name, table_schema): + """ + Show table information + should be implemented + + :type table_name: str + :param table_name: which table to be shown + + :type table_schema: TableSchema + :param table_schema: table schema is given when operation is successful + + :return: Status, indicate if query is successful + """ + _abstract() + + def show_tables(self, tables): + """ + Show all tables in database + should be implemented + + :type tables: list[str] + :param tables: list of tables + + :return: Status, indicate if this operation is successful + """ + _abstract() + + def client_version(self): + """ + Provide server version + should be implemented + + :return: Server version + """ + _abstract() + pass + + def server_status(self): + """ + Provide server status + should be implemented + + :return: Server status + """ + _abstract() + pass + + + + + + + + + + + + + + + diff --git a/python/sdk/Status.py b/python/sdk/Status.py new file mode 100644 index 0000000000..17590826ad --- /dev/null +++ b/python/sdk/Status.py @@ -0,0 +1,21 @@ +from enum import IntEnum + + +class Status(IntEnum): + + def __new__(cls, code, message=''): + obj = int.__new__(cls, code) + obj._code_ = code + + obj.message = message + return obj + + def __str__(self): + return str(self.code) + + # success + OK = 200, 'OK' + + INVALID = 300, 'Invalid' + UNKNOWN = 400, 'Unknown error' + NOT_SUPPORTED = 500, 'Not supported' diff --git a/cpp/src/thrift/gen-py/__init__.py b/python/sdk/__init__.py similarity index 100% rename from cpp/src/thrift/gen-py/__init__.py rename to python/sdk/__init__.py diff --git a/python/sdk/exceptions.py b/python/sdk/exceptions.py new file mode 100644 index 0000000000..694c7c0af2 --- /dev/null +++ b/python/sdk/exceptions.py @@ -0,0 +1,2 @@ +class ConnectParamMissingError(ValueError): + pass