diff --git a/README.md b/README.md index b46ef88f61..5893b42cba 100644 --- a/README.md +++ b/README.md @@ -25,3 +25,23 @@ - `python` # enter python3 interaction environment - `from engine import db` - `db.create_all()` + +- table desc +group_table ++-------------+--------------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++-------------+--------------+------+-----+---------+----------------+ +| id | int(11) | NO | PRI | NULL | auto_increment | +| group_name | varchar(100) | YES | | NULL | | +| file_number | int(11) | YES | | NULL | | ++-------------+--------------+------+-----+---------+----------------+ + +file_table ++------------+--------------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++------------+--------------+------+-----+---------+----------------+ +| id | int(11) | NO | PRI | NULL | auto_increment | +| group_name | varchar(100) | YES | | NULL | | +| filename | varchar(100) | YES | | NULL | | +| row_number | int(11) | YES | | NULL | | ++------------+--------------+------+-----+---------+----------------+ diff --git a/pyengine/engine/controller/IndexManage.py b/pyengine/engine/controller/IndexManage.py index 70bb5fca91..df8fcafa2b 100644 --- a/pyengine/engine/controller/IndexManage.py +++ b/pyengine/engine/controller/IndexManage.py @@ -1,68 +1,72 @@ -from flask import Flask +from flask import Flask, jsonify, request from flask_restful import Resource, Api from engine import app, db +from engine.model.GroupTable import GroupTable +from engine.controller.VectorEngine import VectorEngine # app = Flask(__name__) api = Api(app) from flask_restful import reqparse +from flask_restful import request class Vector(Resource): def __init__(self): self.__parser = reqparse.RequestParser() - self.__parser.add_argument('groupid', type=str) - self.__parser.add_argument('vec', type=str) + self.__parser.add_argument('vector', type=float, action='append', location=['json']) - def 
post(self): - # args = self.__parser.parse_args() - # vec = args['vec'] - # groupid = args['groupid'] - return "vector post" + def post(self, group_id): + args = self.__parser.parse_args() + vector = args['vector'] + return VectorEngine.AddVector(group_id, vector) class VectorSearch(Resource): def __init__(self): self.__parser = reqparse.RequestParser() - self.__parser.add_argument('groupid', type=str) + self.__parser.add_argument('vector', type=float, action='append', location=['json']) - def post(self): + def post(self, group_id): + args = self.__parser.parse_args() + print('vector: ', args['vector']) + # go to search every thing return "vectorSearch post" class Index(Resource): def __init__(self): self.__parser = reqparse.RequestParser() - self.__parser.add_argument('groupid', type=str) + # self.__parser.add_argument('group_id', type=str) - def post(self): - return "index post" + def post(self, group_id): + return VectorEngine.CreateIndex(group_id) class Group(Resource): def __init__(self): self.__parser = reqparse.RequestParser() - self.__parser.add_argument('groupid', type=str) + self.__parser.add_argument('group_id', type=str) - def post(self, groupid): - return "group post" + def post(self, group_id): + return VectorEngine.AddGroup(group_id) - def get(self, groupid): - return "group get" + def get(self, group_id): + return VectorEngine.GetGroup(group_id) - def delete(self, groupid): - return "group delete" + def delete(self, group_id): + return VectorEngine.DeleteGroup(group_id) class GroupList(Resource): def get(self): - return "grouplist get" + return VectorEngine.GetGroupList() -api.add_resource(Vector, '/vector') -api.add_resource(Group, '/vector/group/') +api.add_resource(Vector, '/vector/add/') +api.add_resource(Group, '/vector/group/') api.add_resource(GroupList, '/vector/group') -api.add_resource(Index, '/vector/index') -api.add_resource(VectorSearch, '/vector/search') +api.add_resource(Index, '/vector/index/') +api.add_resource(VectorSearch, 
'/vector/search/') # if __name__ == '__main__':
diff --git a/pyengine/engine/controller/VectorEngine.py b/pyengine/engine/controller/VectorEngine.py
new file mode 100644
index 0000000000..6ddf40aba6
--- /dev/null
+++ b/pyengine/engine/controller/VectorEngine.py
@@ -0,0 +1,126 @@
+from engine.model.GroupTable import GroupTable
+from engine.model.FileTable import FileTable
+from engine.settings import ROW_LIMIT
+from flask import jsonify
+from engine import db
+import sys
+
+
+class VectorEngine(object):
+    """Static helpers that back the group/vector REST resources.
+
+    Each method returns a Flask JSON response carrying a numeric 'code'.
+    """
+
+    @staticmethod
+    def AddGroup(group_id):
+        """Create the group; 'code' is 1 when it already exists, else 0."""
+        group = GroupTable.query.filter(GroupTable.group_name == group_id).first()
+        if group:
+            return jsonify({'code': 1, 'group_name': group_id, 'file_number': group.file_number})
+
+        db.session.add(GroupTable(group_id))
+        db.session.commit()
+        return jsonify({'code': 0, 'group_name': group_id, 'file_number': 0})
+
+    @staticmethod
+    def GetGroup(group_id):
+        """Look the group up; 'code' is 0 when found and 1 when missing."""
+        group = GroupTable.query.filter(GroupTable.group_name == group_id).first()
+        if group:
+            return jsonify({'code': 0, 'group_name': group_id, 'file_number': group.file_number})
+
+        return jsonify({'code': 1, 'group_name': group_id, 'file_number': 0})  # not found
+
+    @staticmethod
+    def DeleteGroup(group_id):
+        """Delete the group if present; 'code' is 0 in either case."""
+        group = GroupTable.query.filter(GroupTable.group_name == group_id).first()
+        if not group:
+            return jsonify({'code': 0, 'group_name': group_id, 'file_number': 0})
+
+        # Read the count first: the instance should not be touched once its
+        # row has been deleted and the session committed.
+        file_number = group.file_number
+        db.session.delete(group)
+        db.session.commit()
+        return jsonify({'code': 0, 'group_name': group_id, 'file_number': file_number})
+
+    @staticmethod
+    def GetGroupList():
+        """Return every group's name and file count under the 'results' key."""
+        group_list = [
+            {'group_name': group.group_name, 'file_number': group.file_number}
+            for group in GroupTable.query.all()
+        ]
+        print(group_list)
+        return jsonify(results=group_list)
+
+    @staticmethod
+    def AddVector(group_id, vector):
+        """Append a vector to the group's current raw file.
+
+        A new raw file is started when the group has none yet or when the
+        current one already holds ROW_LIMIT rows; in the latter case an
+        index is requested for the full file first.
+        """
+        print(group_id, vector)
+        # Several criteria passed to filter() are ANDed, so and_() is not
+        # needed. The newest raw file is the one with the highest seq_no.
+        raw_file = (FileTable.query
+                    .filter(FileTable.group_name == group_id, FileTable.type == 'raw')
+                    .order_by(FileTable.seq_no.desc())
+                    .first())
+
+        if raw_file is None:
+            # first raw file of this group
+            VectorEngine._StartRawFile(group_id, 0, vector)
+        elif raw_file.row_number >= ROW_LIMIT:
+            # current file is full: index it, then roll over to a new one
+            VectorEngine.CreateIndex(group_id, raw_file.filename + '_index')
+            VectorEngine._StartRawFile(group_id, raw_file.seq_no + 1, vector)
+        else:
+            # there is still room in the existing raw file
+            VectorEngine.InsertVectorIntoRawFile(raw_file.filename, vector)
+            raw_file.row_number += 1
+            db.session.commit()
+
+        return jsonify({'code': 0})
+
+    @staticmethod
+    def _StartRawFile(group_id, seq_no, vector):
+        """Create the raw file named group_id + '_' + seq_no and record it."""
+        raw_filename = '{}_{}'.format(group_id, seq_no)
+        VectorEngine.InsertVectorIntoRawFile(raw_filename, vector)
+        record = FileTable(group_id, raw_filename, 'raw', 1)
+        record.seq_no = seq_no  # the constructor always starts at 0
+        db.session.add(record)
+        db.session.commit()
+
+    @staticmethod
+    def SearchVector(group_id, vector, limit):
+        """Placeholder: search is not implemented yet, always reports code 0."""
+        # find all files
+        # according to difference files get topk of each
+        # reduce the topk from them
+        # construct response and send back
+        return jsonify({'code': 0})
+
+    @staticmethod
+    def CreateIndex(group_id, filename=None):
+        """Placeholder index builder for a whole group or for one file.
+
+        Python has no overloading: a second CreateIndex definition would
+        replace the first, so both call shapes share this one signature.
+        """
+        if filename is None:
+            print(group_id)
+        else:
+            print(group_id, filename)
+        return jsonify({'code': 0})
+
+    @staticmethod
+    def InsertVectorIntoRawFile(filename, vector):
+        """Placeholder writer: prints its own name and returns filename."""
+        print(sys._getframe().f_code.co_name)
+        return filename
diff --git a/pyengine/engine/model/FileTable.py b/pyengine/engine/model/FileTable.py index 3b860d067e..c7ba52abf0 100644 --- a/pyengine/engine/model/FileTable.py +++ b/pyengine/engine/model/FileTable.py @@ -5,14 +5,17 @@ class FileTable(db.Model): id = db.Column(db.Integer, primary_key=True) group_name = db.Column(db.String(100)) filename =
db.Column(db.String(100)) - type = (db.Integer) + type = db.Column(db.String(100)) row_number = db.Column(db.Integer) + seq_no = db.Column(db.Integer) - def __init__(self, group_name, filename, type): + def __init__(self, group_name, filename, type, row_number): self.group_name = group_name self.filename = filename self.type = type - self.row_number = 0 + self.row_number = row_number + self.type = type + self.seq_no = 0 def __repr__(self): return '' % self.tablename \ No newline at end of file diff --git a/pyengine/engine/model/GroupTable.py b/pyengine/engine/model/GroupTable.py index c66ee31566..9cf4f51793 100644 --- a/pyengine/engine/model/GroupTable.py +++ b/pyengine/engine/model/GroupTable.py @@ -11,4 +11,4 @@ class GroupTable(db.Model): self.file_number = 0 def __repr__(self): - return '' % self.group_name \ No newline at end of file + return '' % self.group_name \ No newline at end of file diff --git a/pyengine/engine/settings.py b/pyengine/engine/settings.py index 932352a6e8..2a657e8b70 100644 --- a/pyengine/engine/settings.py +++ b/pyengine/engine/settings.py @@ -3,4 +3,7 @@ DEBUG = True SQLALCHEMY_TRACK_MODIFICATIONS = False # SECRET_KEY='A0Zr98j/3yX R~XHH!jmN]LWX/,?RT' -SQLALCHEMY_DATABASE_URI = "mysql+pymysql://vecwise@127.0.0.1:3306/vecdata" \ No newline at end of file +SQLALCHEMY_DATABASE_URI = "mysql+pymysql://vecwise@127.0.0.1:3306/vecdata" + +ROW_LIMIT = 10000000 +DATABASE_DIRECTORY = '/home/jinhai/Document/development/vecwise_engine/db' \ No newline at end of file