From 101abe5dfd23c68bc5a7ffd95dfd012953cca81d Mon Sep 17 00:00:00 2001 From: jinhai Date: Wed, 20 Mar 2019 19:49:17 +0800 Subject: [PATCH 1/2] Before add real logic code --- README.md | 20 +++++++ pyengine/engine/controller/IndexManage.py | 67 +++++++++++++++++----- pyengine/engine/controller/VectorEngine.py | 0 pyengine/engine/model/GroupTable.py | 2 +- 4 files changed, 73 insertions(+), 16 deletions(-) create mode 100644 pyengine/engine/controller/VectorEngine.py diff --git a/README.md b/README.md index b46ef88f61..5893b42cba 100644 --- a/README.md +++ b/README.md @@ -25,3 +25,23 @@ - `python` # enter python3 interaction environment - `from engine import db` - `db.create_all()` + +- table desc +group_table ++-------------+--------------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++-------------+--------------+------+-----+---------+----------------+ +| id | int(11) | NO | PRI | NULL | auto_increment | +| group_name | varchar(100) | YES | | NULL | | +| file_number | int(11) | YES | | NULL | | ++-------------+--------------+------+-----+---------+----------------+ + +file_table ++------------+--------------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++------------+--------------+------+-----+---------+----------------+ +| id | int(11) | NO | PRI | NULL | auto_increment | +| group_name | varchar(100) | YES | | NULL | | +| filename | varchar(100) | YES | | NULL | | +| row_number | int(11) | YES | | NULL | | ++------------+--------------+------+-----+---------+----------------+ diff --git a/pyengine/engine/controller/IndexManage.py b/pyengine/engine/controller/IndexManage.py index 70bb5fca91..1930a5a7b6 100644 --- a/pyengine/engine/controller/IndexManage.py +++ b/pyengine/engine/controller/IndexManage.py @@ -1,31 +1,36 @@ -from flask import Flask +from flask import Flask, jsonify, request from flask_restful import Resource, Api from engine import app, db +from engine.model.GroupTable import GroupTable # app = Flask(__name__) api = Api(app) from flask_restful import reqparse +from flask_restful import request class Vector(Resource): def __init__(self): self.__parser = reqparse.RequestParser() - self.__parser.add_argument('groupid', type=str) - self.__parser.add_argument('vec', type=str) + self.__parser.add_argument('vector', type=float, action='append', location=['json']) - def post(self): - # args = self.__parser.parse_args() - # vec = args['vec'] - # groupid = args['groupid'] + def post(self, groupid): + args = self.__parser.parse_args() + vector = args['vector'] + # add vector into file + print("vector: ", vector) return "vector post" class VectorSearch(Resource): def __init__(self): self.__parser = reqparse.RequestParser() - self.__parser.add_argument('groupid', type=str) + self.__parser.add_argument('vector', type=float, action='append', location=['json']) - def post(self): + def post(self, groupid): + args = self.__parser.parse_args() + print('vector: ', args['vector']) + # go to search every thing return "vectorSearch post" @@ -35,6 +40,7 @@ class Index(Resource): self.__parser.add_argument('groupid', type=str) def post(self): + # go to create index for specific group return "index post" @@ -44,25 +50,56 @@ class Group(Resource): self.__parser.add_argument('groupid', type=str) def post(self, groupid): - return "group post" + args = self.__parser.parse_args() + group = GroupTable.query.filter(GroupTable.group_name==groupid).first() + if(group): + return jsonify({'code': 1, 'group_name': groupid, 'file_number': group.file_number}) + else: + new_group = GroupTable(groupid) + db.session.add(new_group) + db.session.commit() + return jsonify({'code': 0, 'group_name': groupid, 'file_number': 0}) + def get(self, groupid): - return "group get" + args = self.__parser.parse_args() + group = GroupTable.query.filter(GroupTable.group_name==groupid).first() + if(group): + return jsonify({'code': 0, 'group_name': groupid, 'file_number': group.file_number}) + else: + return jsonify({'code': 1, 'group_name': groupid, 'file_number': 0}) # not found def delete(self, groupid): - return "group delete" + args = self.__parser.parse_args() + group = GroupTable.query.filter(GroupTable.group_name==groupid).first() + if(group): + # old_group = GroupTable(groupid) + db.session.delete(group) + db.session.commit() + return jsonify({'code': 0, 'group_name': groupid, 'file_number': group.file_number}) + else: + return jsonify({'code': 0, 'group_name': groupid, 'file_number': 0}) class GroupList(Resource): def get(self): - return "grouplist get" + group = GroupTable.query.all() + group_list = [] + for group_tuple in group: + group_item = {} + group_item['group_name'] = group_tuple.group_name + group_item['file_number'] = group_tuple.file_number + group_list.append(group_item) + + print(group_list) + return jsonify(results = group_list) -api.add_resource(Vector, '/vector') +api.add_resource(Vector, '/vector/add/') api.add_resource(Group, '/vector/group/') api.add_resource(GroupList, '/vector/group') api.add_resource(Index, '/vector/index') -api.add_resource(VectorSearch, '/vector/search') +api.add_resource(VectorSearch, '/vector/search/') # if __name__ == '__main__': diff --git a/pyengine/engine/controller/VectorEngine.py b/pyengine/engine/controller/VectorEngine.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pyengine/engine/model/GroupTable.py b/pyengine/engine/model/GroupTable.py index c66ee31566..9cf4f51793 100644 --- a/pyengine/engine/model/GroupTable.py +++ b/pyengine/engine/model/GroupTable.py @@ -11,4 +11,4 @@ class GroupTable(db.Model): self.file_number = 0 def __repr__(self): - return '' % self.group_name \ No newline at end of file + return '' % self.group_name \ No newline at end of file From 25d18944f80a91470949fc3d25c4390a18e65263 Mon Sep 17 00:00:00 2001 From: jinhai Date: Wed, 20 Mar 2019 20:55:52 +0800 Subject: [PATCH 2/2] Refactor the code into VectorEngine --- pyengine/engine/controller/IndexManage.py | 71 ++++---------- pyengine/engine/controller/VectorEngine.py | 106 +++++++++++++++++++++ pyengine/engine/model/FileTable.py | 9 +- pyengine/engine/settings.py | 5 +- 4 files changed, 135 insertions(+), 56 deletions(-) diff --git a/pyengine/engine/controller/IndexManage.py b/pyengine/engine/controller/IndexManage.py index 1930a5a7b6..df8fcafa2b 100644 --- a/pyengine/engine/controller/IndexManage.py +++ b/pyengine/engine/controller/IndexManage.py @@ -2,6 +2,7 @@ from flask import Flask, jsonify, request from flask_restful import Resource, Api from engine import app, db from engine.model.GroupTable import GroupTable +from engine.controller.VectorEngine import VectorEngine # app = Flask(__name__) api = Api(app) @@ -14,12 +15,10 @@ class Vector(Resource): self.__parser = reqparse.RequestParser() self.__parser.add_argument('vector', type=float, action='append', location=['json']) - def post(self, groupid): + def post(self, group_id): args = self.__parser.parse_args() vector = args['vector'] - # add vector into file - print("vector: ", vector) - return "vector post" + return VectorEngine.AddVector(group_id, vector) class VectorSearch(Resource): @@ -27,7 +26,7 @@ class VectorSearch(Resource): self.__parser = reqparse.RequestParser() self.__parser.add_argument('vector', type=float, action='append', location=['json']) - def post(self, groupid): + def post(self, group_id): args = self.__parser.parse_args() print('vector: ', args['vector']) # go to search every thing @@ -37,69 +36,37 @@ class VectorSearch(Resource): class Index(Resource): def __init__(self): self.__parser = reqparse.RequestParser() - self.__parser.add_argument('groupid', type=str) + # self.__parser.add_argument('group_id', type=str) - def post(self): - # go to create index for specific group - return "index post" + def post(self, group_id): + return VectorEngine.CreateIndex(group_id) class Group(Resource): def __init__(self): self.__parser = reqparse.RequestParser() - self.__parser.add_argument('groupid', type=str) + self.__parser.add_argument('group_id', type=str) - def post(self, groupid): - args = self.__parser.parse_args() - group = GroupTable.query.filter(GroupTable.group_name==groupid).first() - if(group): - return jsonify({'code': 1, 'group_name': groupid, 'file_number': group.file_number}) - else: - new_group = GroupTable(groupid) - db.session.add(new_group) - db.session.commit() - return jsonify({'code': 0, 'group_name': groupid, 'file_number': 0}) + def post(self, group_id): + return VectorEngine.AddGroup(group_id) + def get(self, group_id): + return VectorEngine.GetGroup(group_id) - def get(self, groupid): - args = self.__parser.parse_args() - group = GroupTable.query.filter(GroupTable.group_name==groupid).first() - if(group): - return jsonify({'code': 0, 'group_name': groupid, 'file_number': group.file_number}) - else: - return jsonify({'code': 1, 'group_name': groupid, 'file_number': 0}) # not found - - def delete(self, groupid): - args = self.__parser.parse_args() - group = GroupTable.query.filter(GroupTable.group_name==groupid).first() - if(group): - # old_group = GroupTable(groupid) - db.session.delete(group) - db.session.commit() - return jsonify({'code': 0, 'group_name': groupid, 'file_number': group.file_number}) - else: - return jsonify({'code': 0, 'group_name': groupid, 'file_number': 0}) + def delete(self, group_id): + return VectorEngine.DeleteGroup(group_id) class GroupList(Resource): def get(self): - group = GroupTable.query.all() - group_list = [] - for group_tuple in group: - group_item = {} - group_item['group_name'] = group_tuple.group_name - group_item['file_number'] = group_tuple.file_number - group_list.append(group_item) - - print(group_list) - return jsonify(results = group_list) + return VectorEngine.GetGroupList() -api.add_resource(Vector, '/vector/add/') -api.add_resource(Group, '/vector/group/') +api.add_resource(Vector, '/vector/add/') +api.add_resource(Group, '/vector/group/') api.add_resource(GroupList, '/vector/group') -api.add_resource(Index, '/vector/index') -api.add_resource(VectorSearch, '/vector/search/') +api.add_resource(Index, '/vector/index/') +api.add_resource(VectorSearch, '/vector/search/') # if __name__ == '__main__': diff --git a/pyengine/engine/controller/VectorEngine.py b/pyengine/engine/controller/VectorEngine.py index e69de29bb2..6ddf40aba6 100644 --- a/pyengine/engine/controller/VectorEngine.py +++ b/pyengine/engine/controller/VectorEngine.py @@ -0,0 +1,106 @@ +from engine.model.GroupTable import GroupTable +from engine.model.FileTable import FileTable +from flask import jsonify +from engine import db +import sys + +class VectorEngine(object): + + @staticmethod + def AddGroup(group_id): + group = GroupTable.query.filter(GroupTable.group_name==group_id).first() + if(group): + return jsonify({'code': 1, 'group_name': group_id, 'file_number': group.file_number}) + else: + new_group = GroupTable(group_id) + db.session.add(new_group) + db.session.commit() + return jsonify({'code': 0, 'group_name': group_id, 'file_number': 0}) + + @staticmethod + def GetGroup(group_id): + group = GroupTable.query.filter(GroupTable.group_name==group_id).first() + if(group): + return jsonify({'code': 0, 'group_name': group_id, 'file_number': group.file_number}) + else: + return jsonify({'code': 1, 'group_name': group_id, 'file_number': 0}) # not found + + + @staticmethod + def DeleteGroup(group_id): + group = GroupTable.query.filter(GroupTable.group_name==group_id).first() + if(group): + # old_group = GroupTable(group_id) + db.session.delete(group) + db.session.commit() + return jsonify({'code': 0, 'group_name': group_id, 'file_number': group.file_number}) + else: + return jsonify({'code': 0, 'group_name': group_id, 'file_number': 0}) + + @staticmethod + def GetGroupList(): + group = GroupTable.query.all() + group_list = [] + for group_tuple in group: + group_item = {} + group_item['group_name'] = group_tuple.group_name + group_item['file_number'] = group_tuple.file_number + group_list.append(group_item) + + print(group_list) + return jsonify(results = group_list) + + @staticmethod + def AddVector(group_id, vector): + print(group_id, vector) + file = FileTable.query.filter(and_(FileTable.group_name == group_id, FileTable.type == 'raw').first() + if (file): + if(file.row_number >= ROW_LIMIT): + # create index + index_filename = file.filename + "_index" + CreateIndex(group_id, index_filename) + + # create another raw file + raw_filename = file.group_id + '_' + file.seq_no + InsertVectorIntoRawFile(raw_filename, vector) + # insert a record into database + db.session.add(FileTable(group_id, raw_filename, 'raw', 1)) + db.session.commit() + else: + # we still can insert into exist raw file + InsertVectorIntoRawFile(file.filename, vector) + # update database + # FileTable.query.filter_by(FileTable.group_name == group_id).filter_by(FileTable.type == 'raw').update('row_number':file.row_number + 1) + else: + # first raw file + raw_filename = group_id + '_0' + # create and insert vector into raw file + InsertVectorIntoRawFile(raw_filename, vector) + # insert a record into database + db.session.add(FileTable(group_id, raw_filename, 'raw', 1)) + db.session.commit() + + return jsonify({'code': 0}) + + @staticmethod + def SearchVector(group_id, vector, limit): + # find all files + # according to difference files get topk of each + # reduce the topk from them + # construct response and send back + return jsonify({'code': 0}) + + @staticmethod + def CreateIndex(group_id): + print(group_id) + return jsonify({'code': 0}) + + @staticmethod + def CreateIndex(group_id, filename): + print(group_id, filename) + return jsonify({'code': 0}) + + @staticmethod + def InsertVectorIntoRawFile(filename, vector): + print(sys._getframe().f_code.co_name) + return filename \ No newline at end of file diff --git a/pyengine/engine/model/FileTable.py b/pyengine/engine/model/FileTable.py index 3b860d067e..c7ba52abf0 100644 --- a/pyengine/engine/model/FileTable.py +++ b/pyengine/engine/model/FileTable.py @@ -5,14 +5,17 @@ class FileTable(db.Model): id = db.Column(db.Integer, primary_key=True) group_name = db.Column(db.String(100)) filename = db.Column(db.String(100)) - type = (db.Integer) + type = db.Column(db.String(100)) row_number = db.Column(db.Integer) + seq_no = db.Column(db.Integer) - def __init__(self, group_name, filename, type): + def __init__(self, group_name, filename, type, row_number): self.group_name = group_name self.filename = filename self.type = type - self.row_number = 0 + self.row_number = row_number + self.type = type + self.seq_no = 0 def __repr__(self): return '' % self.tablename \ No newline at end of file diff --git a/pyengine/engine/settings.py b/pyengine/engine/settings.py index 932352a6e8..2a657e8b70 100644 --- a/pyengine/engine/settings.py +++ b/pyengine/engine/settings.py @@ -3,4 +3,7 @@ DEBUG = True SQLALCHEMY_TRACK_MODIFICATIONS = False # SECRET_KEY='A0Zr98j/3yX R~XHH!jmN]LWX/,?RT' -SQLALCHEMY_DATABASE_URI = "mysql+pymysql://vecwise@127.0.0.1:3306/vecdata" \ No newline at end of file +SQLALCHEMY_DATABASE_URI = "mysql+pymysql://vecwise@127.0.0.1:3306/vecdata" + +ROW_LIMIT = 10000000 +DATABASE_DIRECTORY = '/home/jinhai/Document/development/vecwise_engine/db' \ No newline at end of file