enlarge docker memory usage (#501)

### What problem does this PR solve?

### Type of change

- [x] Refactoring
This commit is contained in:
KevinHuSh
2024-04-23 14:41:10 +08:00
committed by GitHub
parent 78402d9a57
commit 0dfc8ddc0f
12 changed files with 50 additions and 36 deletions

View File

@@ -33,6 +33,8 @@ from api.settings import database_logger
from api.utils import get_format_time, get_uuid
from api.utils.file_utils import get_project_base_directory
from rag.utils.redis_conn import REDIS_CONN
from api.db.db_models import init_database_tables as init_web_db
from api.db.init_data import init_web_data
def collect(tm):
@@ -181,6 +183,9 @@ if __name__ == "__main__":
peewee_logger.propagate = False
peewee_logger.addHandler(database_logger.handlers[0])
peewee_logger.setLevel(database_logger.level)
# init db
init_web_db()
init_web_data()
while True:
dispatch()

View File

@@ -163,6 +163,7 @@ def build(row):
"doc_id": row["doc_id"],
"kb_id": [str(row["kb_id"])]
}
el = 0
for ck in cks:
d = copy.deepcopy(doc)
d.update(ck)
@@ -182,10 +183,13 @@ def build(row):
else:
d["image"].save(output_buffer, format='JPEG')
st = timer()
MINIO.put(row["kb_id"], d["_id"], output_buffer.getvalue())
el += timer() - st
d["img_id"] = "{}-{}".format(row["kb_id"], d["_id"])
del d["image"]
docs.append(d)
cron_logger.info("MINIO PUT({}):{}".format(row["name"], el))
return docs
@@ -258,7 +262,9 @@ def main(comm, mod):
callback(prog=-1, msg=str(e))
continue
st = timer()
cks = build(r)
cron_logger.info("Build chunks({}): {}".format(r["name"], timer()-st))
if cks is None:
continue
if not cks:
@@ -277,12 +283,14 @@ def main(comm, mod):
callback(-1, "Embedding error:{}".format(str(e)))
cron_logger.error(str(e))
tk_count = 0
cron_logger.info("Embedding elapsed({}): {}".format(r["name"], timer()-st))
callback(msg="Finished embedding({})! Start to build index!".format(timer()-st))
init_kb(r)
chunk_count = len(set([c["_id"] for c in cks]))
st = timer()
es_r = ELASTICSEARCH.bulk(cks, search.index_name(r["tenant_id"]))
cron_logger.info("Indexing elapsed({}): {}".format(r["name"], timer()-st))
if es_r:
callback(-1, "Index failure!")
ELASTICSEARCH.deleteByQuery(