Support many encodings for text files. (#458)

### What problem does this PR solve?

#384

### Type of change

- [x] Performance Improvement
This commit is contained in:
KevinHuSh
2024-04-19 18:02:53 +08:00
committed by GitHub
parent cda7b607cb
commit ed6081845a
19 changed files with 118 additions and 55 deletions

View File

@ -121,6 +121,7 @@ def dispatch():
tsks.append(new_task())
bulk_insert_into_db(Task, tsks, True)
print("TSK:", len(tsks))
set_dispatching(r["id"])
except Exception as e:
cron_logger.exception(e)

View File

@ -19,6 +19,7 @@ import logging
import os
import hashlib
import copy
import random
import re
import sys
import time
@ -92,6 +93,7 @@ def set_progress(task_id, from_page=0, to_page=-1,
def collect(comm, mod, tm):
tasks = TaskService.get_tasks(tm, mod, comm)
#print(tasks)
if len(tasks) == 0:
time.sleep(1)
return pd.DataFrame()
@ -243,6 +245,7 @@ def main(comm, mod):
tmf = open(tm_fnm, "a+")
for _, r in rows.iterrows():
callback = partial(set_progress, r["id"], r["from_page"], r["to_page"])
#callback(random.random()/10., "Task has been received.")
try:
embd_mdl = LLMBundle(r["tenant_id"], LLMType.EMBEDDING, llm_name=r["embd_id"], lang=r["language"])
except Exception as e:
@ -300,9 +303,8 @@ if __name__ == "__main__":
peewee_logger.addHandler(database_logger.handlers[0])
peewee_logger.setLevel(database_logger.level)
from mpi4py import MPI
comm = MPI.COMM_WORLD
#from mpi4py import MPI
#comm = MPI.COMM_WORLD
while True:
main(int(sys.argv[2]), int(sys.argv[1]))
close_connection()