mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Introduced task priority (#6118)
### What problem does this PR solve?

Introduced task priority

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -845,6 +845,7 @@ class Task(DataBaseModel):
|
||||
from_page = IntegerField(default=0)
|
||||
to_page = IntegerField(default=100000000)
|
||||
task_type = CharField(max_length=32, null=False, default="")
|
||||
priority = IntegerField(default=0)
|
||||
|
||||
begin_at = DateTimeField(null=True, index=True)
|
||||
process_duation = FloatField(default=0)
|
||||
@ -1122,3 +1123,10 @@ def migrate_db():
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
migrate(
|
||||
migrator.add_column("task", "priority",
|
||||
IntegerField(default=0))
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@ -34,7 +34,7 @@ from api.db.services.common_service import CommonService
|
||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.utils import current_timestamp, get_format_time, get_uuid
|
||||
from rag.nlp import rag_tokenizer, search
|
||||
from rag.settings import SVR_QUEUE_NAME
|
||||
from rag.settings import get_svr_queue_name
|
||||
from rag.utils.redis_conn import REDIS_CONN
|
||||
from rag.utils.storage_factory import STORAGE_IMPL
|
||||
|
||||
@ -392,6 +392,7 @@ class DocumentService(CommonService):
|
||||
has_graphrag = False
|
||||
e, doc = DocumentService.get_by_id(d["id"])
|
||||
status = doc.run # TaskStatus.RUNNING.value
|
||||
priority = 0
|
||||
for t in tsks:
|
||||
if 0 <= t.progress < 1:
|
||||
finished = False
|
||||
@ -403,16 +404,17 @@ class DocumentService(CommonService):
|
||||
has_raptor = True
|
||||
elif t.task_type == "graphrag":
|
||||
has_graphrag = True
|
||||
priority = max(priority, t.priority)
|
||||
prg /= len(tsks)
|
||||
if finished and bad:
|
||||
prg = -1
|
||||
status = TaskStatus.FAIL.value
|
||||
elif finished:
|
||||
if d["parser_config"].get("raptor", {}).get("use_raptor") and not has_raptor:
|
||||
queue_raptor_o_graphrag_tasks(d, "raptor")
|
||||
queue_raptor_o_graphrag_tasks(d, "raptor", priority)
|
||||
prg = 0.98 * len(tsks) / (len(tsks) + 1)
|
||||
elif d["parser_config"].get("graphrag", {}).get("use_graphrag") and not has_graphrag:
|
||||
queue_raptor_o_graphrag_tasks(d, "graphrag")
|
||||
queue_raptor_o_graphrag_tasks(d, "graphrag", priority)
|
||||
prg = 0.98 * len(tsks) / (len(tsks) + 1)
|
||||
else:
|
||||
status = TaskStatus.DONE.value
|
||||
@ -449,7 +451,7 @@ class DocumentService(CommonService):
|
||||
return False
|
||||
|
||||
|
||||
def queue_raptor_o_graphrag_tasks(doc, ty):
|
||||
def queue_raptor_o_graphrag_tasks(doc, ty, priority):
|
||||
chunking_config = DocumentService.get_chunking_config(doc["id"])
|
||||
hasher = xxhash.xxh64()
|
||||
for field in sorted(chunking_config.keys()):
|
||||
@ -472,7 +474,7 @@ def queue_raptor_o_graphrag_tasks(doc, ty):
|
||||
hasher.update(ty.encode("utf-8"))
|
||||
task["digest"] = hasher.hexdigest()
|
||||
bulk_insert_into_db(Task, [task], True)
|
||||
assert REDIS_CONN.queue_product(SVR_QUEUE_NAME, message=task), "Can't access Redis. Please check the Redis' status."
|
||||
assert REDIS_CONN.queue_product(get_svr_queue_name(priority), message=task), "Can't access Redis. Please check the Redis' status."
|
||||
|
||||
|
||||
def doc_upload_and_parse(conversation_id, file_objs, user_id):
|
||||
|
||||
@ -28,7 +28,7 @@ from api.db.services.common_service import CommonService
|
||||
from api.db.services.document_service import DocumentService
|
||||
from api.utils import current_timestamp, get_uuid
|
||||
from deepdoc.parser.excel_parser import RAGFlowExcelParser
|
||||
from rag.settings import SVR_QUEUE_NAME
|
||||
from rag.settings import get_svr_queue_name
|
||||
from rag.utils.storage_factory import STORAGE_IMPL
|
||||
from rag.utils.redis_conn import REDIS_CONN
|
||||
from api import settings
|
||||
@ -289,7 +289,7 @@ class TaskService(CommonService):
|
||||
).execute()
|
||||
|
||||
|
||||
def queue_tasks(doc: dict, bucket: str, name: str):
|
||||
def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
|
||||
"""Create and queue document processing tasks.
|
||||
|
||||
This function creates processing tasks for a document based on its type and configuration.
|
||||
@ -301,6 +301,7 @@ def queue_tasks(doc: dict, bucket: str, name: str):
|
||||
doc (dict): Document dictionary containing metadata and configuration.
|
||||
bucket (str): Storage bucket name where the document is stored.
|
||||
name (str): File name of the document.
|
||||
priority (int): Priority level for task queueing; passed to get_svr_queue_name() to select the queue the tasks are pushed to.
|
||||
|
||||
Note:
|
||||
- For PDF documents, tasks are created per page range based on configuration
|
||||
@ -358,6 +359,7 @@ def queue_tasks(doc: dict, bucket: str, name: str):
|
||||
task_digest = hasher.hexdigest()
|
||||
task["digest"] = task_digest
|
||||
task["progress"] = 0.0
|
||||
task["priority"] = priority
|
||||
|
||||
prev_tasks = TaskService.get_tasks(doc["id"])
|
||||
ck_num = 0
|
||||
@ -380,7 +382,7 @@ def queue_tasks(doc: dict, bucket: str, name: str):
|
||||
unfinished_task_array = [task for task in parse_task_array if task["progress"] < 1.0]
|
||||
for unfinished_task in unfinished_task_array:
|
||||
assert REDIS_CONN.queue_product(
|
||||
SVR_QUEUE_NAME, message=unfinished_task
|
||||
get_svr_queue_name(priority), message=unfinished_task
|
||||
), "Can't access Redis. Please check the Redis' status."
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user