mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-01-23 11:36:38 +08:00
Fix errors detected by Ruff (#3918)
### What problem does this PR solve?

Fix errors detected by Ruff

### Type of change

- [x] Refactoring
This commit is contained in:
@ -142,7 +142,6 @@ def set_conversation():
|
||||
if not objs:
|
||||
return get_json_result(
|
||||
data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
|
||||
req = request.json
|
||||
try:
|
||||
if objs[0].source == "agent":
|
||||
e, cvs = UserCanvasService.get_by_id(objs[0].dialog_id)
|
||||
@ -188,7 +187,8 @@ def completion():
|
||||
e, conv = API4ConversationService.get_by_id(req["conversation_id"])
|
||||
if not e:
|
||||
return get_data_error_result(message="Conversation not found!")
|
||||
if "quote" not in req: req["quote"] = False
|
||||
if "quote" not in req:
|
||||
req["quote"] = False
|
||||
|
||||
msg = []
|
||||
for m in req["messages"]:
|
||||
@ -197,7 +197,8 @@ def completion():
|
||||
if m["role"] == "assistant" and not msg:
|
||||
continue
|
||||
msg.append(m)
|
||||
if not msg[-1].get("id"): msg[-1]["id"] = get_uuid()
|
||||
if not msg[-1].get("id"):
|
||||
msg[-1]["id"] = get_uuid()
|
||||
message_id = msg[-1]["id"]
|
||||
|
||||
def fillin_conv(ans):
|
||||
@ -674,11 +675,13 @@ def completion_faq():
|
||||
e, conv = API4ConversationService.get_by_id(req["conversation_id"])
|
||||
if not e:
|
||||
return get_data_error_result(message="Conversation not found!")
|
||||
if "quote" not in req: req["quote"] = True
|
||||
if "quote" not in req:
|
||||
req["quote"] = True
|
||||
|
||||
msg = []
|
||||
msg.append({"role": "user", "content": req["word"]})
|
||||
if not msg[-1].get("id"): msg[-1]["id"] = get_uuid()
|
||||
if not msg[-1].get("id"):
|
||||
msg[-1]["id"] = get_uuid()
|
||||
message_id = msg[-1]["id"]
|
||||
|
||||
def fillin_conv(ans):
|
||||
|
||||
@ -13,10 +13,8 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import logging
|
||||
import json
|
||||
import traceback
|
||||
from functools import partial
|
||||
from flask import request, Response
|
||||
from flask_login import login_required, current_user
|
||||
from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService
|
||||
@ -60,7 +58,8 @@ def rm():
|
||||
def save():
|
||||
req = request.json
|
||||
req["user_id"] = current_user.id
|
||||
if not isinstance(req["dsl"], str): req["dsl"] = json.dumps(req["dsl"], ensure_ascii=False)
|
||||
if not isinstance(req["dsl"], str):
|
||||
req["dsl"] = json.dumps(req["dsl"], ensure_ascii=False)
|
||||
|
||||
req["dsl"] = json.loads(req["dsl"])
|
||||
if "id" not in req:
|
||||
@ -153,7 +152,8 @@ def run():
|
||||
return resp
|
||||
|
||||
for answer in canvas.run(stream=False):
|
||||
if answer.get("running_status"): continue
|
||||
if answer.get("running_status"):
|
||||
continue
|
||||
final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else ""
|
||||
canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
|
||||
if final_ans.get("reference"):
|
||||
|
||||
@ -237,7 +237,8 @@ def create():
|
||||
e, kb = KnowledgebaseService.get_by_id(doc.kb_id)
|
||||
if not e:
|
||||
return get_data_error_result(message="Knowledgebase not found!")
|
||||
if kb.pagerank: d["pagerank_fea"] = kb.pagerank
|
||||
if kb.pagerank:
|
||||
d["pagerank_fea"] = kb.pagerank
|
||||
|
||||
embd_id = DocumentService.get_embd_id(req["doc_id"])
|
||||
embd_mdl = LLMBundle(tenant_id, LLMType.EMBEDDING.value, embd_id)
|
||||
|
||||
@ -281,10 +281,12 @@ def thumbup():
|
||||
if req["message_id"] == msg.get("id", "") and msg.get("role", "") == "assistant":
|
||||
if up_down:
|
||||
msg["thumbup"] = True
|
||||
if "feedback" in msg: del msg["feedback"]
|
||||
if "feedback" in msg:
|
||||
del msg["feedback"]
|
||||
else:
|
||||
msg["thumbup"] = False
|
||||
if feedback: msg["feedback"] = feedback
|
||||
if feedback:
|
||||
msg["feedback"] = feedback
|
||||
break
|
||||
|
||||
ConversationService.update_by_id(conv["id"], conv)
|
||||
|
||||
@ -37,10 +37,12 @@ def set_dialog():
|
||||
top_n = req.get("top_n", 6)
|
||||
top_k = req.get("top_k", 1024)
|
||||
rerank_id = req.get("rerank_id", "")
|
||||
if not rerank_id: req["rerank_id"] = ""
|
||||
if not rerank_id:
|
||||
req["rerank_id"] = ""
|
||||
similarity_threshold = req.get("similarity_threshold", 0.1)
|
||||
vector_similarity_weight = req.get("vector_similarity_weight", 0.3)
|
||||
if vector_similarity_weight is None: vector_similarity_weight = 0.3
|
||||
if vector_similarity_weight is None:
|
||||
vector_similarity_weight = 0.3
|
||||
llm_setting = req.get("llm_setting", {})
|
||||
default_prompt = {
|
||||
"system": """你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。
|
||||
|
||||
@ -13,7 +13,6 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License
|
||||
#
|
||||
import json
|
||||
import os.path
|
||||
import pathlib
|
||||
import re
|
||||
@ -90,7 +89,8 @@ def web_crawl():
|
||||
raise LookupError("Can't find this knowledgebase!")
|
||||
|
||||
blob = html2pdf(url)
|
||||
if not blob: return server_error_response(ValueError("Download failure."))
|
||||
if not blob:
|
||||
return server_error_response(ValueError("Download failure."))
|
||||
|
||||
root_folder = FileService.get_root_folder(current_user.id)
|
||||
pf_id = root_folder["id"]
|
||||
@ -290,7 +290,8 @@ def change_status():
|
||||
def rm():
|
||||
req = request.json
|
||||
doc_ids = req["doc_id"]
|
||||
if isinstance(doc_ids, str): doc_ids = [doc_ids]
|
||||
if isinstance(doc_ids, str):
|
||||
doc_ids = [doc_ids]
|
||||
|
||||
for doc_id in doc_ids:
|
||||
if not DocumentService.accessible4deletion(doc_id, current_user.id):
|
||||
|
||||
@ -351,8 +351,10 @@ def list_app():
|
||||
|
||||
llm_set = set([m["llm_name"] + "@" + m["fid"] for m in llms])
|
||||
for o in objs:
|
||||
if not o.api_key: continue
|
||||
if o.llm_name + "@" + o.llm_factory in llm_set: continue
|
||||
if not o.api_key:
|
||||
continue
|
||||
if o.llm_name + "@" + o.llm_factory in llm_set:
|
||||
continue
|
||||
llms.append({"llm_name": o.llm_name, "model_type": o.model_type, "fid": o.llm_factory, "available": True})
|
||||
|
||||
res = {}
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService
|
||||
from api.db.services.canvas_service import UserCanvasService
|
||||
from api.utils.api_utils import get_error_data_result, token_required
|
||||
from api.utils.api_utils import get_result
|
||||
from flask import request
|
||||
|
||||
@ -41,7 +41,6 @@ from api.utils.api_utils import construct_json_result, get_parser_config
|
||||
from rag.nlp import search
|
||||
from rag.utils import rmSpace
|
||||
from rag.utils.storage_factory import STORAGE_IMPL
|
||||
import os
|
||||
|
||||
MAXIMUM_OF_UPLOADING_FILES = 256
|
||||
|
||||
@ -976,12 +975,12 @@ def add_chunk(tenant_id, dataset_id, document_id):
|
||||
if not req.get("content"):
|
||||
return get_error_data_result(message="`content` is required")
|
||||
if "important_keywords" in req:
|
||||
if type(req["important_keywords"]) != list:
|
||||
if not isinstance(req["important_keywords"], list):
|
||||
return get_error_data_result(
|
||||
"`important_keywords` is required to be a list"
|
||||
)
|
||||
if "questions" in req:
|
||||
if type(req["questions"]) != list:
|
||||
if not isinstance(req["questions"], list):
|
||||
return get_error_data_result(
|
||||
"`questions` is required to be a list"
|
||||
)
|
||||
|
||||
@ -143,8 +143,10 @@ def completion(tenant_id, chat_id):
|
||||
}
|
||||
conv.message.append(question)
|
||||
for m in conv.message:
|
||||
if m["role"] == "system": continue
|
||||
if m["role"] == "assistant" and not msg: continue
|
||||
if m["role"] == "system":
|
||||
continue
|
||||
if m["role"] == "assistant" and not msg:
|
||||
continue
|
||||
msg.append(m)
|
||||
message_id = msg[-1].get("id")
|
||||
e, dia = DialogService.get_by_id(conv.dialog_id)
|
||||
@ -267,7 +269,8 @@ def agent_completion(tenant_id, agent_id):
|
||||
if m["role"] == "assistant" and not msg:
|
||||
continue
|
||||
msg.append(m)
|
||||
if not msg[-1].get("id"): msg[-1]["id"] = get_uuid()
|
||||
if not msg[-1].get("id"):
|
||||
msg[-1]["id"] = get_uuid()
|
||||
message_id = msg[-1]["id"]
|
||||
|
||||
stream = req.get("stream", True)
|
||||
@ -361,7 +364,8 @@ def agent_completion(tenant_id, agent_id):
|
||||
return resp
|
||||
|
||||
for answer in canvas.run(stream=False):
|
||||
if answer.get("running_status"): continue
|
||||
if answer.get("running_status"):
|
||||
continue
|
||||
final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else ""
|
||||
canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
|
||||
if final_ans.get("reference"):
|
||||
|
||||
@ -330,7 +330,7 @@ def user_info_from_github(access_token):
|
||||
headers=headers,
|
||||
).json()
|
||||
user_info["email"] = next(
|
||||
(email for email in email_info if email["primary"] == True), None
|
||||
(email for email in email_info if email["primary"]), None
|
||||
)["email"]
|
||||
return user_info
|
||||
|
||||
|
||||
@ -130,7 +130,7 @@ def is_continuous_field(cls: typing.Type) -> bool:
|
||||
for p in cls.__bases__:
|
||||
if p in CONTINUOUS_FIELD_TYPE:
|
||||
return True
|
||||
elif p != Field and p != object:
|
||||
elif p is not Field and p is not object:
|
||||
if is_continuous_field(p):
|
||||
return True
|
||||
else:
|
||||
|
||||
@ -170,7 +170,7 @@ def add_graph_templates():
|
||||
cnvs = json.load(open(os.path.join(dir, fnm), "r"))
|
||||
try:
|
||||
CanvasTemplateService.save(**cnvs)
|
||||
except:
|
||||
except Exception:
|
||||
CanvasTemplateService.update_by_id(cnvs["id"], cnvs)
|
||||
except Exception:
|
||||
logging.exception("Add graph templates error: ")
|
||||
|
||||
@ -15,13 +15,14 @@
|
||||
#
|
||||
import pathlib
|
||||
import re
|
||||
from .user_service import UserService
|
||||
from .user_service import UserService as UserService
|
||||
|
||||
|
||||
def duplicate_name(query_func, **kwargs):
|
||||
fnm = kwargs["name"]
|
||||
objs = query_func(**kwargs)
|
||||
if not objs: return fnm
|
||||
if not objs:
|
||||
return fnm
|
||||
ext = pathlib.Path(fnm).suffix #.jpg
|
||||
nm = re.sub(r"%s$"%ext, "", fnm)
|
||||
r = re.search(r"\(([0-9]+)\)$", nm)
|
||||
@ -31,8 +32,8 @@ def duplicate_name(query_func, **kwargs):
|
||||
nm = re.sub(r"\([0-9]+\)$", "", nm)
|
||||
c += 1
|
||||
nm = f"{nm}({c})"
|
||||
if ext: nm += f"{ext}"
|
||||
if ext:
|
||||
nm += f"{ext}"
|
||||
|
||||
kwargs["name"] = nm
|
||||
return duplicate_name(query_func, **kwargs)
|
||||
|
||||
|
||||
@ -64,7 +64,8 @@ class API4ConversationService(CommonService):
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def stats(cls, tenant_id, from_date, to_date, source=None):
|
||||
if len(to_date) == 10: to_date += " 23:59:59"
|
||||
if len(to_date) == 10:
|
||||
to_date += " 23:59:59"
|
||||
return cls.model.select(
|
||||
cls.model.create_date.truncate("day").alias("dt"),
|
||||
peewee.fn.COUNT(
|
||||
|
||||
@ -13,9 +13,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from datetime import datetime
|
||||
import peewee
|
||||
from api.db.db_models import DB, API4Conversation, APIToken, Dialog, CanvasTemplate, UserCanvas
|
||||
from api.db.db_models import DB, CanvasTemplate, UserCanvas
|
||||
from api.db.services.common_service import CommonService
|
||||
|
||||
|
||||
|
||||
@ -115,7 +115,7 @@ class CommonService:
|
||||
try:
|
||||
obj = cls.model.query(id=pid)[0]
|
||||
return True, obj
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
return False, None
|
||||
|
||||
@classmethod
|
||||
|
||||
@ -106,15 +106,15 @@ def message_fit_in(msg, max_length=4000):
|
||||
return c, msg
|
||||
|
||||
ll = num_tokens_from_string(msg_[0]["content"])
|
||||
l = num_tokens_from_string(msg_[-1]["content"])
|
||||
if ll / (ll + l) > 0.8:
|
||||
ll2 = num_tokens_from_string(msg_[-1]["content"])
|
||||
if ll / (ll + ll2) > 0.8:
|
||||
m = msg_[0]["content"]
|
||||
m = encoder.decode(encoder.encode(m)[:max_length - l])
|
||||
m = encoder.decode(encoder.encode(m)[:max_length - ll2])
|
||||
msg[0]["content"] = m
|
||||
return max_length, msg
|
||||
|
||||
m = msg_[1]["content"]
|
||||
m = encoder.decode(encoder.encode(m)[:max_length - l])
|
||||
m = encoder.decode(encoder.encode(m)[:max_length - ll2])
|
||||
msg[1]["content"] = m
|
||||
return max_length, msg
|
||||
|
||||
@ -257,7 +257,8 @@ def chat(dialog, messages, stream=True, **kwargs):
|
||||
idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx])
|
||||
recall_docs = [
|
||||
d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx]
|
||||
if not recall_docs: recall_docs = kbinfos["doc_aggs"]
|
||||
if not recall_docs:
|
||||
recall_docs = kbinfos["doc_aggs"]
|
||||
kbinfos["doc_aggs"] = recall_docs
|
||||
|
||||
refs = deepcopy(kbinfos)
|
||||
@ -433,13 +434,15 @@ def relevant(tenant_id, llm_id, question, contents: list):
|
||||
Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
|
||||
No other words needed except 'yes' or 'no'.
|
||||
"""
|
||||
if not contents:return False
|
||||
if not contents:
|
||||
return False
|
||||
contents = "Documents: \n" + " - ".join(contents)
|
||||
contents = f"Question: {question}\n" + contents
|
||||
if num_tokens_from_string(contents) >= chat_mdl.max_length - 4:
|
||||
contents = encoder.decode(encoder.encode(contents)[:chat_mdl.max_length - 4])
|
||||
ans = chat_mdl.chat(prompt, [{"role": "user", "content": contents}], {"temperature": 0.01})
|
||||
if ans.lower().find("yes") >= 0: return True
|
||||
if ans.lower().find("yes") >= 0:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
@ -481,8 +484,10 @@ Requirements:
|
||||
]
|
||||
_, msg = message_fit_in(msg, chat_mdl.max_length)
|
||||
kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2})
|
||||
if isinstance(kwd, tuple): kwd = kwd[0]
|
||||
if kwd.find("**ERROR**") >=0: return ""
|
||||
if isinstance(kwd, tuple):
|
||||
kwd = kwd[0]
|
||||
if kwd.find("**ERROR**") >=0:
|
||||
return ""
|
||||
return kwd
|
||||
|
||||
|
||||
@ -508,8 +513,10 @@ Requirements:
|
||||
]
|
||||
_, msg = message_fit_in(msg, chat_mdl.max_length)
|
||||
kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2})
|
||||
if isinstance(kwd, tuple): kwd = kwd[0]
|
||||
if kwd.find("**ERROR**") >= 0: return ""
|
||||
if isinstance(kwd, tuple):
|
||||
kwd = kwd[0]
|
||||
if kwd.find("**ERROR**") >= 0:
|
||||
return ""
|
||||
return kwd
|
||||
|
||||
|
||||
@ -520,7 +527,8 @@ def full_question(tenant_id, llm_id, messages):
|
||||
chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id)
|
||||
conv = []
|
||||
for m in messages:
|
||||
if m["role"] not in ["user", "assistant"]: continue
|
||||
if m["role"] not in ["user", "assistant"]:
|
||||
continue
|
||||
conv.append("{}: {}".format(m["role"].upper(), m["content"]))
|
||||
conv = "\n".join(conv)
|
||||
today = datetime.date.today().isoformat()
|
||||
@ -581,7 +589,8 @@ Output: What's the weather in Rochester on {tomorrow}?
|
||||
|
||||
|
||||
def tts(tts_mdl, text):
|
||||
if not tts_mdl or not text: return
|
||||
if not tts_mdl or not text:
|
||||
return
|
||||
bin = b""
|
||||
for chunk in tts_mdl.tts(text):
|
||||
bin += chunk
|
||||
@ -641,7 +650,8 @@ def ask(question, kb_ids, tenant_id):
|
||||
idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx])
|
||||
recall_docs = [
|
||||
d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx]
|
||||
if not recall_docs: recall_docs = kbinfos["doc_aggs"]
|
||||
if not recall_docs:
|
||||
recall_docs = kbinfos["doc_aggs"]
|
||||
kbinfos["doc_aggs"] = recall_docs
|
||||
refs = deepcopy(kbinfos)
|
||||
for c in refs["chunks"]:
|
||||
|
||||
@ -532,7 +532,8 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
|
||||
try:
|
||||
mind_map = json.dumps(mindmap([c["content_with_weight"] for c in docs if c["doc_id"] == doc_id]).output,
|
||||
ensure_ascii=False, indent=2)
|
||||
if len(mind_map) < 32: raise Exception("Few content: " + mind_map)
|
||||
if len(mind_map) < 32:
|
||||
raise Exception("Few content: " + mind_map)
|
||||
cks.append({
|
||||
"id": get_uuid(),
|
||||
"doc_id": doc_id,
|
||||
|
||||
@ -20,7 +20,7 @@ from api.db.db_models import DB
|
||||
from api.db.db_models import File, File2Document
|
||||
from api.db.services.common_service import CommonService
|
||||
from api.db.services.document_service import DocumentService
|
||||
from api.utils import current_timestamp, datetime_format, get_uuid
|
||||
from api.utils import current_timestamp, datetime_format
|
||||
|
||||
|
||||
class File2DocumentService(CommonService):
|
||||
@ -63,7 +63,7 @@ class File2DocumentService(CommonService):
|
||||
def update_by_file_id(cls, file_id, obj):
|
||||
obj["update_time"] = current_timestamp()
|
||||
obj["update_date"] = datetime_format(datetime.now())
|
||||
num = cls.model.update(obj).where(cls.model.id == file_id).execute()
|
||||
# num = cls.model.update(obj).where(cls.model.id == file_id).execute()
|
||||
e, obj = cls.get_by_id(cls.model.id)
|
||||
return obj
|
||||
|
||||
|
||||
@ -85,7 +85,8 @@ class FileService(CommonService):
|
||||
.join(Document, on=(File2Document.document_id == Document.id))
|
||||
.join(Knowledgebase, on=(Knowledgebase.id == Document.kb_id))
|
||||
.where(cls.model.id == file_id))
|
||||
if not kbs: return []
|
||||
if not kbs:
|
||||
return []
|
||||
kbs_info_list = []
|
||||
for kb in list(kbs.dicts()):
|
||||
kbs_info_list.append({"kb_id": kb['id'], "kb_name": kb['name']})
|
||||
@ -304,7 +305,8 @@ class FileService(CommonService):
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def add_file_from_kb(cls, doc, kb_folder_id, tenant_id):
|
||||
for _ in File2DocumentService.get_by_document_id(doc["id"]): return
|
||||
for _ in File2DocumentService.get_by_document_id(doc["id"]):
|
||||
return
|
||||
file = {
|
||||
"id": get_uuid(),
|
||||
"parent_id": kb_folder_id,
|
||||
|
||||
@ -107,7 +107,8 @@ class TenantLLMService(CommonService):
|
||||
|
||||
model_config = cls.get_api_key(tenant_id, mdlnm)
|
||||
mdlnm, fid = TenantLLMService.split_model_name_and_factory(mdlnm)
|
||||
if model_config: model_config = model_config.to_dict()
|
||||
if model_config:
|
||||
model_config = model_config.to_dict()
|
||||
if not model_config:
|
||||
if llm_type in [LLMType.EMBEDDING, LLMType.RERANK]:
|
||||
llm = LLMService.query(llm_name=mdlnm) if not fid else LLMService.query(llm_name=mdlnm, fid=fid)
|
||||
|
||||
@ -57,28 +57,33 @@ class TaskService(CommonService):
|
||||
Tenant.img2txt_id,
|
||||
Tenant.asr_id,
|
||||
Tenant.llm_id,
|
||||
cls.model.update_time]
|
||||
docs = cls.model.select(*fields) \
|
||||
.join(Document, on=(cls.model.doc_id == Document.id)) \
|
||||
.join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id)) \
|
||||
.join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id)) \
|
||||
cls.model.update_time,
|
||||
]
|
||||
docs = (
|
||||
cls.model.select(*fields)
|
||||
.join(Document, on=(cls.model.doc_id == Document.id))
|
||||
.join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id))
|
||||
.join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id))
|
||||
.where(cls.model.id == task_id)
|
||||
)
|
||||
docs = list(docs.dicts())
|
||||
if not docs: return None
|
||||
if not docs:
|
||||
return None
|
||||
|
||||
msg = "\nTask has been received."
|
||||
prog = random.random() / 10.
|
||||
prog = random.random() / 10.0
|
||||
if docs[0]["retry_count"] >= 3:
|
||||
msg = "\nERROR: Task is abandoned after 3 times attempts."
|
||||
prog = -1
|
||||
|
||||
cls.model.update(progress_msg=cls.model.progress_msg + msg,
|
||||
progress=prog,
|
||||
retry_count=docs[0]["retry_count"]+1
|
||||
).where(
|
||||
cls.model.id == docs[0]["id"]).execute()
|
||||
cls.model.update(
|
||||
progress_msg=cls.model.progress_msg + msg,
|
||||
progress=prog,
|
||||
retry_count=docs[0]["retry_count"] + 1,
|
||||
).where(cls.model.id == docs[0]["id"]).execute()
|
||||
|
||||
if docs[0]["retry_count"] >= 3: return None
|
||||
if docs[0]["retry_count"] >= 3:
|
||||
return None
|
||||
|
||||
return docs[0]
|
||||
|
||||
@ -86,21 +91,44 @@ class TaskService(CommonService):
|
||||
@DB.connection_context()
|
||||
def get_ongoing_doc_name(cls):
|
||||
with DB.lock("get_task", -1):
|
||||
docs = cls.model.select(*[Document.id, Document.kb_id, Document.location, File.parent_id]) \
|
||||
.join(Document, on=(cls.model.doc_id == Document.id)) \
|
||||
.join(File2Document, on=(File2Document.document_id == Document.id), join_type=JOIN.LEFT_OUTER) \
|
||||
.join(File, on=(File2Document.file_id == File.id), join_type=JOIN.LEFT_OUTER) \
|
||||
docs = (
|
||||
cls.model.select(
|
||||
*[Document.id, Document.kb_id, Document.location, File.parent_id]
|
||||
)
|
||||
.join(Document, on=(cls.model.doc_id == Document.id))
|
||||
.join(
|
||||
File2Document,
|
||||
on=(File2Document.document_id == Document.id),
|
||||
join_type=JOIN.LEFT_OUTER,
|
||||
)
|
||||
.join(
|
||||
File,
|
||||
on=(File2Document.file_id == File.id),
|
||||
join_type=JOIN.LEFT_OUTER,
|
||||
)
|
||||
.where(
|
||||
Document.status == StatusEnum.VALID.value,
|
||||
Document.run == TaskStatus.RUNNING.value,
|
||||
~(Document.type == FileType.VIRTUAL.value),
|
||||
cls.model.progress < 1,
|
||||
cls.model.create_time >= current_timestamp() - 1000 * 600
|
||||
cls.model.create_time >= current_timestamp() - 1000 * 600,
|
||||
)
|
||||
)
|
||||
docs = list(docs.dicts())
|
||||
if not docs: return []
|
||||
if not docs:
|
||||
return []
|
||||
|
||||
return list(set([(d["parent_id"] if d["parent_id"] else d["kb_id"], d["location"]) for d in docs]))
|
||||
return list(
|
||||
set(
|
||||
[
|
||||
(
|
||||
d["parent_id"] if d["parent_id"] else d["kb_id"],
|
||||
d["location"],
|
||||
)
|
||||
for d in docs
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
@ -118,28 +146,30 @@ class TaskService(CommonService):
|
||||
def update_progress(cls, id, info):
|
||||
if os.environ.get("MACOS"):
|
||||
if info["progress_msg"]:
|
||||
cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where(
|
||||
cls.model.id == id).execute()
|
||||
cls.model.update(
|
||||
progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]
|
||||
).where(cls.model.id == id).execute()
|
||||
if "progress" in info:
|
||||
cls.model.update(progress=info["progress"]).where(
|
||||
cls.model.id == id).execute()
|
||||
cls.model.id == id
|
||||
).execute()
|
||||
return
|
||||
|
||||
with DB.lock("update_progress", -1):
|
||||
if info["progress_msg"]:
|
||||
cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where(
|
||||
cls.model.id == id).execute()
|
||||
cls.model.update(
|
||||
progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]
|
||||
).where(cls.model.id == id).execute()
|
||||
if "progress" in info:
|
||||
cls.model.update(progress=info["progress"]).where(
|
||||
cls.model.id == id).execute()
|
||||
cls.model.id == id
|
||||
).execute()
|
||||
|
||||
|
||||
def queue_tasks(doc: dict, bucket: str, name: str):
|
||||
def new_task():
|
||||
return {
|
||||
"id": get_uuid(),
|
||||
"doc_id": doc["id"]
|
||||
}
|
||||
return {"id": get_uuid(), "doc_id": doc["id"]}
|
||||
|
||||
tsks = []
|
||||
|
||||
if doc["type"] == FileType.PDF.value:
|
||||
@ -150,8 +180,8 @@ def queue_tasks(doc: dict, bucket: str, name: str):
|
||||
if doc["parser_id"] == "paper":
|
||||
page_size = doc["parser_config"].get("task_page_size", 22)
|
||||
if doc["parser_id"] in ["one", "knowledge_graph"] or not do_layout:
|
||||
page_size = 10 ** 9
|
||||
page_ranges = doc["parser_config"].get("pages") or [(1, 10 ** 5)]
|
||||
page_size = 10**9
|
||||
page_ranges = doc["parser_config"].get("pages") or [(1, 10**5)]
|
||||
for s, e in page_ranges:
|
||||
s -= 1
|
||||
s = max(0, s)
|
||||
@ -177,4 +207,6 @@ def queue_tasks(doc: dict, bucket: str, name: str):
|
||||
DocumentService.begin2parse(doc["id"])
|
||||
|
||||
for t in tsks:
|
||||
assert REDIS_CONN.queue_product(SVR_QUEUE_NAME, message=t), "Can't access Redis. Please check the Redis' status."
|
||||
assert REDIS_CONN.queue_product(
|
||||
SVR_QUEUE_NAME, message=t
|
||||
), "Can't access Redis. Please check the Redis' status."
|
||||
|
||||
@ -22,7 +22,7 @@ from api.db import UserTenantRole
|
||||
from api.db.db_models import DB, UserTenant
|
||||
from api.db.db_models import User, Tenant
|
||||
from api.db.services.common_service import CommonService
|
||||
from api.utils import get_uuid, get_format_time, current_timestamp, datetime_format
|
||||
from api.utils import get_uuid, current_timestamp, datetime_format
|
||||
from api.db import StatusEnum
|
||||
|
||||
|
||||
|
||||
@ -21,10 +21,7 @@
|
||||
import logging
|
||||
import os
|
||||
from api.utils.log_utils import initRootLogger
|
||||
LOG_LEVELS = os.environ.get("LOG_LEVELS", "")
|
||||
initRootLogger("ragflow_server", LOG_LEVELS)
|
||||
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
import time
|
||||
@ -44,6 +41,9 @@ from api.versions import get_ragflow_version
|
||||
from api.utils import show_configs
|
||||
from rag.settings import print_rag_settings
|
||||
|
||||
LOG_LEVELS = os.environ.get("LOG_LEVELS", "")
|
||||
initRootLogger("ragflow_server", LOG_LEVELS)
|
||||
|
||||
|
||||
def update_progress():
|
||||
while True:
|
||||
|
||||
@ -36,7 +36,6 @@ from werkzeug.http import HTTP_STATUS_CODES
|
||||
from api.db.db_models import APIToken
|
||||
from api import settings
|
||||
|
||||
from api import settings
|
||||
from api.utils import CustomJSONEncoder, get_uuid
|
||||
from api.utils import json_dumps
|
||||
from api.constants import REQUEST_WAIT_SEC, REQUEST_MAX_WAIT_SEC
|
||||
|
||||
@ -45,5 +45,5 @@ try:
|
||||
pool = Pool(processes=1)
|
||||
thread = pool.apply_async(download_nltk_data)
|
||||
binary = thread.get(timeout=60)
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
print('\x1b[6;37;41m WARNING \x1b[0m' + "Downloading NLTK data failure.", flush=True)
|
||||
|
||||
Reference in New Issue
Block a user