mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Fix issues in API (#3008)
### What problem does this PR solve?

Fix issues in API

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>
This commit is contained in:
@ -64,7 +64,12 @@ def create(tenant_id):
|
||||
if not req.get("embedding_model"):
|
||||
req['embedding_model'] = t.embd_id
|
||||
else:
|
||||
if not TenantLLMService.query(tenant_id=tenant_id,model_type="embedding", llm_name=req.get("embedding_model")):
|
||||
valid_embedding_models=["BAAI/bge-large-zh-v1.5","BAAI/bge-base-en-v1.5","BAAI/bge-large-en-v1.5","BAAI/bge-small-en-v1.5",
|
||||
"BAAI/bge-small-zh-v1.5","jinaai/jina-embeddings-v2-base-en","jinaai/jina-embeddings-v2-small-en",
|
||||
"nomic-ai/nomic-embed-text-v1.5","sentence-transformers/all-MiniLM-L6-v2","text-embedding-v2",
|
||||
"text-embedding-v3","maidalun1020/bce-embedding-base_v1"]
|
||||
if not TenantLLMService.query(tenant_id=tenant_id,model_type="embedding", llm_name=req.get("embedding_model"))\
|
||||
and req.get("embedding_model") not in valid_embedding_models:
|
||||
return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
|
||||
key_mapping = {
|
||||
"chunk_num": "chunk_count",
|
||||
@ -133,6 +138,9 @@ def update(tenant_id,dataset_id):
|
||||
return get_error_data_result(
|
||||
retmsg="Can't change `tenant_id`.")
|
||||
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
||||
if "parser_config" in req:
|
||||
print(kb.parser_config,flush=True)
|
||||
req["parser_config"]=kb.parser_config.update(req["parser_config"])
|
||||
if "chunk_count" in req:
|
||||
if req["chunk_count"] != kb.chunk_num:
|
||||
return get_error_data_result(
|
||||
@ -153,10 +161,15 @@ def update(tenant_id,dataset_id):
|
||||
if "embedding_model" in req:
|
||||
if kb.chunk_num != 0 and req['embedding_model'] != kb.embd_id:
|
||||
return get_error_data_result(
|
||||
retmsg="If `chunk_count` is not 0, `embedding_method` is not changeable.")
|
||||
retmsg="If `chunk_count` is not 0, `embedding_model` is not changeable.")
|
||||
if not req.get("embedding_model"):
|
||||
return get_error_data_result("`embedding_model` can't be empty")
|
||||
if not TenantLLMService.query(tenant_id=tenant_id,model_type="embedding", llm_name=req.get("embedding_model")):
|
||||
valid_embedding_models=["BAAI/bge-large-zh-v1.5","BAAI/bge-base-en-v1.5","BAAI/bge-large-en-v1.5","BAAI/bge-small-en-v1.5",
|
||||
"BAAI/bge-small-zh-v1.5","jinaai/jina-embeddings-v2-base-en","jinaai/jina-embeddings-v2-small-en",
|
||||
"nomic-ai/nomic-embed-text-v1.5","sentence-transformers/all-MiniLM-L6-v2","text-embedding-v2",
|
||||
"text-embedding-v3","maidalun1020/bce-embedding-base_v1"]
|
||||
if not TenantLLMService.query(tenant_id=tenant_id,model_type="embedding", llm_name=req.get("embedding_model"))\
|
||||
and req.get("embedding_model") not in valid_embedding_models:
|
||||
return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
|
||||
req['embd_id'] = req.pop('embedding_model')
|
||||
if "name" in req:
|
||||
|
||||
@ -163,9 +163,6 @@ def update_doc(tenant_id, dataset_id, document_id):
|
||||
doc.process_duation * -1)
|
||||
if not e:
|
||||
return get_error_data_result(retmsg="Document not found!")
|
||||
tenant_id = DocumentService.get_tenant_id(req["id"])
|
||||
if not tenant_id:
|
||||
return get_error_data_result(retmsg="Tenant not found!")
|
||||
ELASTICSEARCH.deleteByQuery(
|
||||
Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
|
||||
|
||||
@ -245,14 +242,22 @@ def delete(tenant_id,dataset_id):
|
||||
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
||||
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}. ")
|
||||
req = request.json
|
||||
if not req.get("ids"):
|
||||
return get_error_data_result(retmsg="`ids` is required")
|
||||
doc_ids = req["ids"]
|
||||
if not req:
|
||||
doc_ids=None
|
||||
else:
|
||||
doc_ids=req.get("ids")
|
||||
if not doc_ids:
|
||||
doc_list = []
|
||||
docs=DocumentService.query(kb_id=dataset_id)
|
||||
for doc in docs:
|
||||
doc_list.append(doc.id)
|
||||
else:
|
||||
doc_list=doc_ids
|
||||
root_folder = FileService.get_root_folder(tenant_id)
|
||||
pf_id = root_folder["id"]
|
||||
FileService.init_knowledgebase_docs(pf_id, tenant_id)
|
||||
errors = ""
|
||||
for doc_id in doc_ids:
|
||||
for doc_id in doc_list:
|
||||
try:
|
||||
e, doc = DocumentService.get_by_id(doc_id)
|
||||
if not e:
|
||||
@ -290,8 +295,11 @@ def parse(tenant_id,dataset_id):
|
||||
if not req.get("document_ids"):
|
||||
return get_error_data_result("`document_ids` is required")
|
||||
for id in req["document_ids"]:
|
||||
if not DocumentService.query(id=id,kb_id=dataset_id):
|
||||
doc = DocumentService.query(id=id,kb_id=dataset_id)
|
||||
if not doc:
|
||||
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
||||
if doc[0].progress != 0.0:
|
||||
return get_error_data_result("Can't stop parsing document with progress at 0 or 100")
|
||||
info = {"run": "1", "progress": 0}
|
||||
info["progress_msg"] = ""
|
||||
info["chunk_num"] = 0
|
||||
@ -349,7 +357,27 @@ def list_chunks(tenant_id,dataset_id,document_id):
|
||||
"doc_ids": [doc_id], "page": page, "size": size, "question": question, "sort": True
|
||||
}
|
||||
sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
|
||||
res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()}
|
||||
key_mapping = {
|
||||
"chunk_num": "chunk_count",
|
||||
"kb_id": "dataset_id",
|
||||
"token_num": "token_count",
|
||||
"parser_id": "chunk_method"
|
||||
}
|
||||
run_mapping = {
|
||||
"0": "UNSTART",
|
||||
"1": "RUNNING",
|
||||
"2": "CANCEL",
|
||||
"3": "DONE",
|
||||
"4": "FAIL"
|
||||
}
|
||||
doc=doc.to_dict()
|
||||
renamed_doc = {}
|
||||
for key, value in doc.items():
|
||||
if key == "run":
|
||||
renamed_doc["run"] = run_mapping.get(str(value))
|
||||
new_key = key_mapping.get(key, key)
|
||||
renamed_doc[new_key] = value
|
||||
res = {"total": sres.total, "chunks": [], "doc": renamed_doc}
|
||||
origin_chunks = []
|
||||
sign = 0
|
||||
for id in sres.ids:
|
||||
@ -388,7 +416,7 @@ def list_chunks(tenant_id,dataset_id,document_id):
|
||||
"content_with_weight": "content",
|
||||
"doc_id": "document_id",
|
||||
"important_kwd": "important_keywords",
|
||||
"img_id": "image_id",
|
||||
"img_id": "image_id"
|
||||
}
|
||||
renamed_chunk = {}
|
||||
for key, value in chunk.items():
|
||||
|
||||
Reference in New Issue
Block a user