Mirror of https://github.com/infiniflow/ragflow.git (synced 2026-01-30 23:26:36 +08:00)
Fix: Web API tests by normalizing errors, validation, and uploads (#12620)
### What problem does this PR solve?

Fixes web API behavior mismatches that caused test failures by normalizing error responses, tightening validations, correcting error messages, and closing upload file handles.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
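The upload-handle fix mentioned above does not appear in the hunks below. As a rough illustration only (the helper name and the Werkzeug-style `FileStorage` interface are assumptions, not code from this PR), the pattern is to release each upload's handle even when saving fails:

```python
import os

# Hypothetical helper illustrating the "close upload file handles" fix;
# assumes Werkzeug-style FileStorage objects with .save() and .close().
def save_uploads(file_objs, dest_dir):
    saved = []
    for f in file_objs:
        try:
            path = os.path.join(dest_dir, f.filename)
            f.save(path)   # may raise on bad paths or full disks
            saved.append(path)
        finally:
            f.close()      # release the OS handle either way
    return saved
```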
```diff
@@ -126,10 +126,15 @@ def get():
 @validate_request("doc_id", "chunk_id", "content_with_weight")
 async def set():
     req = await get_request_json()
+    content_with_weight = req["content_with_weight"]
+    if not isinstance(content_with_weight, (str, bytes)):
+        raise TypeError("expected string or bytes-like object")
+    if isinstance(content_with_weight, bytes):
+        content_with_weight = content_with_weight.decode("utf-8", errors="ignore")
     d = {
         "id": req["chunk_id"],
-        "content_with_weight": req["content_with_weight"]}
-    d["content_ltks"] = rag_tokenizer.tokenize(req["content_with_weight"])
+        "content_with_weight": content_with_weight}
+    d["content_ltks"] = rag_tokenizer.tokenize(content_with_weight)
     d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
     if "important_kwd" in req:
         if not isinstance(req["important_kwd"], list):
```
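The first hunk normalizes `content_with_weight` once and reuses the local everywhere, so tokenization and storage always see a `str`. A standalone sketch of that normalization (the function name is illustrative, not from the codebase):

```python
def normalize_content(content):
    # Mirrors the validation added in the hunk above.
    if not isinstance(content, (str, bytes)):
        raise TypeError("expected string or bytes-like object")
    if isinstance(content, bytes):
        # Drop undecodable bytes instead of failing the whole request.
        content = content.decode("utf-8", errors="ignore")
    return content

assert normalize_content(b"caf\xc3\xa9") == "café"
assert normalize_content(b"caf\xe9") == "caf"  # the stray 0xE9 is ignored
```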
```diff
@@ -171,7 +176,7 @@ async def set():
         _d = beAdoc(d, q, a, not any(
             [rag_tokenizer.is_chinese(t) for t in q + a]))

-    v, c = embd_mdl.encode([doc.name, req["content_with_weight"] if not _d.get("question_kwd") else "\n".join(_d["question_kwd"])])
+    v, c = embd_mdl.encode([doc.name, content_with_weight if not _d.get("question_kwd") else "\n".join(_d["question_kwd"])])
     v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
     _d["q_%d_vec" % len(v)] = v.tolist()
     settings.docStoreConn.update({"id": req["chunk_id"]}, _d, search.index_name(tenant_id), doc.kb_id)
```
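The second hunk's only change is feeding the decoded local into the embedder; the surrounding blend logic is unchanged. For reference, the weighting works like this (a minimal numpy sketch, not ragflow code):

```python
import numpy as np

def blend(title_vec, content_vec, is_qa):
    # Non-QA docs: 10% document-name vector, 90% chunk-content vector.
    # QA-parsed docs keep the content vector untouched.
    return content_vec if is_qa else 0.1 * title_vec + 0.9 * content_vec

title, content = np.array([1.0, 0.0]), np.array([0.0, 1.0])
print(blend(title, content, is_qa=False))  # [0.1 0.9]
```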
```diff
@@ -223,14 +228,27 @@ async def rm():
     e, doc = DocumentService.get_by_id(req["doc_id"])
     if not e:
         return get_data_error_result(message="Document not found!")
-    condition = {"id": req["chunk_ids"]}
-    if not settings.docStoreConn.delete(condition,
-                                        search.index_name(DocumentService.get_tenant_id(req["doc_id"])),
-                                        doc.kb_id):
-        return get_data_error_result(message="Index updating failure")
+    # Include doc_id in condition to properly scope the delete
+    condition = {"id": req["chunk_ids"], "doc_id": req["doc_id"]}
+    try:
+        deleted_count = settings.docStoreConn.delete(condition,
+                                                     search.index_name(DocumentService.get_tenant_id(req["doc_id"])),
+                                                     doc.kb_id)
+    except Exception:
+        return get_data_error_result(message="Chunk deleting failure")
     deleted_chunk_ids = req["chunk_ids"]
     chunk_number = len(deleted_chunk_ids)
+    if isinstance(deleted_chunk_ids, list):
+        unique_chunk_ids = list(dict.fromkeys(deleted_chunk_ids))
+        has_ids = len(unique_chunk_ids) > 0
+    else:
+        unique_chunk_ids = [deleted_chunk_ids]
+        has_ids = deleted_chunk_ids not in (None, "")
+    if has_ids and deleted_count == 0:
+        return get_data_error_result(message="Index updating failure")
+    if deleted_count > 0 and deleted_count < len(unique_chunk_ids):
+        deleted_count += settings.docStoreConn.delete({"doc_id": req["doc_id"]},
+                                                      search.index_name(DocumentService.get_tenant_id(req["doc_id"])),
+                                                      doc.kb_id)
+        chunk_number = deleted_count
     DocumentService.decrement_chunk_num(doc.id, doc.kb_id, 1, chunk_number, 0)
     for cid in deleted_chunk_ids:
         if settings.STORAGE_IMPL.obj_exist(doc.kb_id, cid):
```
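The id-normalization branch in `rm()` accepts either a list of chunk ids or a single id. Pulled out as a pure function for clarity (names here are illustrative, not part of the PR):

```python
def normalize_chunk_ids(chunk_ids):
    """Return (unique_ids, has_ids) as computed in rm() above."""
    if isinstance(chunk_ids, list):
        unique = list(dict.fromkeys(chunk_ids))  # order-preserving dedupe
        return unique, len(unique) > 0
    return [chunk_ids], chunk_ids not in (None, "")

assert normalize_chunk_ids(["a", "b", "a"]) == (["a", "b"], True)
assert normalize_chunk_ids("") == ([""], False)
```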