Fix: add fallback for bad citation output (#7014)

### What problem does this PR solve?

Add fallback for bad citation output. #6948

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Yongteng Lei
2025-04-15 09:33:53 +08:00
committed by GitHub
parent b1fa5a0754
commit 7a34159737
2 changed files with 43 additions and 35 deletions

View File

@ -271,8 +271,10 @@ def chat(dialog, messages, stream=True, **kwargs):
if len(ans) == 2:
think = ans[0] + "</think>"
answer = ans[1]
if knowledges and (prompt_config.get("quote", True) and kwargs.get("quote", True)):
answer = re.sub(r"##[ij]\$\$", "", answer, flags=re.DOTALL)
idx = set([])
if not re.search(r"##[0-9]+\$\$", answer):
answer, idx = retriever.insert_citations(
answer,
@ -283,12 +285,21 @@ def chat(dialog, messages, stream=True, **kwargs):
vtweight=dialog.vector_similarity_weight,
)
else:
idx = set([])
for r in re.finditer(r"##([0-9]+)\$\$", answer):
i = int(r.group(1))
for match in re.finditer(r"##([0-9]+)\$\$", answer):
i = int(match.group(1))
if i < len(kbinfos["chunks"]):
idx.add(i)
# handle (ID: 1), ID: 2 etc.
for match in re.finditer(r"\(\s*ID:\s*(\d+)\s*\)|ID[: ]+\s*(\d+)", answer):
full_match = match.group(0)
id = match.group(1) or match.group(2)
if id:
i = int(id)
if i < len(kbinfos["chunks"]):
idx.add(i)
answer = answer.replace(full_match, f"##{i}$$")
idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx])
recall_docs = [d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx]
if not recall_docs: