Apply agentic searching. (#5196)

### What problem does this PR solve?

#5173

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Kevin Hu
2025-02-20 17:41:01 +08:00
committed by GitHub
parent 744ff55c62
commit 7b3d700d5f
3 changed files with 242 additions and 27 deletions

View File

@ -17,6 +17,7 @@
import logging
import random
from collections import Counter
from typing import Optional
from rag.utils import num_tokens_from_string
from . import rag_tokenizer
@ -601,3 +602,11 @@ def naive_merge_docx(sections, chunk_token_num=128, delimiter="\n。"):
add_chunk(sec, image, '')
return cks, images
def extract_between(text: str, start_tag: str, end_tag: str) -> Optional[str]:
    """Return the content enclosed by the last ``start_tag``/``end_tag`` pair.

    Both tags are matched literally (they are regex-escaped), and the
    enclosed content is matched non-greedily and may span multiple lines.

    Args:
        text: The string to search.
        start_tag: Literal marker that opens the span.
        end_tag: Literal marker that closes the span.

    Returns:
        The content of the last matching span with surrounding whitespace
        stripped, or ``None`` when no span is found.
    """
    pattern = re.escape(start_tag) + r"(.*?)" + re.escape(end_tag)
    # DOTALL lets "." cross newlines so multi-line spans are captured.
    matches = re.findall(pattern, text, flags=re.DOTALL)
    if not matches:
        return None
    # When several tagged spans occur, the final one wins.
    return matches[-1].strip()

View File

@ -15,7 +15,6 @@
#
import logging
import re
import json
from dataclasses import dataclass
from rag.settings import TAG_FLD, PAGERANK_FLD
@ -259,7 +258,7 @@ class Dealer:
q_denor = np.sqrt(np.sum([s*s for t,s in query_rfea.items() if t != PAGERANK_FLD]))
for i in search_res.ids:
nor, denor = 0, 0
for t, sc in json.loads(search_res.field[i].get(TAG_FLD, "{}")).items():
for t, sc in eval(search_res.field[i].get(TAG_FLD, "{}")).items():
if t in query_rfea:
nor += query_rfea[t] * sc
denor += sc * sc