Feat: debug extractor... (#10294)

### What problem does this PR solve?

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Kevin Hu
2025-09-26 10:51:05 +08:00
committed by GitHub
parent ff49454501
commit c7efaab30e
10 changed files with 27 additions and 19 deletions

View File

@@ -119,7 +119,7 @@ class Tokenizer(ProcessBase):
if ck.get("questions"):
ck["question_tks"] = rag_tokenizer.tokenize("\n".join(ck["questions"]))
if ck.get("keywords"):
-ck["important_tks"] = rag_tokenizer.tokenize("\n".join(ck["keywords"]))
+ck["important_tks"] = rag_tokenizer.tokenize(",".join(ck["keywords"]))
if ck.get("summary"):
ck["content_ltks"] = rag_tokenizer.tokenize(ck["summary"])
ck["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(ck["content_ltks"])