Refactor parser code (#9042)

### What problem does this PR solve?

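Refactor parser code for readability: rename the abbreviated local variable `patt` to `pattern` (for example in `RAGFlowDocxParser.__compose_table_content`). No behavior change is intended.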

### Type of change

- [x] Refactoring

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
Jin Hai
2025-07-25 12:04:07 +08:00
committed by GitHub
parent bcaac061ac
commit 03daf4618c
3 changed files with 7 additions and 7 deletions

View File

@@ -33,7 +33,7 @@ class RAGFlowDocxParser:
def __compose_table_content(self, df):
def blockType(b):
-patt = [
+pattern = [
("^(20|19)[0-9]{2}[年/-][0-9]{1,2}[月/-][0-9]{1,2}日*$", "Dt"),
(r"^(20|19)[0-9]{2}年$", "Dt"),
(r"^(20|19)[0-9]{2}[年/-][0-9]{1,2}月*$", "Dt"),
@@ -47,7 +47,7 @@ class RAGFlowDocxParser:
(r"^[0-9.,+-]+[0-9A-Za-z/$¥%<>()' -]+$", "NE"),
(r"^.{1}$", "Sg")
]
-for p, n in patt:
+for p, n in pattern:
if re.search(p, b):
return n
tks = [t for t in rag_tokenizer.tokenize(b).split() if len(t) > 1]