mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Add app to rag module: presentaion & laws (#43)
This commit is contained in:
48
rag/app/__init__.py
Normal file
48
rag/app/__init__.py
Normal file
@ -0,0 +1,48 @@
|
||||
import re
|
||||
|
||||
|
||||
def callback__(progress, msg, func):
|
||||
if not func :return
|
||||
func(progress, msg)
|
||||
|
||||
|
||||
BULLET_PATTERN = [[
|
||||
r"第[零一二三四五六七八九十百]+编",
|
||||
r"第[零一二三四五六七八九十百]+章",
|
||||
r"第[零一二三四五六七八九十百]+节",
|
||||
r"第[零一二三四五六七八九十百]+条",
|
||||
r"[\((][零一二三四五六七八九十百]+[\))]",
|
||||
], [
|
||||
r"[0-9]{,3}[\. 、]",
|
||||
r"[0-9]{,2}\.[0-9]{,2}",
|
||||
r"[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}",
|
||||
r"[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}",
|
||||
], [
|
||||
r"[零一二三四五六七八九十百]+[ 、]",
|
||||
r"[\((][零一二三四五六七八九十百]+[\))]",
|
||||
r"[\((][0-9]{,2}[\))]",
|
||||
] ,[
|
||||
r"PART (ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN)",
|
||||
r"Chapter (I+V?|VI*|XI|IX|X)",
|
||||
r"Section [0-9]+",
|
||||
r"Article [0-9]+"
|
||||
]
|
||||
]
|
||||
|
||||
|
||||
def bullets_category(sections):
|
||||
global BULLET_PATTERN
|
||||
hits = [0] * len(BULLET_PATTERN)
|
||||
for i, pro in enumerate(BULLET_PATTERN):
|
||||
for sec in sections:
|
||||
for p in pro:
|
||||
if re.match(p, sec):
|
||||
hits[i] += 1
|
||||
break
|
||||
maxium = 0
|
||||
res = -1
|
||||
for i,h in enumerate(hits):
|
||||
if h <= maxium:continue
|
||||
res = i
|
||||
maxium = h
|
||||
return res
|
||||
Reference in New Issue
Block a user