Add app to rag module: presentaion & laws (#43)

This commit is contained in:
KevinHuSh
2024-01-25 18:57:39 +08:00
committed by GitHub
parent e32ef75e99
commit 072f9dd5bc
7 changed files with 473 additions and 103 deletions

48
rag/app/__init__.py Normal file
View File

@ -0,0 +1,48 @@
import re
def callback__(progress, msg, func):
if not func :return
func(progress, msg)
BULLET_PATTERN = [[
r"第[零一二三四五六七八九十百]+编",
r"第[零一二三四五六七八九十百]+章",
r"第[零一二三四五六七八九十百]+节",
r"第[零一二三四五六七八九十百]+条",
r"[\(][零一二三四五六七八九十百]+[\)]",
], [
r"[0-9]{,3}[\. 、]",
r"[0-9]{,2}\.[0-9]{,2}",
r"[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}",
r"[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}",
], [
r"[零一二三四五六七八九十百]+[ 、]",
r"[\(][零一二三四五六七八九十百]+[\)]",
r"[\(][0-9]{,2}[\)]",
] ,[
r"PART (ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN)",
r"Chapter (I+V?|VI*|XI|IX|X)",
r"Section [0-9]+",
r"Article [0-9]+"
]
]
def bullets_category(sections):
global BULLET_PATTERN
hits = [0] * len(BULLET_PATTERN)
for i, pro in enumerate(BULLET_PATTERN):
for sec in sections:
for p in pro:
if re.match(p, sec):
hits[i] += 1
break
maxium = 0
res = -1
for i,h in enumerate(hits):
if h <= maxium:continue
res = i
maxium = h
return res