Remove unused code, separate layout detection out as a new API. Add new RAG method 'table' (#55)

This commit is contained in:
KevinHuSh
2024-02-05 18:08:17 +08:00
committed by GitHub
parent f305776217
commit 407b2523b6
33 changed files with 306 additions and 505 deletions

View File

@@ -1,4 +1,5 @@
import copy
import random
from .pdf_parser import HuParser as PdfParser
from .docx_parser import HuDocxParser as DocxParser
@@ -38,6 +39,9 @@ BULLET_PATTERN = [[
]
]
def random_choices(arr, k):
    """Return up to ``k`` elements drawn at random from ``arr``.

    Elements are drawn with ``random.choices``, i.e. with replacement, so
    the same element may appear more than once in the result.

    Args:
        arr: Sequence to draw from; must support ``len()`` and indexing.
        k: Requested number of elements; capped at ``len(arr)``.

    Returns:
        A list of ``min(len(arr), k)`` randomly chosen elements of ``arr``
        (empty when that value is not positive).
    """
    # Capping k also makes an empty ``arr`` yield [] instead of letting
    # ``random.choices`` fail on an empty population.
    k = min(len(arr), k)
    return random.choices(arr, k=k)
def bullets_category(sections):
global BULLET_PATTERN