mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Add docx support for manual parser (#1227)
### What problem does this PR solve?

Add docx support for manual parser

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -497,3 +497,9 @@ def naive_merge(sections, chunk_token_num=128, delimiter="\n。;!?"):
|
||||
add_chunk(sec[s: e], pos)
|
||||
|
||||
return cks
|
||||
|
||||
def docx_question_level(p):
    """Return the heading level and cleaned text of a docx paragraph.

    Args:
        p: A paragraph-like object exposing ``style.name`` and ``text``
            (presumably a python-docx ``Paragraph`` -- confirm against
            the callers).

    Returns:
        A ``(level, text)`` tuple. ``level`` is the integer taken from
        the last space-separated token of a style name that starts with
        ``"Heading"`` (e.g. ``"Heading 2"`` -> ``2``). It is ``0`` when
        the style is not a heading style, has no name, or does not end
        in an integer. ``text`` is the paragraph text with ideographic
        spaces (U+3000) replaced by ASCII spaces and surrounding
        whitespace stripped.
    """
    # Normalize once; both the heading and non-heading paths return it.
    text = re.sub(r"\u3000", " ", p.text).strip()

    # A style may carry no name at all; treat that as "not a heading".
    style_name = p.style.name or ""
    if not style_name.startswith('Heading'):
        return 0, text

    try:
        level = int(style_name.split(' ')[-1])
    except ValueError:
        # Style names such as "Heading" or "Heading 1 Char" have no
        # numeric suffix; fall back to level 0 instead of raising and
        # aborting the whole call.
        level = 0
    return level, text
|
||||
Reference in New Issue
Block a user