Feat: user defined prompt. (#9972)

### What problem does this PR solve?


### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Kevin Hu
2025-09-08 14:05:01 +08:00
committed by GitHub
parent cf18231713
commit e9ee9269f5
11 changed files with 203 additions and 66 deletions

View File

@ -687,8 +687,20 @@ def naive_merge_docx(sections, chunk_token_num=128, delimiter="\n。"):
tk_nums[-1] += tnum
dels = get_delimiters(delimiter)
line = ""
for sec, image in sections:
split_sec = re.split(r"(%s)" % dels, sec)
if not image:
line += sec + "\n"
continue
split_sec = re.split(r"(%s)" % dels, line + sec)
for sub_sec in split_sec:
if re.match(f"^{dels}$", sub_sec):
continue
add_chunk(sub_sec, image,"")
line = ""
if line:
split_sec = re.split(r"(%s)" % dels, line)
for sub_sec in split_sec:
if re.match(f"^{dels}$", sub_sec):
continue