mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Feat: add splitter (#10161)
### What problem does this PR solve?

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: Lynn <lynn_inf@hotmail.com>
Co-authored-by: chanx <1243304602@qq.com>
Co-authored-by: balibabu <cike8899@users.noreply.github.com>
Co-authored-by: 纷繁下的无奈 <zhileihuang@126.com>
Co-authored-by: huangzl <huangzl@shinemo.com>
Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com>
Co-authored-by: Wilmer <33392318@qq.com>
Co-authored-by: Adrian Weidig <adrianweidig@gmx.net>
Co-authored-by: Zhichang Yu <yuzhichang@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Yongteng Lei <yongtengrey@outlook.com>
Co-authored-by: Liu An <asiro@qq.com>
Co-authored-by: buua436 <66937541+buua436@users.noreply.github.com>
Co-authored-by: BadwomanCraZY <511528396@qq.com>
Co-authored-by: cucusenok <31804608+cucusenok@users.noreply.github.com>
Co-authored-by: Russell Valentine <russ@coldstonelabs.org>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Billy Bao <newyorkupperbay@gmail.com>
Co-authored-by: Zhedong Cen <cenzhedong2@126.com>
Co-authored-by: TensorNull <129579691+TensorNull@users.noreply.github.com>
Co-authored-by: TensorNull <tensor.null@gmail.com>
This commit is contained in:
@@ -23,6 +23,7 @@ import time
|
||||
|
||||
from api.utils import get_uuid
|
||||
from api.utils.api_utils import timeout
|
||||
from api.utils.base64_image import image2id
|
||||
from api.utils.log_utils import init_root_logger, get_project_base_directory
|
||||
from graphrag.general.index import run_graphrag
|
||||
from graphrag.utils import get_llm_cache, set_llm_cache, get_tags_from_cache, set_tags_to_cache
|
||||
@@ -37,7 +38,6 @@ import xxhash
|
||||
import copy
|
||||
import re
|
||||
from functools import partial
|
||||
from io import BytesIO
|
||||
from multiprocessing.context import TimeoutError
|
||||
from timeit import default_timer as timer
|
||||
import tracemalloc
|
||||
@@ -301,30 +301,8 @@ async def build_chunks(task, progress_callback):
|
||||
d["img_id"] = ""
|
||||
docs.append(d)
|
||||
return
|
||||
|
||||
with BytesIO() as output_buffer:
|
||||
if isinstance(d["image"], bytes):
|
||||
output_buffer.write(d["image"])
|
||||
output_buffer.seek(0)
|
||||
else:
|
||||
# If the image is in RGBA mode, convert it to RGB mode before saving it in JPEG format.
|
||||
if d["image"].mode in ("RGBA", "P"):
|
||||
converted_image = d["image"].convert("RGB")
|
||||
#d["image"].close() # Close original image
|
||||
d["image"] = converted_image
|
||||
try:
|
||||
d["image"].save(output_buffer, format='JPEG')
|
||||
except OSError as e:
|
||||
logging.warning(
|
||||
"Saving image of chunk {}/{}/{} got exception, ignore: {}".format(task["location"], task["name"], d["id"], str(e)))
|
||||
|
||||
async with minio_limiter:
|
||||
await trio.to_thread.run_sync(lambda: STORAGE_IMPL.put(task["kb_id"], d["id"], output_buffer.getvalue()))
|
||||
d["img_id"] = "{}-{}".format(task["kb_id"], d["id"])
|
||||
if not isinstance(d["image"], bytes):
|
||||
d["image"].close()
|
||||
del d["image"] # Remove image reference
|
||||
docs.append(d)
|
||||
await image2id(d, partial(STORAGE_IMPL.put), task["kb_id"], d["id"])
|
||||
docs.append(d)
|
||||
except Exception:
|
||||
logging.exception(
|
||||
"Saving image of chunk {}/{}/{} got exception".format(task["location"], task["name"], d["id"]))
|
||||
|
||||
Reference in New Issue
Block a user