diff --git a/rag/app/presentation.py b/rag/app/presentation.py index 03e89b075..d924f49b1 100644 --- a/rag/app/presentation.py +++ b/rag/app/presentation.py @@ -43,8 +43,9 @@ class Ppt(PptParser): with BytesIO() as buffered: slide.get_thumbnail( 0.5, 0.5).save( - buffered, drawing.imaging.ImageFormat.jpeg, quality=80) - imgs.append(Image.open(buffered)) + buffered, drawing.imaging.ImageFormat.jpeg) + buffered.seek(0) + imgs.append(Image.open(buffered).copy()) except RuntimeError as e: raise RuntimeError(f'ppt parse error at page {i+1}, original error: {str(e)}') from e assert len(imgs) == len( diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py index b2a308f1b..952737d83 100644 --- a/rag/svr/task_executor.py +++ b/rag/svr/task_executor.py @@ -302,7 +302,7 @@ async def build_chunks(task, progress_callback): d["image"].close() # Close original image d["image"] = converted_image d["image"].save(output_buffer, format='JPEG') - + async with minio_limiter: await trio.to_thread.run_sync(lambda: STORAGE_IMPL.put(task["kb_id"], d["id"], output_buffer.getvalue())) d["img_id"] = "{}-{}".format(task["kb_id"], d["id"]) @@ -631,7 +631,7 @@ async def do_handle_task(task): nursery.start_soon(delete_image, task_dataset_id, chunk_id) progress_callback(-1, msg=f"Chunk updates failed since task {task['id']} is unknown.") return - + logging.info("Indexing doc({}), page({}-{}), chunks({}), elapsed: {:.2f}".format(task_document_name, task_from_page, task_to_page, len(chunks), timer() - start_ts)) @@ -723,8 +723,8 @@ async def report_status(): finally: redis_lock.release() await trio.sleep(30) - - + + async def task_manager(): try: await handle_task()