diff --git a/api/apps/document_app.py b/api/apps/document_app.py index 0b3bfd6ba..b80d59b09 100644 --- a/api/apps/document_app.py +++ b/api/apps/document_app.py @@ -557,8 +557,8 @@ def get(doc_id): @login_required @validate_request("doc_id") def change_parser(): - req = request.json + req = request.json if not DocumentService.accessible(req["doc_id"], current_user.id): return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR) @@ -582,7 +582,7 @@ def change_parser(): settings.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id) try: - if "pipeline_id" in req: + if "pipeline_id" in req and req["pipeline_id"] != "": if doc.pipeline_id == req["pipeline_id"]: return get_json_result(data=True) DocumentService.update_by_id(doc.id, {"pipeline_id": req["pipeline_id"]}) diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index ea3a87b14..c73b610ad 100644 --- a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -1274,12 +1274,16 @@ class VisionParser(RAGFlowPdfParser): prompt=vision_llm_describe_prompt(page=pdf_page_num + 1), callback=callback, ) + if kwargs.get("callback"): kwargs["callback"](idx * 1.0 / len(self.page_images), f"Processed: {idx + 1}/{len(self.page_images)}") if text: width, height = self.page_images[idx].size - all_docs.append((text, f"{pdf_page_num + 1} 0 {width / zoomin} 0 {height / zoomin}")) + all_docs.append(( + text, + f"@@{pdf_page_num + 1}\t{0.0:.1f}\t{width / zoomin:.1f}\t{0.0:.1f}\t{height / zoomin:.1f}##" + )) return all_docs, []