mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 12:32:30 +08:00
Feat: Add TCADP parser for PPTX and spreadsheet document types. (#11041)
### What problem does this PR solve? - Added TCADP Parser configuration fields to PDF, PPT, and spreadsheet parsing forms - Implemented support for setting table result type (Markdown/HTML) and Markdown image response type (URL/Text) - Updated TCADP Parser to handle return format settings from configuration or parameters - Enhanced frontend to dynamically show TCADP options based on selected parsing method - Modified backend to pass format parameters when calling TCADP API - Optimized form default value logic for TCADP configuration items - Updated multilingual resource files for new configuration options ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -192,12 +192,16 @@ class TencentCloudAPIClient:
|
||||
|
||||
|
||||
class TCADPParser(RAGFlowPdfParser):
|
||||
def __init__(self, secret_id: str = None, secret_key: str = None, region: str = "ap-guangzhou"):
|
||||
def __init__(self, secret_id: str = None, secret_key: str = None, region: str = "ap-guangzhou",
|
||||
table_result_type: str = None, markdown_image_response_type: str = None):
|
||||
super().__init__()
|
||||
|
||||
# First initialize logger
|
||||
self.logger = logging.getLogger(self.__class__.__name__)
|
||||
|
||||
# Log received parameters
|
||||
self.logger.info(f"[TCADP] Initializing with parameters - table_result_type: {table_result_type}, markdown_image_response_type: {markdown_image_response_type}")
|
||||
|
||||
# Priority: read configuration from RAGFlow configuration system (service_conf.yaml)
|
||||
try:
|
||||
tcadp_parser = get_base_config("tcadp_config", {})
|
||||
@ -205,14 +209,30 @@ class TCADPParser(RAGFlowPdfParser):
|
||||
self.secret_id = secret_id or tcadp_parser.get("secret_id")
|
||||
self.secret_key = secret_key or tcadp_parser.get("secret_key")
|
||||
self.region = region or tcadp_parser.get("region", "ap-guangzhou")
|
||||
self.table_result_type = tcadp_parser.get("table_result_type", "1")
|
||||
self.markdown_image_response_type = tcadp_parser.get("markdown_image_response_type", "1")
|
||||
self.logger.info("[TCADP] Configuration read from service_conf.yaml")
|
||||
# Set table_result_type and markdown_image_response_type from config or parameters
|
||||
self.table_result_type = table_result_type if table_result_type is not None else tcadp_parser.get("table_result_type", "1")
|
||||
self.markdown_image_response_type = markdown_image_response_type if markdown_image_response_type is not None else tcadp_parser.get("markdown_image_response_type", "1")
|
||||
|
||||
else:
|
||||
self.logger.error("[TCADP] Please configure tcadp_config in service_conf.yaml first")
|
||||
# If config file is empty, use provided parameters or defaults
|
||||
self.secret_id = secret_id
|
||||
self.secret_key = secret_key
|
||||
self.region = region or "ap-guangzhou"
|
||||
self.table_result_type = table_result_type if table_result_type is not None else "1"
|
||||
self.markdown_image_response_type = markdown_image_response_type if markdown_image_response_type is not None else "1"
|
||||
|
||||
except ImportError:
|
||||
self.logger.info("[TCADP] Configuration module import failed")
|
||||
# If config file is not available, use provided parameters or defaults
|
||||
self.secret_id = secret_id
|
||||
self.secret_key = secret_key
|
||||
self.region = region or "ap-guangzhou"
|
||||
self.table_result_type = table_result_type if table_result_type is not None else "1"
|
||||
self.markdown_image_response_type = markdown_image_response_type if markdown_image_response_type is not None else "1"
|
||||
|
||||
# Log final values
|
||||
self.logger.info(f"[TCADP] Final values - table_result_type: {self.table_result_type}, markdown_image_response_type: {self.markdown_image_response_type}")
|
||||
|
||||
if not self.secret_id or not self.secret_key:
|
||||
raise ValueError("[TCADP] Please set Tencent Cloud API keys, configure tcadp_config in service_conf.yaml")
|
||||
@ -400,6 +420,8 @@ class TCADPParser(RAGFlowPdfParser):
|
||||
"TableResultType": self.table_result_type,
|
||||
"MarkdownImageResponseType": self.markdown_image_response_type
|
||||
}
|
||||
|
||||
self.logger.info(f"[TCADP] API request config - TableResultType: {self.table_result_type}, MarkdownImageResponseType: {self.markdown_image_response_type}")
|
||||
|
||||
result = client.reconstruct_document_sse(
|
||||
file_type=file_type,
|
||||
|
||||
Reference in New Issue
Block a user