mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-24 15:36:50 +08:00
### Type of change - [x] New Feature (non-breaking change which adds functionality) ### What problem does this PR solve? This PR introduces a new Docs Generator agent component for producing downloadable PDF, DOCX, or TXT files from Markdown content generated within a RAGFlow workflow. ### **Key Features** **Backend** - New component: DocsGenerator (agent/component/docs_generator.py) - - Markdown → PDF/DOCX/TXT conversion - - Supports tables, lists, code blocks, headings, and rich formatting - - Configurable document style (fonts, margins, colors, page size, orientation) - - Optional header logo and footer with page numbers/timestamps - **Frontend** - New configuration UI for the Docs Generator - - Download button integrated into the chat interface - - Output wired to the Message component - - Full i18n support **Documentation** Added component guide: docs/guides/agent/agent_component_reference/docs_generator.md **Usage** Add the Docs Generator to a workflow, connect Markdown output from an upstream component, configure metadata/style, and feed its output into the Message component. Users will see a document download button directly in the chat. **Contributor Note** We have been following RAGFlow for more than a year and a half now and have worked extensively on personalizing the framework and integrating it into several of our internal systems. Over the past year and a half, we have built multiple platforms that rely on RAGFlow as a core component, which has given us a strong appreciation for how flexible and powerful the project is. We also previously contributed the full Italian translation, and we were glad to see it accepted. This new Docs Generator component was created for our own production needs, and we believe that it may be useful for many others in the community as well. We want to sincerely thank the entire RAGFlow team for the remarkable work you have done and continue to do.
If there are opportunities to contribute further, we would be glad to help whenever we have time available. It would be a pleasure to support the project in any way we can. If appropriate, we would be glad to be listed among the project’s contributors, but in any case we look forward to continuing to support and contribute to the project. PentaFrame Development Team --------- Co-authored-by: PentaFrame <info@pentaframe.it> Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
283 lines
7.4 KiB
TOML
283 lines
7.4 KiB
TOML
[project]
name = "ragflow"
version = "0.22.1"
description = "[RAGFlow](https://ragflow.io/) is an open-source RAG (Retrieval-Augmented Generation) engine based on deep document understanding. It offers a streamlined RAG workflow for businesses of any scale, combining LLM (Large Language Models) to provide truthful question-answering capabilities, backed by well-founded citations from various complex formatted data."
authors = [{ name = "Zhichang Yu", email = "yuzhichang@gmail.com" }]
license-files = ["LICENSE"]
readme = "README.md"
requires-python = ">=3.12,<3.15"

# Runtime dependencies as PEP 508 requirement strings. The order is kept as-is;
# most entries are exact pins, so change versions deliberately.
dependencies = [
    "datrie>=0.8.3,<0.9.0",
    "akshare>=1.15.78,<2.0.0",
    "azure-storage-blob==12.22.0",
    "azure-identity==1.17.1",
    "azure-storage-file-datalake==12.16.0",
    "anthropic==0.34.1",
    "arxiv==2.1.3",
    # Only installed on x86_64 machines, or on macOS running on arm64.
    "aspose-slides>=25.10.0,<26.0.0; platform_machine == 'x86_64' or (sys_platform == 'darwin' and platform_machine == 'arm64')",
    "atlassian-python-api==4.0.7",
    "beartype>=0.20.0,<1.0.0",
    "bio==1.7.1",
    "blinker==1.7.0",
    "boto3==1.34.140",
    "botocore==1.34.140",
    "cachetools==5.3.3",
    "chardet==5.2.0",
    "cn2an==0.5.22",
    "cohere==5.6.2",
    "Crawl4AI>=0.4.0,<1.0.0",
    "dashscope==1.20.11",
    "deepl==1.18.0",
    "demjson3==3.0.6",
    "discord-py==2.3.2",
    "dropbox==12.0.2",
    "duckduckgo-search>=7.2.0,<8.0.0",
    "editdistance==0.8.1",
    "elastic-transport==8.12.0",
    "elasticsearch==8.12.1",
    "elasticsearch-dsl==8.12.0",
    "extract-msg>=0.39.0",
    "filelock==3.15.4",
    "flask==3.0.3",
    "flask-cors==5.0.0",
    "flask-login==0.6.3",
    "flask-session==0.8.0",
    "google-search-results==2.4.2",
    "google-auth-oauthlib>=1.2.0,<2.0.0",
    "groq==0.9.0",
    "hanziconv==0.3.2",
    "html-text==0.6.2",
    "httpx[socks]>=0.28.1,<0.29.0",
    "huggingface-hub>=0.25.0,<0.26.0",
    "infinity-sdk==0.6.11",
    "infinity-emb>=0.0.66,<0.0.67",
    "itsdangerous==2.1.2",
    "json-repair==0.35.0",
    "jira==3.10.5",
    "markdown==3.6",
    "markdown-to-json==2.1.1",
    "minio==7.2.4",
    "mistralai==0.4.2",
    "mypy-boto3-s3==1.40.26",
    "nltk==3.9.1",
    "numpy>=1.26.0,<2.0.0",
    "Office365-REST-Python-Client==2.6.2",
    "ollama>=0.5.0",
    # The two ONNX Runtime markers below are mutually exclusive: plain
    # onnxruntime on macOS or any non-x86_64 machine, onnxruntime-gpu on
    # x86_64 machines that are not macOS.
    "onnxruntime==1.23.2; sys_platform == 'darwin' or platform_machine != 'x86_64'",
    "onnxruntime-gpu==1.23.2; sys_platform != 'darwin' and platform_machine == 'x86_64'",
    "openai>=1.45.0",
    # NOTE(review): both OpenCV distributions are pinned side by side;
    # presumably they provide the same import package - confirm both are needed.
    "opencv-python==4.10.0.84",
    "opencv-python-headless==4.10.0.84",
    "openpyxl>=3.1.0,<4.0.0",
    "opendal>=0.45.0,<0.46.0",
    "ormsgpack==1.5.0",
    "pandas>=2.2.0,<3.0.0",
    "pdfplumber==0.10.4",
    "peewee==3.17.1",
    "pillow>=10.4.0,<13.0.0",
    "protobuf==5.27.2",
    "psycopg2-binary>=2.9.11,<3.0.0",
    "pyclipper>=1.4.0,<2.0.0",
    "pycryptodomex==3.20.0",
    "pymysql>=1.1.1,<2.0.0",
    "pypdf==6.4.0",
    "python-dotenv==1.0.1",
    "python-dateutil==2.8.2",
    "python-pptx>=1.0.2,<2.0.0",
    "pywencai>=0.13.1,<1.0.0",
    "qianfan==0.4.6",
    "quart-auth==0.11.0",
    "quart-cors==0.8.0",
    "Quart==0.20.0",
    "ranx==0.3.20",
    "readability-lxml>=0.8.4,<1.0.0",
    "valkey==6.0.2",
    "requests>=2.32.3,<3.0.0",
    "replicate==0.31.0",
    "roman-numbers==1.0.2",
    "ruamel-base==1.0.0",
    "ruamel-yaml>=0.18.6,<0.19.0",
    "scholarly==1.7.11",
    "scikit-learn==1.5.0",
    "selenium==4.22.0",
    "selenium-wire==5.1.0",
    "setuptools>=78.1.1,<81.0.0",
    "shapely==2.0.5",
    "six==1.16.0",
    "slack-sdk==3.37.0",
    "strenum==0.4.15",
    "tabulate==0.9.0",
    "tavily-python==0.5.1",
    "tencentcloud-sdk-python==3.0.1478",
    "tika==2.6.0",
    "tiktoken==0.7.0",
    "umap_learn==0.5.6",
    "vertexai==1.70.0",
    "google-genai>=1.41.0,<2.0.0",
    "volcengine==1.0.194",
    "voyageai==0.2.3",
    "webdav4>=0.10.0,<0.11.0",
    "webdriver-manager==4.0.1",
    "werkzeug==3.0.6",
    "wikipedia==1.4.0",
    "word2number==1.1",
    "xgboost==1.6.0",
    "xpinyin==0.7.6",
    "yfinance==0.2.65",
    "zhipuai==2.0.1",
    "google-generativeai>=0.8.1,<0.9.0",  # Needed for cv_model and embedding_model
    "python-docx>=1.1.2,<2.0.0",
    "pypdf2>=3.0.1,<4.0.0",
    # Installed straight from a Git repository, pinned to one commit.
    "graspologic @ git+https://github.com/yuzhichang/graspologic.git@38e680cab72bc9fb68a7992c3bcc2d53b24e42fd",
    "mini-racer>=0.12.4,<0.13.0",
    "pyodbc>=5.2.0,<6.0.0",
    "flasgger>=0.9.7.1,<0.10.0",
    "xxhash>=3.5.0,<4.0.0",
    "trio>=0.17.0,<0.29.0",
    "langfuse>=2.60.0",
    "debugpy>=1.8.13",
    "mcp>=1.9.4",
    "opensearch-py==2.7.1",
    "pluginlib==0.9.4",
    "click>=8.1.8",
    "python-calamine>=0.4.0",
    "litellm>=1.74.15.post1",
    "flask-mail>=0.10.0",
    "lark>=1.2.2",
    "mammoth>=1.11.0",
    "markdownify>=1.2.0",
    "captcha>=0.7.1",
    "pip>=25.2",
    "moodlepy>=0.23.0",
    "pypandoc>=1.16",
    "pyobvector==0.2.18",
    "exceptiongroup>=1.3.0,<2.0.0",
    "ffmpeg-python>=0.2.0",
    "imageio-ffmpeg>=0.6.0",
    "reportlab>=4.4.1",
    "jinja2>=3.1.0",
    "boxsdk>=10.1.0",
    "aiosmtplib>=5.0.0",
]

[dependency-groups]
# Packages listed for the `test` group, in addition to the project's
# runtime dependencies.
test = [
    "hypothesis>=6.132.0",
    "openpyxl>=3.1.5",
    "pillow>=10.4.0,<13.0.0",
    "pytest>=8.3.5",
    "pytest-asyncio>=1.3.0",
    "pytest-xdist>=3.8.0",
    "pytest-cov>=7.0.0",
    "python-docx>=1.1.2",
    "python-pptx>=1.0.2",
    "reportlab>=4.4.1",
    "requests>=2.32.2",
    "requests-toolbelt>=1.0.0",
]

# Package index entry for uv; the URL points at the PyPI "simple" endpoint
# served from pypi.tuna.tsinghua.edu.cn.
[[tool.uv.index]]
url = "https://pypi.tuna.tsinghua.edu.cn/simple"

[tool.setuptools]
# Explicit list of packages handed to setuptools.
# NOTE(review): the spelling "intergrations" is reproduced verbatim; it
# presumably mirrors the on-disk directory name - confirm before "fixing" it.
packages = [
    'agent',
    'agentic_reasoning',
    'api',
    'deepdoc',
    'graphrag',
    'intergrations.chatgpt-on-wechat.plugins',
    'mcp.server',
    'rag',
    'sdk.python.ragflow_sdk',
]

[tool.ruff]
line-length = 200
# Paths Ruff skips entirely.
exclude = [".venv", "rag/svr/discord_svr.py"]

[tool.ruff.lint]
# Rule prefixes enabled on top of Ruff's default selection.
# NOTE(review): "ASYNC1" looks like a sub-prefix of "ASYNC" and may be
# redundant - confirm against the Ruff rule list before removing it.
extend-select = ["ASYNC", "ASYNC1"]
ignore = ["E402"]

[tool.pytest.ini_options]
pythonpath = ["."]

# Discovery: where tests live and which names count as tests.
testpaths = ["test"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]

# Custom markers; "--strict-markers" in addopts below enforces this list.
markers = [
    "p1: high priority test cases",
    "p2: medium priority test cases",
    "p3: low priority test cases",
]

# Test collection and runtime configuration
filterwarnings = [
    "error",                       # Treat warnings as errors
    "ignore::DeprecationWarning",  # Except DeprecationWarning, which is ignored
]

# Command line options
addopts = [
    "-v",                  # Verbose output
    "--strict-markers",    # Enforce marker definitions
    "--tb=short",          # Simplified traceback
    "--disable-warnings",  # Disable warnings
    "--color=yes",         # Colored output
]

# Coverage configuration
[tool.coverage.run]
# Source paths - adjust according to your project structure
source = [
    # "../../api/db/services",
    # Add more directories if needed:
    "../../common",
    # "../../utils",
]

# Files/directories to exclude
omit = [
    "*/tests/*",
    "*/test_*",
    "*/__pycache__/*",
    "*/.pytest_cache/*",
    "*/venv/*",
    "*/.venv/*",
    "*/env/*",
    "*/site-packages/*",
    "*/dist/*",
    "*/build/*",
    "*/migrations/*",
    "setup.py",
]

[tool.coverage.report]
# Report configuration
precision = 2
show_missing = true
skip_covered = false
fail_under = 0  # Minimum coverage requirement (0-100)

# Lines to exclude (optional)
# NOTE(review): entries are regular expressions, so the bare "pass" pattern
# presumably also matches lines such as `password = ...` - confirm, and anchor
# the pattern if only `pass` statements are meant to be excluded.
exclude_lines = [
    # "pragma: no cover",
    # "def __repr__",
    # "raise AssertionError",
    # "raise NotImplementedError",
    # "if __name__ == .__main__.:",
    # "if TYPE_CHECKING:",
    "pass",
]

[tool.coverage.html]
# HTML report configuration
directory = "htmlcov"
title = "Test Coverage Report"
# extra_css = "custom.css"  # Optional custom CSS