Files
ragflow/pyproject.toml
PentaFDevs f9510edbbc Feature/docs generator (#11858)
### Type of change

- [x] New Feature (non-breaking change which adds functionality)


### What problem does this PR solve?

This PR introduces a new Docs Generator agent component for producing
downloadable PDF, DOCX, or TXT files from Markdown content generated
within a RAGFlow workflow.

### **Key Features**

**Backend**

- New component: DocsGenerator (agent/component/docs_generator.py)
- Markdown → PDF/DOCX/TXT conversion
- Supports tables, lists, code blocks, headings, and rich formatting
- Configurable document style (fonts, margins, colors, page size,
orientation)
- Optional header logo and footer with page numbers/timestamps

**Frontend**

- New configuration UI for the Docs Generator
- Download button integrated into the chat interface
- Output wired to the Message component
- Full i18n support

**Documentation**

Added component guide:
docs/guides/agent/agent_component_reference/docs_generator.md

**Usage**

Add the Docs Generator to a workflow, connect Markdown output from an
upstream component, configure metadata/style, and feed its output into
the Message component. Users will see a document download button
directly in the chat.

**Contributor Note**

We have been following RAGFlow for more than a year and a half now and
have worked extensively on personalizing the framework and integrating
it into several of our internal systems. Over the past year and a half,
we have built multiple platforms that rely on RAGFlow as a core
component, which has given us a strong appreciation for how flexible and
powerful the project is.

We also previously contributed the full Italian translation, and we were
glad to see it accepted. This new Docs Generator component was created
for our own production needs, and we believe that it may be useful for
many others in the community as well.

We want to sincerely thank the entire RAGFlow team for the remarkable
work you have done and continue to do. If there are opportunities to
contribute further, we would be glad to help whenever we have time
available. It would be a pleasure to support the project in any way we
can.

If appropriate, we would be glad to be listed among the project’s
contributors, but in any case we look forward to continuing to support
and contribute to the project.

PentaFrame Development Team

---------

Co-authored-by: PentaFrame <info@pentaframe.it>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
2025-12-12 14:59:43 +08:00

283 lines
7.4 KiB
TOML

# Core package metadata and runtime requirements.
[project]
name = "ragflow"
version = "0.22.1"
description = "[RAGFlow](https://ragflow.io/) is an open-source RAG (Retrieval-Augmented Generation) engine based on deep document understanding. It offers a streamlined RAG workflow for businesses of any scale, combining LLM (Large Language Models) to provide truthful question-answering capabilities, backed by well-founded citations from various complex formatted data."
authors = [{ name = "Zhichang Yu", email = "yuzhichang@gmail.com" }]
license-files = ["LICENSE"]
readme = "README.md"
requires-python = ">=3.12,<3.15"
# Runtime requirements as PEP 508 strings. Entries carrying a `; ...` suffix
# are only installed when that environment marker matches.
dependencies = [
    "datrie>=0.8.3,<0.9.0",
    "akshare>=1.15.78,<2.0.0",
    "azure-storage-blob==12.22.0",
    "azure-identity==1.17.1",
    "azure-storage-file-datalake==12.16.0",
    "anthropic==0.34.1",
    "arxiv==2.1.3",
    "aspose-slides>=25.10.0,<26.0.0; platform_machine == 'x86_64' or (sys_platform == 'darwin' and platform_machine == 'arm64')",
    "atlassian-python-api==4.0.7",
    "beartype>=0.20.0,<1.0.0",
    "bio==1.7.1",
    "blinker==1.7.0",
    "boto3==1.34.140",
    "botocore==1.34.140",
    "cachetools==5.3.3",
    "chardet==5.2.0",
    "cn2an==0.5.22",
    "cohere==5.6.2",
    "Crawl4AI>=0.4.0,<1.0.0",
    "dashscope==1.20.11",
    "deepl==1.18.0",
    "demjson3==3.0.6",
    "discord-py==2.3.2",
    "dropbox==12.0.2",
    "duckduckgo-search>=7.2.0,<8.0.0",
    "editdistance==0.8.1",
    "elastic-transport==8.12.0",
    "elasticsearch==8.12.1",
    "elasticsearch-dsl==8.12.0",
    "extract-msg>=0.39.0",
    "filelock==3.15.4",
    "flask==3.0.3",
    "flask-cors==5.0.0",
    "flask-login==0.6.3",
    "flask-session==0.8.0",
    "google-search-results==2.4.2",
    "google-auth-oauthlib>=1.2.0,<2.0.0",
    "groq==0.9.0",
    "hanziconv==0.3.2",
    "html-text==0.6.2",
    "httpx[socks]>=0.28.1,<0.29.0",
    "huggingface-hub>=0.25.0,<0.26.0",
    "infinity-sdk==0.6.11",
    "infinity-emb>=0.0.66,<0.0.67",
    "itsdangerous==2.1.2",
    "json-repair==0.35.0",
    "jira==3.10.5",
    "markdown==3.6",
    "markdown-to-json==2.1.1",
    "minio==7.2.4",
    "mistralai==0.4.2",
    "mypy-boto3-s3==1.40.26",
    "nltk==3.9.1",
    "numpy>=1.26.0,<2.0.0",
    "Office365-REST-Python-Client==2.6.2",
    "ollama>=0.5.0",
    # CPU build on macOS and non-x86_64 machines; GPU build everywhere else.
    "onnxruntime==1.23.2; sys_platform == 'darwin' or platform_machine != 'x86_64'",
    "onnxruntime-gpu==1.23.2; sys_platform != 'darwin' and platform_machine == 'x86_64'",
    "openai>=1.45.0",
    # NOTE(review): both OpenCV wheels are pinned; they appear to provide the
    # same `cv2` module — confirm that installing both is intentional.
    "opencv-python==4.10.0.84",
    "opencv-python-headless==4.10.0.84",
    "openpyxl>=3.1.0,<4.0.0",
    "opendal>=0.45.0,<0.46.0",
    "ormsgpack==1.5.0",
    "pandas>=2.2.0,<3.0.0",
    "pdfplumber==0.10.4",
    "peewee==3.17.1",
    "pillow>=10.4.0,<13.0.0",
    "protobuf==5.27.2",
    "psycopg2-binary>=2.9.11,<3.0.0",
    "pyclipper>=1.4.0,<2.0.0",
    "pycryptodomex==3.20.0",
    "pymysql>=1.1.1,<2.0.0",
    "pypdf==6.4.0",
    "python-dotenv==1.0.1",
    "python-dateutil==2.8.2",
    "python-pptx>=1.0.2,<2.0.0",
    "pywencai>=0.13.1,<1.0.0",
    "qianfan==0.4.6",
    "quart-auth==0.11.0",
    "quart-cors==0.8.0",
    "Quart==0.20.0",
    "ranx==0.3.20",
    "readability-lxml>=0.8.4,<1.0.0",
    "valkey==6.0.2",
    "requests>=2.32.3,<3.0.0",
    "replicate==0.31.0",
    "roman-numbers==1.0.2",
    "ruamel-base==1.0.0",
    "ruamel-yaml>=0.18.6,<0.19.0",
    "scholarly==1.7.11",
    "scikit-learn==1.5.0",
    "selenium==4.22.0",
    "selenium-wire==5.1.0",
    "setuptools>=78.1.1,<81.0.0",
    "shapely==2.0.5",
    "six==1.16.0",
    "slack-sdk==3.37.0",
    "strenum==0.4.15",
    "tabulate==0.9.0",
    "tavily-python==0.5.1",
    "tencentcloud-sdk-python==3.0.1478",
    "tika==2.6.0",
    "tiktoken==0.7.0",
    "umap_learn==0.5.6",
    "vertexai==1.70.0",
    "google-genai>=1.41.0,<2.0.0",
    "volcengine==1.0.194",
    "voyageai==0.2.3",
    "webdav4>=0.10.0,<0.11.0",
    "webdriver-manager==4.0.1",
    "werkzeug==3.0.6",
    "wikipedia==1.4.0",
    "word2number==1.1",
    "xgboost==1.6.0",
    "xpinyin==0.7.6",
    "yfinance==0.2.65",
    "zhipuai==2.0.1",
    "google-generativeai>=0.8.1,<0.9.0", # Needed for cv_model and embedding_model
    "python-docx>=1.1.2,<2.0.0",
    "pypdf2>=3.0.1,<4.0.0",
    # Installed from a pinned commit of a Git fork rather than from an index.
    "graspologic @ git+https://github.com/yuzhichang/graspologic.git@38e680cab72bc9fb68a7992c3bcc2d53b24e42fd",
    "mini-racer>=0.12.4,<0.13.0",
    "pyodbc>=5.2.0,<6.0.0",
    "flasgger>=0.9.7.1,<0.10.0",
    "xxhash>=3.5.0,<4.0.0",
    "trio>=0.17.0,<0.29.0",
    "langfuse>=2.60.0",
    "debugpy>=1.8.13",
    "mcp>=1.9.4",
    "opensearch-py==2.7.1",
    "pluginlib==0.9.4",
    "click>=8.1.8",
    "python-calamine>=0.4.0",
    "litellm>=1.74.15.post1",
    "flask-mail>=0.10.0",
    "lark>=1.2.2",
    "mammoth>=1.11.0",
    "markdownify>=1.2.0",
    "captcha>=0.7.1",
    "pip>=25.2",
    "moodlepy>=0.23.0",
    "pypandoc>=1.16",
    "pyobvector==0.2.18",
    "exceptiongroup>=1.3.0,<2.0.0",
    "ffmpeg-python>=0.2.0",
    "imageio-ffmpeg>=0.6.0",
    "reportlab>=4.4.1",
    "jinja2>=3.1.0",
    "boxsdk>=10.1.0",
    "aiosmtplib>=5.0.0",
]
# Development-only dependency groups; not part of the runtime requirements.
[dependency-groups]
# Packages needed to run the test suite.
test = [
    "hypothesis>=6.132.0",
    "openpyxl>=3.1.5",
    "pillow>=10.4.0,<13.0.0",
    "pytest>=8.3.5",
    "pytest-asyncio>=1.3.0",
    "pytest-xdist>=3.8.0",
    "pytest-cov>=7.0.0",
    "python-docx>=1.1.2",
    "python-pptx>=1.0.2",
    "reportlab>=4.4.1",
    "requests>=2.32.2",
    "requests-toolbelt>=1.0.0",
]
# Package index entry for uv, pointing at the TUNA (tsinghua.edu.cn) PyPI mirror.
# NOTE(review): presumably chosen for faster installs from mainland China —
# confirm it is reachable from every environment that builds this project.
[[tool.uv.index]]
url = "https://pypi.tuna.tsinghua.edu.cn/simple"
# Explicit list of the packages setuptools includes in the distribution.
[tool.setuptools]
packages = [
    "agent",
    "agentic_reasoning",
    "api",
    "deepdoc",
    "graphrag",
    # NOTE(review): "intergrations" looks misspelled but presumably mirrors the
    # on-disk directory name — verify before renaming.
    "intergrations.chatgpt-on-wechat.plugins",
    "mcp.server",
    "rag",
    "sdk.python.ragflow_sdk",
]
# Ruff settings.
[tool.ruff]
# Allow lines of up to 200 characters.
line-length = 200
# Paths Ruff skips: the virtualenv directory and the Discord service script.
exclude = [".venv", "rag/svr/discord_svr.py"]
# Lint rule selection.
[tool.ruff.lint]
# Enable the flake8-async ("ASYNC") rules in addition to the default rule set.
# NOTE(review): "ASYNC1" appears to be a sub-prefix of "ASYNC" and so may be
# redundant — confirm before removing.
extend-select = ["ASYNC", "ASYNC1"]
# E402: module-level import not at top of file.
ignore = ["E402"]
# pytest settings.
[tool.pytest.ini_options]
# Add "." to sys.path for the test run.
pythonpath = ["."]
# Where tests live and how test files, classes and functions are recognised.
testpaths = ["test"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
# Priority markers; they must be declared here because --strict-markers is set below.
markers = [
    "p1: high priority test cases",
    "p2: medium priority test cases",
    "p3: low priority test cases",
]
# Warning filters, listed in the order they are declared.
filterwarnings = [
    # Treat warnings as errors
    "error",
    # Ignore DeprecationWarning
    "ignore::DeprecationWarning",
]
# Options added to every pytest invocation.
addopts = [
    # Verbose output
    "-v",
    # Enforce marker definitions
    "--strict-markers",
    # Simplified traceback
    "--tb=short",
    # Suppress the warnings summary
    "--disable-warnings",
    # Colored output
    "--color=yes",
]
# coverage.py: measurement settings.
[tool.coverage.run]
# Directories whose code is measured; adjust to the project layout.
# NOTE(review): the "../../" prefix suggests these paths were written for a run
# started two levels below the repository root — confirm they resolve correctly
# from wherever coverage is actually invoked.
source = [
    # "../../api/db/services",
    # Add more directories if needed:
    "../../common",
    # "../../utils",
]
# File and directory patterns left out of measurement.
omit = [
    "*/tests/*",
    "*/test_*",
    "*/__pycache__/*",
    "*/.pytest_cache/*",
    "*/venv/*",
    "*/.venv/*",
    "*/env/*",
    "*/site-packages/*",
    "*/dist/*",
    "*/build/*",
    "*/migrations/*",
    "setup.py",
]
# coverage.py: report settings.
[tool.coverage.report]
# Show percentages with two decimal places.
precision = 2
# List the line numbers that were not executed.
show_missing = true
# Keep fully covered files in the report.
skip_covered = false
# Minimum total coverage (0-100); 0 means the report never fails on coverage.
fail_under = 0
# Regular expressions for source lines to drop from the report.
# coverage.py matches each regex anywhere within a line, so patterns must be
# anchored to avoid excluding unrelated code. Setting exclude_lines also
# replaces coverage.py's built-in defaults, which is why "pragma: no cover"
# is inactive while it stays commented out below.
exclude_lines = [
    # "pragma: no cover",
    # "def __repr__",
    # "raise AssertionError",
    # "raise NotImplementedError",
    # "if __name__ == .__main__.:",
    # "if TYPE_CHECKING:",
    # Bare `pass` statements only. An unanchored "pass" would also exclude any
    # line containing e.g. "password", "bypass" or "passed".
    '^[ \t]*pass\b',
]
# coverage.py: HTML report settings.
[tool.coverage.html]
# Directory the HTML report is written to.
directory = "htmlcov"
# Title used for the generated report.
title = "Test Coverage Report"
# extra_css = "custom.css" # Optional custom CSS