mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-02-05 01:55:05 +08:00
### What problem does this PR solve?

This PR adds MySQL and PostgreSQL as data source connectors, allowing users to import data directly from relational databases into RAGFlow for RAG workflows.

Many users store their knowledge in databases (product catalogs, documentation, FAQs, etc.) and currently have no way to sync this data into RAGFlow without exporting to files first. This feature lets them connect directly to their databases, run SQL queries, and automatically create documents from the results.

Closes #763
Closes #11560

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):

### What this PR does

**New capabilities:**

- Connect to MySQL and PostgreSQL databases
- Run custom SQL queries to extract data
- Map database columns to document content (vectorized) and metadata (searchable)
- Support incremental sync using a timestamp column
- Full frontend UI with connection form and tooltips

**Files changed:**

Backend:

- `common/constants.py` - Added MYSQL/POSTGRESQL to FileSource enum
- `common/data_source/config.py` - Added to DocumentSource enum
- `common/data_source/rdbms_connector.py` - New connector (368 lines)
- `common/data_source/__init__.py` - Exported the connector
- `rag/svr/sync_data_source.py` - Added MySQL and PostgreSQL sync classes
- `pyproject.toml` - Added mysql-connector-python dependency

Frontend:

- `web/src/pages/user-setting/data-source/constant/index.tsx` - Form fields
- `web/src/locales/en.ts` - English translations
- `web/src/assets/svg/data-source/mysql.svg` - MySQL icon
- `web/src/assets/svg/data-source/postgresql.svg` - PostgreSQL icon

### Testing done

Tested with MySQL 8.0 and PostgreSQL 16:

- Connection validation works correctly
- Full sync imports all query results as documents
- Incremental sync only fetches rows updated since last sync
- Custom SQL queries filter data as expected
- Invalid credentials show clear error messages
- Lint checks pass (`ruff check` returns no errors)

---------

Co-authored-by: mkdev11 <YOUR_GITHUB_ID+MkDev11@users.noreply.github.com>
282 lines
7.7 KiB
TOML
282 lines
7.7 KiB
TOML
# Core project metadata (PEP 621).
[project]
name = "ragflow"
version = "0.23.1"
description = "[RAGFlow](https://ragflow.io/) is an open-source RAG (Retrieval-Augmented Generation) engine based on deep document understanding. It offers a streamlined RAG workflow for businesses of any scale, combining LLM (Large Language Models) to provide truthful question-answering capabilities, backed by well-founded citations from various complex formatted data."
authors = [{ name = "Zhichang Yu", email = "yuzhichang@gmail.com" }]
license-files = ["LICENSE"]
readme = "README.md"
requires-python = ">=3.12,<3.15"

# Runtime dependencies, one PEP 508 requirement string per line.
dependencies = [
    "aiosmtplib>=5.0.0",
    "akshare>=1.15.78,<2.0.0",
    "anthropic==0.34.1",
    "arxiv==2.1.3",
    "atlassian-python-api==4.0.7",
    "azure-identity==1.17.1",
    "azure-storage-file-datalake==12.16.0",
    "beartype>=0.20.0,<1.0.0",
    "bio==1.7.1",
    "boxsdk>=10.1.0",
    "captcha>=0.7.1",
    "cn2an==0.5.22",
    "cohere==5.6.2",
    "Crawl4AI>=0.4.0,<1.0.0",
    "dashscope==1.20.11",
    "deepl==1.18.0",
    "demjson3==3.0.6",
    "discord-py==2.3.2",
    "dropbox==12.0.2",
    "duckduckgo-search>=7.2.0,<8.0.0",
    "editdistance==0.8.1",
    "elasticsearch-dsl==8.12.0",
    "exceptiongroup>=1.3.0,<2.0.0",
    "extract-msg>=0.39.0",
    "ffmpeg-python>=0.2.0",
    "flasgger>=0.9.7.1,<0.10.0",
    "flask-cors==6.0.2",
    "flask-login==0.6.3",
    "flask-mail>=0.10.0",
    "flask-session==0.8.0",
    "google-auth-oauthlib>=1.2.0,<2.0.0",
    "google-genai>=1.41.0,<2.0.0",
    "google-generativeai>=0.8.1,<0.9.0", # Needed for cv_model and embedding_model
    "google-search-results==2.4.2",
    # Installed from a fork pinned to an exact commit rather than from the package index.
    "graspologic @ git+https://github.com/yuzhichang/graspologic.git@38e680cab72bc9fb68a7992c3bcc2d53b24e42fd",
    "groq==0.9.0",
    "grpcio-status==1.67.1",
    "html-text==0.6.2",
    "infinity-sdk==0.7.0-dev2",
    "infinity-emb>=0.0.66,<0.0.67",
    "jira==3.10.5",
    "json-repair==0.35.0",
    "langfuse>=2.60.0",
    "mammoth>=1.11.0",
    "markdown==3.6",
    "markdown-to-json==2.1.1",
    "markdownify>=1.2.0",
    "mcp>=1.19.0",
    "mini-racer>=0.12.4,<0.13.0",
    "minio==7.2.4",
    "mistralai==0.4.2",
    "mysql-connector-python>=9.0.0,<10.0.0",
    "moodlepy>=0.23.0",
    "mypy-boto3-s3==1.40.26",
    "Office365-REST-Python-Client==2.6.2",
    "ollama>=0.5.0",
    # The two environment markers below are mutually exclusive, so exactly one
    # of onnxruntime / onnxruntime-gpu is selected for any given platform.
    "onnxruntime==1.23.2; sys_platform == 'darwin' or platform_machine != 'x86_64'",
    "onnxruntime-gpu==1.23.2; sys_platform != 'darwin' and platform_machine == 'x86_64'",
    # NOTE(review): opencv-python and opencv-python-headless both ship the `cv2`
    # module; confirm that installing both side by side is intended.
    "opencv-python==4.10.0.84",
    "opencv-python-headless==4.10.0.84",
    "opendal>=0.45.0,<0.46.0",
    "opensearch-py==2.7.1",
    "ormsgpack==1.5.0",
    "pdfplumber==0.10.4",
    "pluginlib==0.9.4",
    "psycopg2-binary>=2.9.11,<3.0.0",
    "pyclipper>=1.4.0,<2.0.0",
    "pycryptodomex==3.20.0",
    "pyobvector==0.2.22",
    "pyodbc>=5.2.0,<6.0.0",
    "pypandoc>=1.16",
    "pypdf>=6.6.2",
    "pypdf2>=3.0.1,<4.0.0",
    "python-calamine>=0.4.0",
    "python-docx>=1.1.2,<2.0.0",
    "python-pptx>=1.0.2,<2.0.0",
    # "pywencai>=0.13.1,<1.0.0", # Temporarily disabled: conflicts with agentrun-sdk (pydash>=8), needed for agent/tools/wencai.py
    "qianfan==0.4.6",
    "quart-auth==0.11.0",
    "quart-cors==0.8.0",
    "ranx==0.3.20",
    "readability-lxml>=0.8.4,<1.0.0",
    "replicate==0.31.0",
    "reportlab>=4.4.1",
    "roman-numbers==1.0.2",
    "ruamel-base==1.0.0",
    "ruamel-yaml>=0.18.6,<0.19.0",
    "scholarly==1.7.11",
    "selenium-wire==5.1.0",
    "slack-sdk==3.37.0",
    "socksio==1.0.0",
    "agentrun-sdk>=0.0.16,<1.0.0",
    "nest-asyncio>=1.6.0,<2.0.0", # Needed for agent/component/message.py
    "sqlglotrs==0.9.0",
    "strenum==0.4.15",
    "tavily-python==0.5.1",
    "tencentcloud-sdk-python==3.0.1478",
    "tika==2.6.0",
    "valkey==6.0.2",
    "vertexai==1.70.0",
    "volcengine==1.0.194",
    "voyageai==0.2.3",
    "webdav4>=0.10.0,<0.11.0",
    "webdriver-manager==4.0.1",
    "wikipedia==1.4.0",
    "word2number==1.1",
    "xgboost==1.6.0",
    "xpinyin==0.7.6",
    "yfinance==0.2.65",
    "zhipuai==2.0.1",
    # following modules aren't necessary
    # NOTE(review): presumably these are already pulled in transitively by the
    # packages above - confirm before deleting the commented entries.
    # "nltk==3.9.1",
    # "numpy>=1.26.0,<2.0.0",
    # "openai>=1.45.0",
    # "openpyxl>=3.1.0,<4.0.0",
    # "pandas>=2.2.0,<3.0.0",
    # "peewee==3.17.1",
    # "pillow>=10.4.0,<13.0.0",
    # "protobuf==5.27.2",
    # "pymysql>=1.1.1,<2.0.0",
    # "python-dotenv==1.0.1",
    # "python-dateutil==2.8.2",
    # "Quart==0.20.0",
    # "requests>=2.32.3,<3.0.0",
    # "scikit-learn==1.5.0",
    # "selenium==4.22.0",
    # "setuptools>=78.1.1,<81.0.0",
    # "shapely==2.0.5",
    # "six==1.16.0",
    # "tabulate==0.9.0",
    # "tiktoken==0.7.0",
    # "umap_learn==0.5.6",
    # "werkzeug==3.0.6",
    # "xxhash>=3.5.0,<4.0.0",
    # "trio>=0.17.0,<0.29.0",
    # "debugpy>=1.8.13",
    # "click>=8.1.8",
    # "litellm>=1.74.15.post1",
    # "lark>=1.2.2",
    # "pip>=25.2",
    # "imageio-ffmpeg>=0.6.0",
    # "cryptography==46.0.3",
    # "jinja2>=3.1.0",
    "pyairtable>=3.3.0",
    "pygithub>=2.8.1",
    "asana>=5.2.2",
    "python-gitlab>=7.0.0",
    "quart-schema==0.23.0",
]

# Dependency groups (PEP 735); these are not part of [project].dependencies.
[dependency-groups]
# `test` group: pytest, its plugins, and supporting libraries.
test = [
    "hypothesis>=6.132.0",
    "openpyxl>=3.1.5",
    "pillow>=10.4.0,<13.0.0",
    "pytest>=8.3.5",
    "pytest-asyncio>=1.3.0",
    "pytest-xdist>=3.8.0",
    "pytest-cov>=7.0.0",
    "python-docx>=1.1.2",
    "python-pptx>=1.0.2",
    "reportlab>=4.4.1",
    "requests>=2.32.2",
    "requests-toolbelt>=1.0.0",
    "pycryptodomex==3.20.0",
    "codecov>=2.1.13",
]

# Package index entry for uv; the URL points at the TUNA (Tsinghua) PyPI mirror.
[[tool.uv.index]]
url = "https://pypi.tuna.tsinghua.edu.cn/simple"

# Explicit package list handed to setuptools (no automatic discovery).
[tool.setuptools]
packages = [
    'agent',
    'api',
    'deepdoc',
    'graphrag',
    # NOTE(review): "intergrations" presumably mirrors the directory name on
    # disk - verify before correcting the spelling here.
    'intergrations.chatgpt-on-wechat.plugins',
    'mcp.server',
    'rag',
    'sdk.python.ragflow_sdk',
]

# Ruff settings shared by the linter and formatter.
[tool.ruff]
line-length = 200
# Paths ruff skips entirely.
exclude = [".venv", "rag/svr/discord_svr.py"]

# Ruff lint rule selection.
[tool.ruff.lint]
# Rules enabled on top of ruff's default set.
# NOTE(review): "ASYNC1" looks like a prefix already covered by "ASYNC" - confirm
# whether the second entry is still needed.
extend-select = ["ASYNC", "ASYNC1"]
# E402: module-level import not at top of file.
ignore = ["E402"]

# pytest configuration.
[tool.pytest.ini_options]
# Directories added to sys.path for the test run (relative to pytest's rootdir).
pythonpath = [
    ".",
]

# Where tests live and which files/classes/functions are collected.
testpaths = ["test"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]

# Custom markers; `--strict-markers` in addopts rejects any marker not listed here.
markers = [
    "p1: high priority test cases",
    "p2: medium priority test cases",
    "p3: low priority test cases",
]

# Test collection and runtime configuration
# When a warning matches several filters, the last matching entry wins.
filterwarnings = [
    "error", # Treat warnings as errors
    "ignore::DeprecationWarning", # ...except DeprecationWarning, which is ignored
]

# Command line options
addopts = [
    "-v", # Verbose output
    "--strict-markers", # Enforce marker definitions
    "--tb=short", # Simplified traceback
    "--disable-warnings", # Hide the warnings summary
    "--color=yes", # Colored output
]

# Coverage configuration
[tool.coverage.run]
# Source paths - adjust according to your project structure
# NOTE(review): the "../../" prefix presumably assumes coverage is started two
# directory levels below the repository root - confirm against the test runner.
source = [
    # "../../api/db/services",
    # Add more directories if needed:
    "../../common",
    # "../../utils",
]

# Files/directories to exclude
omit = [
    "*/tests/*",
    "*/test_*",
    "*/__pycache__/*",
    "*/.pytest_cache/*",
    "*/venv/*",
    "*/.venv/*",
    "*/env/*",
    "*/site-packages/*",
    "*/dist/*",
    "*/build/*",
    "*/migrations/*",
    "setup.py",
]

[tool.coverage.report]
# Report configuration
precision = 2
show_missing = true
skip_covered = false
fail_under = 0 # Minimum coverage requirement (0-100)

# Lines to exclude (optional)
# Each entry is a regular expression searched for inside source lines, and
# setting this key replaces coverage.py's built-in list - so "pragma: no cover"
# is not honoured while it stays commented out below.
exclude_lines = [
    # "pragma: no cover",
    # "def __repr__",
    # "raise AssertionError",
    # "raise NotImplementedError",
    # "if __name__ == .__main__.:",
    # "if TYPE_CHECKING:",
    # Anchored to the `pass` statement: an unanchored "pass" pattern would also
    # exclude any line that merely contains it (e.g. "password", "passage").
    '^[ \t]*pass\b',
]

[tool.coverage.html]
# HTML report configuration
# Output directory for the generated HTML report.
directory = "htmlcov"
title = "Test Coverage Report"
# extra_css = "custom.css" # Optional custom CSS