feat/add MySQL and PostgreSQL data source connectors (#12817)
### What problem does this PR solve?

This PR adds MySQL and PostgreSQL as data source connectors, allowing users to import data directly from relational databases into RAGFlow for RAG workflows.

Many users store their knowledge in databases (product catalogs, documentation, FAQs, etc.) and currently have no way to sync this data into RAGFlow without exporting to files first. This feature lets them connect directly to their databases, run SQL queries, and automatically create documents from the results.

Closes #763
Closes #11560

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):

### What this PR does

**New capabilities:**

- Connect to MySQL and PostgreSQL databases
- Run custom SQL queries to extract data
- Map database columns to document content (vectorized) and metadata (searchable)
- Support incremental sync using a timestamp column
- Full frontend UI with connection form and tooltips

**Files changed:**

Backend:

- `common/constants.py` - Added MYSQL/POSTGRESQL to FileSource enum
- `common/data_source/config.py` - Added to DocumentSource enum
- `common/data_source/rdbms_connector.py` - New connector (403 lines)
- `common/data_source/__init__.py` - Exported the connector
- `rag/svr/sync_data_source.py` - Added MySQL and PostgreSQL sync classes
- `pyproject.toml` - Added mysql-connector-python dependency

Frontend:

- `web/src/pages/user-setting/data-source/constant/index.tsx` - Form fields
- `web/src/locales/en.ts` - English translations
- `web/src/assets/svg/data-source/mysql.svg` - MySQL icon
- `web/src/assets/svg/data-source/postgresql.svg` - PostgreSQL icon

### Testing done

Tested with MySQL 8.0 and PostgreSQL 16:

- Connection validation works correctly
- Full sync imports all query results as documents
- Incremental sync only fetches rows updated since last sync
- Custom SQL queries filter data as expected
- Invalid credentials show clear error messages
- Lint checks pass (`ruff check` returns no errors)

---------

Co-authored-by: mkdev11 <YOUR_GITHUB_ID+MkDev11@users.noreply.github.com>
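For orientation, here is a minimal sketch of a MySQL data-source configuration, built only from the keys that the new sync class and the frontend defaults in this PR actually read; it is not part of the diff, and the wrapping payload shape plus every concrete value (host, database, account) are assumptions for illustration.

```python
# Hedged sketch: only keys consumed by the MySQL sync class / frontend defaults
# added in this PR are shown; every concrete value below is a made-up example.
mysql_data_source_config = {
    "host": "db.internal.example.com",        # hypothetical database host
    "port": 3306,
    "database": "knowledge",                  # hypothetical database name
    "query": "SELECT * FROM products WHERE status = 'active'",
    "content_columns": "name,description",    # comma-separated; vectorized as document content
    "metadata_columns": "id,category,price",  # comma-separated; stored as searchable metadata
    "id_column": "id",                        # keeps document IDs stable across syncs
    "timestamp_column": "updated_at",         # enables incremental sync via poll_source()
    "credentials": {
        "username": "ragflow_reader",         # hypothetical read-only account
        "password": "********",
    },
}
```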
`.github/workflows/tests.yml` (vendored, 1 line changed)

```diff
@@ -187,7 +187,6 @@ jobs:
          echo -e "EXPOSE_MYSQL_PORT=${EXPOSE_MYSQL_PORT}" >> docker/.env
          echo -e "MINIO_PORT=${MINIO_PORT}" >> docker/.env
          echo -e "MINIO_CONSOLE_PORT=${MINIO_CONSOLE_PORT}" >> docker/.env
          echo -e "REDIS_PORT=${REDIS_PORT}" >> docker/.env
          echo -e "TEI_PORT=${TEI_PORT}" >> docker/.env
          echo -e "KIBANA_PORT=${KIBANA_PORT}" >> docker/.env
          echo -e "SVR_HTTP_PORT=${SVR_HTTP_PORT}" >> docker/.env
```

`common/constants.py`

```diff
@@ -136,6 +136,8 @@ class FileSource(StrEnum):
    BITBUCKET = "bitbucket"
    ZENDESK = "zendesk"
    SEAFILE = "seafile"
    MYSQL = "mysql"
    POSTGRESQL = "postgresql"


class PipelineTaskType(StrEnum):
```

`common/data_source/__init__.py`

```diff
@@ -40,6 +40,7 @@ from .asana_connector import AsanaConnector
from .imap_connector import ImapConnector
from .zendesk_connector import ZendeskConnector
from .seafile_connector import SeaFileConnector
from .rdbms_connector import RDBMSConnector
from .config import BlobType, DocumentSource
from .models import Document, TextSection, ImageSection, BasicExpertInfo
from .exceptions import (
@@ -79,4 +80,5 @@ __all__ = [
    "ImapConnector",
    "ZendeskConnector",
    "SeaFileConnector",
    "RDBMSConnector",
]
```

`common/data_source/config.py`

```diff
@@ -63,7 +63,9 @@ class DocumentSource(str, Enum):
    IMAP = "imap"
    BITBUCKET = "bitbucket"
    ZENDESK = "zendesk"
    SEAFILE = "seafile"
    MYSQL = "mysql"
    POSTGRESQL = "postgresql"


class FileOrigin(str, Enum):
```

`common/data_source/rdbms_connector.py` (new file, 403 lines)

```python
"""RDBMS (MySQL/PostgreSQL) data source connector for importing data from relational databases."""

import hashlib
import json
import logging
from datetime import datetime, timezone
from enum import Enum
from typing import Any, Dict, Generator, Optional, Union

from common.data_source.config import DocumentSource, INDEX_BATCH_SIZE
from common.data_source.exceptions import (
    ConnectorMissingCredentialError,
    ConnectorValidationError,
)
from common.data_source.interfaces import LoadConnector, PollConnector, SecondsSinceUnixEpoch
from common.data_source.models import Document


class DatabaseType(str, Enum):
    """Supported database types."""
    MYSQL = "mysql"
    POSTGRESQL = "postgresql"


class RDBMSConnector(LoadConnector, PollConnector):
    """
    RDBMS connector for importing data from MySQL and PostgreSQL databases.

    This connector allows users to:
    1. Connect to a MySQL or PostgreSQL database
    2. Execute a SQL query to extract data
    3. Map columns to content (for vectorization) and metadata
    4. Sync data in batch or incremental mode using a timestamp column
    """

    def __init__(
        self,
        db_type: str,
        host: str,
        port: int,
        database: str,
        query: str,
        content_columns: str,
        metadata_columns: Optional[str] = None,
        id_column: Optional[str] = None,
        timestamp_column: Optional[str] = None,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        """
        Initialize the RDBMS connector.

        Args:
            db_type: Database type ('mysql' or 'postgresql')
            host: Database host
            port: Database port
            database: Database name
            query: SQL query to execute (e.g., "SELECT * FROM products WHERE status = 'active'")
            content_columns: Comma-separated column names to use for document content
            metadata_columns: Comma-separated column names to use as metadata (optional)
            id_column: Column to use as unique document ID (optional, will generate hash if not provided)
            timestamp_column: Column to use for incremental sync (optional, must be datetime/timestamp type)
            batch_size: Number of documents per batch
        """
        self.db_type = DatabaseType(db_type.lower())
        self.host = host.strip()
        self.port = port
        self.database = database.strip()
        self.query = query.strip()
        self.content_columns = [c.strip() for c in content_columns.split(",") if c.strip()]
        self.metadata_columns = [c.strip() for c in (metadata_columns or "").split(",") if c.strip()]
        self.id_column = id_column.strip() if id_column else None
        self.timestamp_column = timestamp_column.strip() if timestamp_column else None
        self.batch_size = batch_size

        self._connection = None
        self._credentials: Dict[str, Any] = {}

    def load_credentials(self, credentials: Dict[str, Any]) -> Dict[str, Any] | None:
        """Load database credentials."""
        logging.debug(f"Loading credentials for {self.db_type} database: {self.database}")

        required_keys = ["username", "password"]
        for key in required_keys:
            if not credentials.get(key):
                raise ConnectorMissingCredentialError(f"RDBMS ({self.db_type}): missing {key}")

        self._credentials = credentials
        return None

    def _get_connection(self):
        """Create and return a database connection."""
        if self._connection is not None:
            return self._connection

        username = self._credentials.get("username")
        password = self._credentials.get("password")

        if self.db_type == DatabaseType.MYSQL:
            try:
                import mysql.connector
            except ImportError:
                raise ConnectorValidationError(
                    "MySQL connector not installed. Please install mysql-connector-python."
                )
            try:
                self._connection = mysql.connector.connect(
                    host=self.host,
                    port=self.port,
                    database=self.database,
                    user=username,
                    password=password,
                    charset='utf8mb4',
                    use_unicode=True,
                )
            except Exception as e:
                raise ConnectorValidationError(f"Failed to connect to MySQL: {e}")
        elif self.db_type == DatabaseType.POSTGRESQL:
            try:
                import psycopg2
            except ImportError:
                raise ConnectorValidationError(
                    "PostgreSQL connector not installed. Please install psycopg2-binary."
                )
            try:
                self._connection = psycopg2.connect(
                    host=self.host,
                    port=self.port,
                    dbname=self.database,
                    user=username,
                    password=password,
                )
            except Exception as e:
                raise ConnectorValidationError(f"Failed to connect to PostgreSQL: {e}")

        return self._connection

    def _close_connection(self):
        """Close the database connection."""
        if self._connection is not None:
            try:
                self._connection.close()
            except Exception:
                pass
            self._connection = None

    def _get_tables(self) -> list[str]:
        """Get list of all tables in the database."""
        connection = self._get_connection()
        cursor = connection.cursor()

        try:
            if self.db_type == DatabaseType.MYSQL:
                cursor.execute("SHOW TABLES")
            else:
                cursor.execute(
                    "SELECT table_name FROM information_schema.tables "
                    "WHERE table_schema = 'public' AND table_type = 'BASE TABLE'"
                )
            tables = [row[0] for row in cursor.fetchall()]
            return tables
        finally:
            cursor.close()

    def _build_query_with_time_filter(
        self,
        start: Optional[datetime] = None,
        end: Optional[datetime] = None,
    ) -> str:
        """Build the query with optional time filtering for incremental sync."""
        if not self.query:
            return ""  # Will be handled by table discovery

        base_query = self.query.rstrip(";")

        if not self.timestamp_column or (start is None and end is None):
            return base_query

        has_where = "where" in base_query.lower()
        connector = " AND" if has_where else " WHERE"

        time_conditions = []
        if start is not None:
            if self.db_type == DatabaseType.MYSQL:
                time_conditions.append(f"{self.timestamp_column} > '{start.strftime('%Y-%m-%d %H:%M:%S')}'")
            else:
                time_conditions.append(f"{self.timestamp_column} > '{start.isoformat()}'")

        if end is not None:
            if self.db_type == DatabaseType.MYSQL:
                time_conditions.append(f"{self.timestamp_column} <= '{end.strftime('%Y-%m-%d %H:%M:%S')}'")
            else:
                time_conditions.append(f"{self.timestamp_column} <= '{end.isoformat()}'")

        if time_conditions:
            return f"{base_query}{connector} {' AND '.join(time_conditions)}"

        return base_query

    def _row_to_document(self, row: Union[tuple, list, Dict[str, Any]], column_names: list) -> Document:
        """Convert a database row to a Document."""
        row_dict = dict(zip(column_names, row)) if isinstance(row, (list, tuple)) else row

        content_parts = []
        for col in self.content_columns:
            if col in row_dict and row_dict[col] is not None:
                value = row_dict[col]
                if isinstance(value, (dict, list)):
                    value = json.dumps(value, ensure_ascii=False)
                content_parts.append(f"{col}: {value}")

        content = "\n".join(content_parts)

        if self.id_column and self.id_column in row_dict:
            doc_id = f"{self.db_type}:{self.database}:{row_dict[self.id_column]}"
        else:
            content_hash = hashlib.md5(content.encode()).hexdigest()
            doc_id = f"{self.db_type}:{self.database}:{content_hash}"

        metadata = {}
        for col in self.metadata_columns:
            if col in row_dict and row_dict[col] is not None:
                value = row_dict[col]
                if isinstance(value, datetime):
                    value = value.isoformat()
                elif isinstance(value, (dict, list)):
                    value = json.dumps(value, ensure_ascii=False)
                else:
                    value = str(value)
                metadata[col] = value

        doc_updated_at = datetime.now(timezone.utc)
        if self.timestamp_column and self.timestamp_column in row_dict:
            ts_value = row_dict[self.timestamp_column]
            if isinstance(ts_value, datetime):
                if ts_value.tzinfo is None:
                    doc_updated_at = ts_value.replace(tzinfo=timezone.utc)
                else:
                    doc_updated_at = ts_value

        first_content_col = self.content_columns[0] if self.content_columns else "record"
        semantic_id = str(row_dict.get(first_content_col, "database_record"))[:100]

        return Document(
            id=doc_id,
            blob=content.encode("utf-8"),
            source=DocumentSource(self.db_type.value),
            semantic_identifier=semantic_id,
            extension=".txt",
            doc_updated_at=doc_updated_at,
            size_bytes=len(content.encode("utf-8")),
            metadata=metadata if metadata else None,
        )

    def _yield_documents_from_query(
        self,
        query: str,
    ) -> Generator[list[Document], None, None]:
        """Generate documents from a single query."""
        connection = self._get_connection()
        cursor = connection.cursor()

        try:
            logging.info(f"Executing query: {query[:200]}...")
            cursor.execute(query)
            column_names = [desc[0] for desc in cursor.description]

            batch: list[Document] = []
            for row in cursor:
                try:
                    doc = self._row_to_document(row, column_names)
                    batch.append(doc)

                    if len(batch) >= self.batch_size:
                        yield batch
                        batch = []
                except Exception as e:
                    logging.warning(f"Error converting row to document: {e}")
                    continue

            if batch:
                yield batch

        finally:
            try:
                cursor.fetchall()
            except Exception:
                pass
            cursor.close()

    def _yield_documents(
        self,
        start: Optional[datetime] = None,
        end: Optional[datetime] = None,
    ) -> Generator[list[Document], None, None]:
        """Generate documents from database query results."""
        if self.query:
            query = self._build_query_with_time_filter(start, end)
            yield from self._yield_documents_from_query(query)
        else:
            tables = self._get_tables()
            logging.info(f"No query specified. Loading all {len(tables)} tables: {tables}")
            for table in tables:
                query = f"SELECT * FROM {table}"
                logging.info(f"Loading table: {table}")
                yield from self._yield_documents_from_query(query)

        self._close_connection()

    def load_from_state(self) -> Generator[list[Document], None, None]:
        """Load all documents from the database (full sync)."""
        logging.debug(f"Loading all records from {self.db_type} database: {self.database}")
        return self._yield_documents()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> Generator[list[Document], None, None]:
        """Poll for new/updated documents since the last sync (incremental sync)."""
        if not self.timestamp_column:
            logging.warning(
                "No timestamp column configured for incremental sync. "
                "Falling back to full sync."
            )
            return self.load_from_state()

        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)

        logging.debug(
            f"Polling {self.db_type} database {self.database} "
            f"from {start_datetime} to {end_datetime}"
        )

        return self._yield_documents(start_datetime, end_datetime)

    def validate_connector_settings(self) -> None:
        """Validate connector settings by testing the connection."""
        if not self._credentials:
            raise ConnectorMissingCredentialError("RDBMS credentials not loaded.")

        if not self.host:
            raise ConnectorValidationError("Database host is required.")

        if not self.database:
            raise ConnectorValidationError("Database name is required.")

        if not self.content_columns:
            raise ConnectorValidationError(
                "At least one content column must be specified."
            )

        try:
            connection = self._get_connection()
            cursor = connection.cursor()

            test_query = "SELECT 1"
            cursor.execute(test_query)
            cursor.fetchone()
            cursor.close()

            logging.info(f"Successfully connected to {self.db_type} database: {self.database}")

        except ConnectorValidationError:
            self._close_connection()
            raise
        except Exception as e:
            self._close_connection()
            raise ConnectorValidationError(
                f"Failed to connect to {self.db_type} database: {str(e)}"
            )
        finally:
            self._close_connection()


if __name__ == "__main__":
    import os

    credentials_dict = {
        "username": os.environ.get("DB_USERNAME", "root"),
        "password": os.environ.get("DB_PASSWORD", ""),
    }

    connector = RDBMSConnector(
        db_type="mysql",
        host=os.environ.get("DB_HOST", "localhost"),
        port=int(os.environ.get("DB_PORT", "3306")),
        database=os.environ.get("DB_NAME", "test"),
        query="SELECT * FROM products LIMIT 10",
        content_columns="name,description",
        metadata_columns="id,category,price",
        id_column="id",
        timestamp_column="updated_at",
    )

    try:
        connector.load_credentials(credentials_dict)
        connector.validate_connector_settings()

        for batch in connector.load_from_state():
            print(f"Batch of {len(batch)} documents:")
            for doc in batch:
                print(f"  - {doc.id}: {doc.semantic_identifier}")
            break

    except Exception as e:
        print(f"Error: {e}")
```

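To make the incremental-sync behavior concrete, here is a small sketch (not part of the PR) of the SQL that `_build_query_with_time_filter` produces for a MySQL poll window; the table and column names are hypothetical.

```python
from datetime import datetime, timezone

from common.data_source.rdbms_connector import RDBMSConnector

# Hypothetical configuration; only the fields relevant to time filtering matter here.
connector = RDBMSConnector(
    db_type="mysql",
    host="localhost",
    port=3306,
    database="knowledge",
    query="SELECT id, title, body, updated_at FROM articles",
    content_columns="title,body",
    timestamp_column="updated_at",
)

start = datetime(2026, 1, 1, tzinfo=timezone.utc)
end = datetime(2026, 1, 2, tzinfo=timezone.utc)

# poll_source() converts its epoch arguments to datetimes and routes them through
# _build_query_with_time_filter(), which appends a WHERE clause on the timestamp column:
#   SELECT id, title, body, updated_at FROM articles
#   WHERE updated_at > '2026-01-01 00:00:00' AND updated_at <= '2026-01-02 00:00:00'
print(connector._build_query_with_time_filter(start, end))
```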
`pyproject.toml`

```diff
@@ -58,6 +58,7 @@ dependencies = [
    "mini-racer>=0.12.4,<0.13.0",
    "minio==7.2.4",
    "mistralai==0.4.2",
    "mysql-connector-python>=9.0.0,<10.0.0",
    "moodlepy>=0.23.0",
    "mypy-boto3-s3==1.40.26",
    "Office365-REST-Python-Client==2.6.2",
```

`rag/svr/sync_data_source.py`

```diff
@@ -54,6 +54,7 @@ from common.data_source import (
    ImapConnector,
    ZendeskConnector,
    SeaFileConnector,
    RDBMSConnector,
)
from common.constants import FileSource, TaskStatus
from common.data_source.config import INDEX_BATCH_SIZE
@@ -1213,6 +1214,79 @@ class SeaFile(SyncBase):
        )
        return document_generator


class MySQL(SyncBase):
    SOURCE_NAME: str = FileSource.MYSQL

    async def _generate(self, task: dict):
        self.connector = RDBMSConnector(
            db_type="mysql",
            host=self.conf.get("host", "localhost"),
            port=int(self.conf.get("port", 3306)),
            database=self.conf.get("database", ""),
            query=self.conf.get("query", ""),
            content_columns=self.conf.get("content_columns", ""),
            batch_size=self.conf.get("batch_size", INDEX_BATCH_SIZE),
        )

        credentials = self.conf.get("credentials")
        if not credentials:
            raise ValueError("MySQL connector is missing credentials.")

        self.connector.load_credentials(credentials)
        self.connector.validate_connector_settings()

        if task["reindex"] == "1" or not task["poll_range_start"]:
            document_generator = self.connector.load_from_state()
            begin_info = "totally"
        else:
            poll_start = task["poll_range_start"]
            document_generator = self.connector.poll_source(
                poll_start.timestamp(),
                datetime.now(timezone.utc).timestamp()
            )
            begin_info = f"from {poll_start}"

        logging.info(f"[MySQL] Connect to {self.conf.get('host')}:{self.conf.get('database')} {begin_info}")
        return document_generator


class PostgreSQL(SyncBase):
    SOURCE_NAME: str = FileSource.POSTGRESQL

    async def _generate(self, task: dict):
        self.connector = RDBMSConnector(
            db_type="postgresql",
            host=self.conf.get("host", "localhost"),
            port=int(self.conf.get("port", 5432)),
            database=self.conf.get("database", ""),
            query=self.conf.get("query", ""),
            content_columns=self.conf.get("content_columns", ""),
            batch_size=self.conf.get("batch_size", INDEX_BATCH_SIZE),
        )

        credentials = self.conf.get("credentials")
        if not credentials:
            raise ValueError("PostgreSQL connector is missing credentials.")

        self.connector.load_credentials(credentials)
        self.connector.validate_connector_settings()

        if task["reindex"] == "1" or not task["poll_range_start"]:
            document_generator = self.connector.load_from_state()
            begin_info = "totally"
        else:
            poll_start = task["poll_range_start"]
            document_generator = self.connector.poll_source(
                poll_start.timestamp(),
                datetime.now(timezone.utc).timestamp()
            )
            begin_info = f"from {poll_start}"

        logging.info(f"[PostgreSQL] Connect to {self.conf.get('host')}:{self.conf.get('database')} {begin_info}")
        return document_generator


func_factory = {
    FileSource.S3: S3,
    FileSource.R2: R2,
@@ -1238,7 +1312,9 @@ func_factory = {
    FileSource.GITHUB: Github,
    FileSource.GITLAB: Gitlab,
    FileSource.BITBUCKET: Bitbucket,
    FileSource.SEAFILE: SeaFile,
    FileSource.MYSQL: MySQL,
    FileSource.POSTGRESQL: PostgreSQL,
}
```

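As a hedged sketch of what these sync classes ultimately yield, here is how a single row becomes a document via `_row_to_document` in the connector above; the table, columns, and values are invented for illustration, and running it requires the repository's `Document` model to be importable.

```python
from datetime import datetime, timezone

from common.data_source.rdbms_connector import RDBMSConnector

# Hypothetical connector and row; every concrete name and value is an example.
connector = RDBMSConnector(
    db_type="mysql",
    host="localhost",
    port=3306,
    database="knowledge",
    query="SELECT id, name, description, updated_at FROM products",
    content_columns="name,description",
    metadata_columns="id",
    id_column="id",
    timestamp_column="updated_at",
)

row = (42, "Widget", "A blue widget", datetime(2026, 1, 15, 9, 30, tzinfo=timezone.utc))
doc = connector._row_to_document(row, ["id", "name", "description", "updated_at"])

# doc.blob decodes to two "column: value" lines:
#   name: Widget
#   description: A blue widget
# doc.metadata == {"id": "42"}, doc.doc_updated_at is taken from updated_at, and the
# document id combines the database type, database name, and the id column value.
```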
`uv.lock` (generated, 2 lines changed)

```diff
@@ -6174,6 +6174,7 @@ dependencies = [
    { name = "mistralai" },
    { name = "moodlepy" },
    { name = "mypy-boto3-s3" },
    { name = "mysql-connector-python" },
    { name = "nest-asyncio" },
    { name = "office365-rest-python-client" },
    { name = "ollama" },
@@ -6309,6 +6310,7 @@ requires-dist = [
    { name = "mistralai", specifier = "==0.4.2" },
    { name = "moodlepy", specifier = ">=0.23.0" },
    { name = "mypy-boto3-s3", specifier = "==1.40.26" },
    { name = "mysql-connector-python", specifier = ">=9.0.0,<10.0.0" },
    { name = "nest-asyncio", specifier = ">=1.6.0,<2.0.0" },
    { name = "office365-rest-python-client", specifier = "==2.6.2" },
    { name = "ollama", specifier = ">=0.5.0" },
```

`web/src/assets/svg/data-source/mysql.svg` (new file, 3 lines)

```xml
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 128 128">
<path fill="#00618A" d="M116.948 97.807c-6.863-.187-12.104.452-16.585 2.341-1.273.537-3.305.552-3.513 2.147.7.733.807 1.83 1.365 2.731 1.07 1.73 2.876 4.052 4.488 5.268 1.762 1.33 3.577 2.751 5.465 3.902 3.358 2.047 7.107 3.217 10.34 5.268 1.906 1.21 3.799 2.733 5.658 4.097.92.675 1.537 1.724 2.732 2.147v-.194c-.628-.79-.79-1.86-1.365-2.731l-2.537-2.537c-2.48-3.292-5.629-6.184-8.976-8.585-2.669-1.916-8.642-4.504-9.755-7.609l-.195-.195c1.892-.214 4.107-.898 5.854-1.367 2.934-.786 5.556-.583 8.585-1.365l4.097-1.171v-.78c-1.531-1.571-2.623-3.651-4.292-5.072-4.37-3.72-9.138-7.437-14.048-10.537-2.724-1.718-6.089-2.835-8.976-4.292-.971-.491-2.677-.746-3.318-1.562-1.517-1.932-2.342-4.382-3.511-6.633-2.449-4.717-4.854-9.868-7.024-14.831-1.48-3.384-2.447-6.72-4.292-9.756-8.85-14.567-18.396-23.358-33.169-32-3.144-1.838-6.929-2.563-10.929-3.513l-6.439-.391c-1.307-.547-2.666-2.149-3.902-2.927C17.811 4.565 5.257-2.16 1.633 6.682c-2.289 5.581 3.421 11.025 5.462 13.854 1.434 1.982 3.269 4.207 4.293 6.438.674 1.467.79 2.938 1.367 4.489 1.417 3.822 2.652 7.98 4.487 11.511.927 1.788 1.949 3.67 3.122 5.268.718.981 1.951 1.413 2.145 2.927-1.204 1.686-1.273 4.304-1.95 6.44-3.05 9.615-1.899 21.567 2.537 28.683 1.36 2.186 4.567 6.871 8.975 5.073 3.856-1.57 2.995-6.438 4.098-10.732.249-.973.096-1.689.585-2.341v.195l3.513 7.024c2.6 4.187 7.212 8.562 11.122 11.514 2.027 1.531 3.623 4.177 6.244 5.073v-.196h-.195c-.508-.791-1.303-1.119-1.951-1.755-1.527-1.497-3.225-3.358-4.487-5.073-3.556-4.827-6.698-10.11-9.561-15.609-1.368-2.627-2.557-5.523-3.709-8.196-.444-1.03-.438-2.589-1.364-3.122-1.263 1.958-3.122 3.542-4.098 5.854-1.561 3.696-1.762 8.204-2.341 12.878-.342.122-.19.038-.391.194-2.718-.655-3.672-3.452-4.683-5.853-2.554-6.07-3.029-15.842-.781-22.829.582-1.809 3.21-7.501 2.146-9.172-.508-1.666-2.184-2.63-3.121-3.903-1.161-1.574-2.319-3.646-3.124-5.464-2.09-4.731-3.066-10.044-5.267-14.828-1.053-2.287-2.832-4.602-4.293-6.634-1.617-2.253-3.429-3.912-4.683-6.635-.446-.968-1.051-2.518-.391-3.513.21-.671.507-.951 1.171-1.17 1.132-.873 4.284.29 5.462.779 3.129 1.3 5.741 2.538 8.392 4.294 1.271.84 2.557 2.471 4.097 2.926h1.756c2.747.631 5.824.195 8.391.975 4.536 1.378 8.601 3.523 12.292 5.854 11.246 7.102 20.442 17.21 26.732 29.269 1.012 1.942 1.45 3.794 2.341 5.854 1.798 4.153 4.063 8.426 5.852 12.488 1.786 4.052 3.526 8.141 6.05 11.513 1.327 1.772 6.451 2.723 8.781 3.708 1.632.689 4.307 1.409 5.854 2.34 2.953 1.782 5.815 3.903 8.586 5.855 1.383.975 5.64 3.116 5.852 4.879zM29.729 23.466c-1.431-.027-2.443.156-3.513.389v.195h.195c.683 1.402 1.888 2.306 2.731 3.513.65 1.367 1.301 2.732 1.952 4.097l.194-.193c1.209-.853 1.762-2.214 1.755-4.294-.484-.509-.555-1.147-.975-1.755-.556-.811-1.635-1.272-2.339-1.952z"/>
</svg>
```

`web/src/assets/svg/data-source/postgresql.svg` (new file, 6 lines)

```xml
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 128 128">
<path d="M93.809 92.112c.785-6.533.55-7.492 5.416-6.433l1.235.108c3.742.17 8.637-.602 11.513-1.938 6.191-2.873 9.861-7.668 3.758-6.409-13.924 2.873-14.881-1.842-14.881-1.842 14.703-21.815 20.849-49.508 15.543-56.287-14.47-18.489-39.517-9.746-39.936-9.52l-.134.025c-2.751-.571-5.83-.912-9.289-.968-6.301-.104-11.082 1.652-14.709 4.402 0 0-44.683-18.409-42.604 23.151.442 8.841 12.672 66.898 27.26 49.362 5.332-6.412 10.484-11.834 10.484-11.834 2.558 1.699 5.622 2.567 8.834 2.255l.249-.212c-.078.796-.044 1.575.099 2.497-3.757 4.199-2.653 4.936-10.166 6.482-7.602 1.566-3.136 4.355-.221 5.084 3.535.884 11.712 2.136 17.238-5.598l-.22.882c1.474 1.18 1.375 8.477 1.583 13.69.208 5.214.558 10.079 1.621 12.948 1.063 2.868 2.317 10.256 12.191 8.14 8.252-1.764 14.561-4.309 15.136-27.985" fill="#336791"/>
<path d="M66.509 129.502c-.169-2.431-.311-4.739-.311-5.868 0-14.703.837-18.013 3.047-19.678 2.212-1.665 6.505-2.332 7.327-10.148.468-4.448-2.016-6.448-2.016-6.448s3.798-16.395 3.798-31.193c0-14.798-7.326-23.263-7.326-23.263s15.756-7.326 18.924-7.326c3.169 0 7.326 2.363 7.326 10.956 0 8.592-3.169 30.847-3.169 30.847s2.363 1.712 2.363 4.879c0 3.167-3.169 25.968-3.169 25.968s-7.326 3.169-7.326 10.495c0 3.975.837 7.326.837 7.326M45.607 104.059c-.418-3.921-.418-7.842-.418-7.842 0-7.326 3.169-18.088 3.169-18.088s-3.169-1.665-3.169-8.99c0-7.326 3.169-23.264 3.169-23.264s-7.327 0-10.956-3.629c-3.629-3.629-7.326-11.362-7.326-16.241 0-4.879 3.697-7.326 7.326-7.326 3.629 0 25.968 10.956 25.968 10.956" fill="none" stroke="#fff" stroke-width="4.8" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M93.809 92.112c.785-6.533.55-7.492 5.416-6.433l1.235.108c3.742.17 8.637-.602 11.513-1.938 6.191-2.873 9.861-7.668 3.758-6.409-13.924 2.873-14.881-1.842-14.881-1.842 14.703-21.815 20.849-49.508 15.543-56.287-14.47-18.489-39.517-9.746-39.936-9.52l-.134.025c-2.751-.571-5.83-.912-9.289-.968-6.301-.104-11.082 1.652-14.709 4.402 0 0-44.683-18.409-42.604 23.151.442 8.841 12.672 66.898 27.26 49.362 5.332-6.412 10.484-11.834 10.484-11.834 2.558 1.699 5.622 2.567 8.834 2.255l.249-.212c-.078.796-.044 1.575.099 2.497-3.757 4.199-2.653 4.936-10.166 6.482-7.602 1.566-3.136 4.355-.221 5.084 3.535.884 11.712 2.136 17.238-5.598l-.22.882c1.474 1.18 1.375 8.477 1.583 13.69.208 5.214.558 10.079 1.621 12.948 1.063 2.868 2.317 10.256 12.191 8.14 8.252-1.764 14.561-4.309 15.136-27.985" fill="none" stroke="#fff" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M60.087 89.226c-.157 7.853.256 15.735 1.231 19.469 1.503 5.751 4.696 9.168 13.295 7.36 6.739-1.418 9.246-4.622 10.76-9.553.853-2.781 1.435-16.308 1.595-22.48" fill="none" stroke="#fff" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round"/>
</svg>
```

`web/src/pages/user-setting/data-source/constant/index.tsx`

```diff
@@ -36,6 +36,8 @@ export enum DataSourceKey {
  BITBUCKET = 'bitbucket',
  ZENDESK = 'zendesk',
  SEAFILE = 'seafile',
  MYSQL = 'mysql',
  POSTGRESQL = 'postgresql',
  // SHAREPOINT = 'sharepoint',
  // SLACK = 'slack',
  // TEAMS = 'teams',
@@ -161,6 +163,16 @@ export const generateDataSourceInfo = (t: TFunction) => {
      description: t(`setting.${DataSourceKey.SEAFILE}Description`),
      icon: <SvgIcon name={'data-source/seafile'} width={38} />,
    },
    [DataSourceKey.MYSQL]: {
      name: 'MySQL',
      description: t(`setting.${DataSourceKey.MYSQL}Description`),
      icon: <SvgIcon name={'data-source/mysql'} width={38} />,
    },
    [DataSourceKey.POSTGRESQL]: {
      name: 'PostgreSQL',
      description: t(`setting.${DataSourceKey.POSTGRESQL}Description`),
      icon: <SvgIcon name={'data-source/postgresql'} width={38} />,
    },
  };
};
@@ -854,6 +866,106 @@ export const DataSourceFormFields = {
      tooltip: t('setting.seafileBatchSizeTip'),
    },
  ],
  [DataSourceKey.MYSQL]: [
    {
      label: 'Host',
      name: 'config.host',
      type: FormFieldType.Text,
      required: true,
      placeholder: 'localhost',
    },
    {
      label: 'Port',
      name: 'config.port',
      type: FormFieldType.Number,
      required: true,
      placeholder: '3306',
    },
    {
      label: 'Database',
      name: 'config.database',
      type: FormFieldType.Text,
      required: true,
    },
    {
      label: 'Username',
      name: 'config.credentials.username',
      type: FormFieldType.Text,
      required: true,
    },
    {
      label: 'Password',
      name: 'config.credentials.password',
      type: FormFieldType.Password,
      required: true,
    },
    {
      label: 'SQL Query',
      name: 'config.query',
      type: FormFieldType.Textarea,
      required: false,
      placeholder: 'Leave empty to load all tables',
      tooltip: t('setting.mysqlQueryTip'),
    },
    {
      label: 'Content Columns',
      name: 'config.content_columns',
      type: FormFieldType.Text,
      required: false,
      placeholder: 'title,description,content',
      tooltip: t('setting.mysqlContentColumnsTip'),
    },
  ],
  [DataSourceKey.POSTGRESQL]: [
    {
      label: 'Host',
      name: 'config.host',
      type: FormFieldType.Text,
      required: true,
      placeholder: 'localhost',
    },
    {
      label: 'Port',
      name: 'config.port',
      type: FormFieldType.Number,
      required: true,
      placeholder: '5432',
    },
    {
      label: 'Database',
      name: 'config.database',
      type: FormFieldType.Text,
      required: true,
    },
    {
      label: 'Username',
      name: 'config.credentials.username',
      type: FormFieldType.Text,
      required: true,
    },
    {
      label: 'Password',
      name: 'config.credentials.password',
      type: FormFieldType.Password,
      required: true,
    },
    {
      label: 'SQL Query',
      name: 'config.query',
      type: FormFieldType.Textarea,
      required: false,
      placeholder: 'Leave empty to load all tables',
      tooltip: t('setting.postgresqlQueryTip'),
    },
    {
      label: 'Content Columns',
      name: 'config.content_columns',
      type: FormFieldType.Text,
      required: false,
      placeholder: 'title,description,content',
      tooltip: t('setting.postgresqlContentColumnsTip'),
    },
  ],
};

export const DataSourceFormDefaultValues = {
@@ -1135,7 +1247,6 @@ export const DataSourceFormDefaultValues = {
      },
    },
  },

  [DataSourceKey.SEAFILE]: {
    name: '',
    source: DataSourceKey.SEAFILE,
@@ -1148,4 +1259,40 @@
      },
    },
  },
  [DataSourceKey.MYSQL]: {
    name: '',
    source: DataSourceKey.MYSQL,
    config: {
      host: 'localhost',
      port: 3306,
      database: '',
      query: '',
      content_columns: '',
      metadata_columns: '',
      id_column: '',
      timestamp_column: '',
      credentials: {
        username: '',
        password: '',
      },
    },
  },
  [DataSourceKey.POSTGRESQL]: {
    name: '',
    source: DataSourceKey.POSTGRESQL,
    config: {
      host: 'localhost',
      port: 5432,
      database: '',
      query: '',
      content_columns: '',
      metadata_columns: '',
      id_column: '',
      timestamp_column: '',
      credentials: {
        username: '',
        password: '',
      },
    },
  },
};
```
