mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Feat: add gmail connector (#11549)
### What problem does this PR solve? _Briefly describe what this PR aims to solve. Include background context that will help reviewers understand the purpose of the PR._ ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -28,8 +28,8 @@ from api.db import InputType
|
||||
from api.db.services.connector_service import ConnectorService, SyncLogsService
|
||||
from api.utils.api_utils import get_data_error_result, get_json_result, validate_request
|
||||
from common.constants import RetCode, TaskStatus
|
||||
from common.data_source.config import GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI, DocumentSource
|
||||
from common.data_source.google_util.constant import GOOGLE_DRIVE_WEB_OAUTH_POPUP_TEMPLATE, GOOGLE_SCOPES
|
||||
from common.data_source.config import GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI, GMAIL_WEB_OAUTH_REDIRECT_URI, DocumentSource
|
||||
from common.data_source.google_util.constant import GOOGLE_WEB_OAUTH_POPUP_TEMPLATE, GOOGLE_SCOPES
|
||||
from common.misc_utils import get_uuid
|
||||
from rag.utils.redis_conn import REDIS_CONN
|
||||
from api.apps import login_required, current_user
|
||||
@ -122,12 +122,30 @@ GOOGLE_WEB_FLOW_RESULT_PREFIX = "google_drive_web_flow_result"
|
||||
WEB_FLOW_TTL_SECS = 15 * 60
|
||||
|
||||
|
||||
def _web_state_cache_key(flow_id: str) -> str:
|
||||
return f"{GOOGLE_WEB_FLOW_STATE_PREFIX}:{flow_id}"
|
||||
def _web_state_cache_key(flow_id: str, source_type: str | None = None) -> str:
|
||||
"""Return Redis key for web OAuth state.
|
||||
|
||||
The default prefix keeps backward compatibility for Google Drive.
|
||||
When source_type == "gmail", a different prefix is used so that
|
||||
Drive/Gmail flows don't clash in Redis.
|
||||
"""
|
||||
if source_type == "gmail":
|
||||
prefix = "gmail_web_flow_state"
|
||||
else:
|
||||
prefix = GOOGLE_WEB_FLOW_STATE_PREFIX
|
||||
return f"{prefix}:{flow_id}"
|
||||
|
||||
|
||||
def _web_result_cache_key(flow_id: str) -> str:
|
||||
return f"{GOOGLE_WEB_FLOW_RESULT_PREFIX}:{flow_id}"
|
||||
def _web_result_cache_key(flow_id: str, source_type: str | None = None) -> str:
|
||||
"""Return Redis key for web OAuth result.
|
||||
|
||||
Mirrors _web_state_cache_key logic for result storage.
|
||||
"""
|
||||
if source_type == "gmail":
|
||||
prefix = "gmail_web_flow_result"
|
||||
else:
|
||||
prefix = GOOGLE_WEB_FLOW_RESULT_PREFIX
|
||||
return f"{prefix}:{flow_id}"
|
||||
|
||||
|
||||
def _load_credentials(payload: str | dict[str, Any]) -> dict[str, Any]:
|
||||
@ -146,19 +164,22 @@ def _get_web_client_config(credentials: dict[str, Any]) -> dict[str, Any]:
|
||||
return {"web": web_section}
|
||||
|
||||
|
||||
async def _render_web_oauth_popup(flow_id: str, success: bool, message: str):
|
||||
async def _render_web_oauth_popup(flow_id: str, success: bool, message: str, source="drive"):
|
||||
status = "success" if success else "error"
|
||||
auto_close = "window.close();" if success else ""
|
||||
escaped_message = escape(message)
|
||||
payload_json = json.dumps(
|
||||
{
|
||||
"type": "ragflow-google-drive-oauth",
|
||||
# TODO(google-oauth): include connector type (drive/gmail) in payload type if needed
|
||||
"type": f"ragflow-google-{source}-oauth",
|
||||
"status": status,
|
||||
"flowId": flow_id or "",
|
||||
"message": message,
|
||||
}
|
||||
)
|
||||
html = GOOGLE_DRIVE_WEB_OAUTH_POPUP_TEMPLATE.format(
|
||||
# TODO(google-oauth): title/heading/message may need to reflect drive/gmail based on cached type
|
||||
html = GOOGLE_WEB_OAUTH_POPUP_TEMPLATE.format(
|
||||
title=f"Google {source.capitalize()} Authorization",
|
||||
heading="Authorization complete" if success else "Authorization failed",
|
||||
message=escaped_message,
|
||||
payload_json=payload_json,
|
||||
@ -169,20 +190,33 @@ async def _render_web_oauth_popup(flow_id: str, success: bool, message: str):
|
||||
return response
|
||||
|
||||
|
||||
@manager.route("/google-drive/oauth/web/start", methods=["POST"]) # noqa: F821
|
||||
@manager.route("/google/oauth/web/start", methods=["POST"]) # noqa: F821
|
||||
@login_required
|
||||
@validate_request("credentials")
|
||||
async def start_google_drive_web_oauth():
|
||||
if not GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI:
|
||||
async def start_google_web_oauth():
|
||||
source = request.args.get("type", "google-drive")
|
||||
if source not in ("google-drive", "gmail"):
|
||||
return get_json_result(code=RetCode.ARGUMENT_ERROR, message="Invalid Google OAuth type.")
|
||||
|
||||
if source == "gmail":
|
||||
redirect_uri = GMAIL_WEB_OAUTH_REDIRECT_URI
|
||||
scopes = GOOGLE_SCOPES[DocumentSource.GMAIL]
|
||||
else:
|
||||
redirect_uri = GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI if source == "google-drive" else GMAIL_WEB_OAUTH_REDIRECT_URI
|
||||
scopes = GOOGLE_SCOPES[DocumentSource.GOOGLE_DRIVE if source == "google-drive" else DocumentSource.GMAIL]
|
||||
|
||||
if not redirect_uri:
|
||||
return get_json_result(
|
||||
code=RetCode.SERVER_ERROR,
|
||||
message="Google Drive OAuth redirect URI is not configured on the server.",
|
||||
message="Google OAuth redirect URI is not configured on the server.",
|
||||
)
|
||||
|
||||
req = await request.json or {}
|
||||
raw_credentials = req.get("credentials", "")
|
||||
|
||||
try:
|
||||
credentials = _load_credentials(raw_credentials)
|
||||
print(credentials)
|
||||
except ValueError as exc:
|
||||
return get_json_result(code=RetCode.ARGUMENT_ERROR, message=str(exc))
|
||||
|
||||
@ -199,8 +233,8 @@ async def start_google_drive_web_oauth():
|
||||
|
||||
flow_id = str(uuid.uuid4())
|
||||
try:
|
||||
flow = Flow.from_client_config(client_config, scopes=GOOGLE_SCOPES[DocumentSource.GOOGLE_DRIVE])
|
||||
flow.redirect_uri = GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI
|
||||
flow = Flow.from_client_config(client_config, scopes=scopes)
|
||||
flow.redirect_uri = redirect_uri
|
||||
authorization_url, _ = flow.authorization_url(
|
||||
access_type="offline",
|
||||
include_granted_scopes="true",
|
||||
@ -219,7 +253,7 @@ async def start_google_drive_web_oauth():
|
||||
"client_config": client_config,
|
||||
"created_at": int(time.time()),
|
||||
}
|
||||
REDIS_CONN.set_obj(_web_state_cache_key(flow_id), cache_payload, WEB_FLOW_TTL_SECS)
|
||||
REDIS_CONN.set_obj(_web_state_cache_key(flow_id, source), cache_payload, WEB_FLOW_TTL_SECS)
|
||||
|
||||
return get_json_result(
|
||||
data={
|
||||
@ -230,60 +264,122 @@ async def start_google_drive_web_oauth():
|
||||
)
|
||||
|
||||
|
||||
@manager.route("/google-drive/oauth/web/callback", methods=["GET"]) # noqa: F821
|
||||
async def google_drive_web_oauth_callback():
|
||||
@manager.route("/gmail/oauth/web/callback", methods=["GET"])  # noqa: F821
async def google_gmail_web_oauth_callback():
    """Finish the Gmail web OAuth flow after Google redirects back.

    Reads ``state``, ``error``, ``error_description`` and ``code`` from the
    query string, exchanges the code for tokens using the client config that
    was cached in Redis under the flow's state key, stores the resulting
    credentials under the flow's result key for WEB_FLOW_TTL_SECS seconds,
    and always answers with the popup page (success or failure).
    """
    # Fixed for this route: selects the Gmail Redis key prefix and is
    # forwarded to the popup renderer.
    source = "gmail"

    state_id = request.args.get("state")
    error = request.args.get("error")
    error_description = request.args.get("error_description") or error

    if not state_id:
        return await _render_web_oauth_popup("", False, "Missing OAuth state parameter.", source)

    state_key = _web_state_cache_key(state_id, source)
    state_cache = REDIS_CONN.get(state_key)
    if not state_cache:
        return await _render_web_oauth_popup(state_id, False, "Authorization session expired. Please restart from the main window.", source)

    state_obj = json.loads(state_cache)
    client_config = state_obj.get("client_config")
    if not client_config:
        REDIS_CONN.delete(state_key)
        return await _render_web_oauth_popup(state_id, False, "Authorization session was invalid. Please retry.", source)

    if error:
        REDIS_CONN.delete(state_key)
        return await _render_web_oauth_popup(state_id, False, error_description or "Authorization was cancelled.", source)

    code = request.args.get("code")
    if not code:
        return await _render_web_oauth_popup(state_id, False, "Missing authorization code from Google.", source)

    try:
        flow = Flow.from_client_config(client_config, scopes=GOOGLE_SCOPES[DocumentSource.GMAIL])
        flow.redirect_uri = GMAIL_WEB_OAUTH_REDIRECT_URI
        flow.fetch_token(code=code)
    except Exception as exc:  # pragma: no cover - defensive
        logging.exception("Failed to exchange Google OAuth code: %s", exc)
        REDIS_CONN.delete(state_key)
        return await _render_web_oauth_popup(state_id, False, "Failed to exchange tokens with Google. Please retry.", source)

    result_payload = {
        "user_id": state_obj.get("user_id"),
        "credentials": flow.credentials.to_json(),
    }
    # Publish the result before dropping the state so the flow is never
    # left with neither entry present.
    REDIS_CONN.set_obj(_web_result_cache_key(state_id, source), result_payload, WEB_FLOW_TTL_SECS)
    REDIS_CONN.delete(state_key)

    return await _render_web_oauth_popup(state_id, True, "Authorization completed successfully.", source)
|
||||
|
||||
|
||||
@manager.route("/google-drive/oauth/web/result", methods=["POST"]) # noqa: F821
|
||||
@manager.route("/google-drive/oauth/web/callback", methods=["GET"])  # noqa: F821
async def google_drive_web_oauth_callback():
    """Finish the Google Drive web OAuth flow after Google redirects back.

    Reads ``state``, ``error``, ``error_description`` and ``code`` from the
    query string, exchanges the code for tokens using the client config that
    was cached in Redis under the flow's state key, stores the resulting
    credentials under the flow's result key for WEB_FLOW_TTL_SECS seconds,
    and always answers with the popup page (success or failure).
    """
    # Fixed for this route: selects the Drive Redis key prefix and is
    # forwarded to the popup renderer.
    # NOTE(review): the renderer interpolates this value into the postMessage
    # type and the page title, which yields "ragflow-google-google-drive-oauth"
    # and "Google Google-drive Authorization" - confirm the front end expects
    # that type string.
    source = "google-drive"

    state_id = request.args.get("state")
    error = request.args.get("error")
    error_description = request.args.get("error_description") or error

    if not state_id:
        return await _render_web_oauth_popup("", False, "Missing OAuth state parameter.", source)

    state_key = _web_state_cache_key(state_id, source)
    state_cache = REDIS_CONN.get(state_key)
    if not state_cache:
        return await _render_web_oauth_popup(state_id, False, "Authorization session expired. Please restart from the main window.", source)

    state_obj = json.loads(state_cache)
    client_config = state_obj.get("client_config")
    if not client_config:
        REDIS_CONN.delete(state_key)
        return await _render_web_oauth_popup(state_id, False, "Authorization session was invalid. Please retry.", source)

    if error:
        REDIS_CONN.delete(state_key)
        return await _render_web_oauth_popup(state_id, False, error_description or "Authorization was cancelled.", source)

    code = request.args.get("code")
    if not code:
        return await _render_web_oauth_popup(state_id, False, "Missing authorization code from Google.", source)

    try:
        flow = Flow.from_client_config(client_config, scopes=GOOGLE_SCOPES[DocumentSource.GOOGLE_DRIVE])
        flow.redirect_uri = GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI
        flow.fetch_token(code=code)
    except Exception as exc:  # pragma: no cover - defensive
        logging.exception("Failed to exchange Google OAuth code: %s", exc)
        REDIS_CONN.delete(state_key)
        return await _render_web_oauth_popup(state_id, False, "Failed to exchange tokens with Google. Please retry.", source)

    result_payload = {
        "user_id": state_obj.get("user_id"),
        "credentials": flow.credentials.to_json(),
    }
    # Publish the result before dropping the state so the flow is never
    # left with neither entry present.
    REDIS_CONN.set_obj(_web_result_cache_key(state_id, source), result_payload, WEB_FLOW_TTL_SECS)
    REDIS_CONN.delete(state_key)

    return await _render_web_oauth_popup(state_id, True, "Authorization completed successfully.", source)
|
||||
|
||||
@manager.route("/google/oauth/web/result", methods=["POST"]) # noqa: F821
|
||||
@login_required
|
||||
@validate_request("flow_id")
|
||||
async def poll_google_drive_web_result():
|
||||
async def poll_google_web_result():
|
||||
req = await request.json or {}
|
||||
source = request.args.get("type")
|
||||
if source not in ("google-drive", "gmail"):
|
||||
return get_json_result(code=RetCode.ARGUMENT_ERROR, message="Invalid Google OAuth type.")
|
||||
flow_id = req.get("flow_id")
|
||||
cache_raw = REDIS_CONN.get(_web_result_cache_key(flow_id))
|
||||
cache_raw = REDIS_CONN.get(_web_result_cache_key(flow_id, source))
|
||||
if not cache_raw:
|
||||
return get_json_result(code=RetCode.RUNNING, message="Authorization is still pending.")
|
||||
|
||||
@ -291,5 +387,5 @@ async def poll_google_drive_web_result():
|
||||
if result.get("user_id") != current_user.id:
|
||||
return get_json_result(code=RetCode.PERMISSION_ERROR, message="You are not allowed to access this authorization result.")
|
||||
|
||||
REDIS_CONN.delete(_web_result_cache_key(flow_id))
|
||||
REDIS_CONN.delete(_web_result_cache_key(flow_id, source))
|
||||
return get_json_result(data={"credentials": result.get("credentials")})
|
||||
|
||||
@ -121,8 +121,8 @@ async def login():
|
||||
response_data = user.to_json()
|
||||
user.access_token = get_uuid()
|
||||
login_user(user)
|
||||
user.update_time = (current_timestamp(),)
|
||||
user.update_date = (datetime_format(datetime.now()),)
|
||||
user.update_time = current_timestamp()
|
||||
user.update_date = datetime_format(datetime.now())
|
||||
user.save()
|
||||
msg = "Welcome back!"
|
||||
|
||||
@ -1002,8 +1002,8 @@ async def forget():
|
||||
# Auto login (reuse login flow)
|
||||
user.access_token = get_uuid()
|
||||
login_user(user)
|
||||
user.update_time = (current_timestamp(),)
|
||||
user.update_date = (datetime_format(datetime.now()),)
|
||||
user.update_time = current_timestamp()
|
||||
user.update_date = datetime_format(datetime.now())
|
||||
user.save()
|
||||
msg = "Password reset successful. Logged in."
|
||||
return construct_response(data=user.to_json(), auth=user.get_id(), message=msg)
|
||||
|
||||
@ -217,6 +217,7 @@ OAUTH_GOOGLE_DRIVE_CLIENT_SECRET = os.environ.get(
|
||||
"OAUTH_GOOGLE_DRIVE_CLIENT_SECRET", ""
|
||||
)
|
||||
GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI = os.environ.get("GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI", "http://localhost:9380/v1/connector/google-drive/oauth/web/callback")
|
||||
GMAIL_WEB_OAUTH_REDIRECT_URI = os.environ.get("GMAIL_WEB_OAUTH_REDIRECT_URI", "http://localhost:9380/v1/connector/gmail/oauth/web/callback")
|
||||
|
||||
CONFLUENCE_OAUTH_TOKEN_URL = "https://auth.atlassian.com/oauth/token"
|
||||
RATE_LIMIT_MESSAGE_LOWERCASE = "Rate limit exceeded".lower()
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import logging
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
from google.oauth2.credentials import Credentials as OAuthCredentials
|
||||
from google.oauth2.service_account import Credentials as ServiceAccountCredentials
|
||||
from googleapiclient.errors import HttpError
|
||||
@ -9,10 +9,10 @@ from common.data_source.config import INDEX_BATCH_SIZE, SLIM_BATCH_SIZE, Documen
|
||||
from common.data_source.google_util.auth import get_google_creds
|
||||
from common.data_source.google_util.constant import DB_CREDENTIALS_PRIMARY_ADMIN_KEY, MISSING_SCOPES_ERROR_STR, SCOPE_INSTRUCTIONS, USER_FIELDS
|
||||
from common.data_source.google_util.resource import get_admin_service, get_gmail_service
|
||||
from common.data_source.google_util.util import _execute_single_retrieval, execute_paginated_retrieval
|
||||
from common.data_source.google_util.util import _execute_single_retrieval, execute_paginated_retrieval, sanitize_filename, clean_string
|
||||
from common.data_source.interfaces import LoadConnector, PollConnector, SecondsSinceUnixEpoch, SlimConnectorWithPermSync
|
||||
from common.data_source.models import BasicExpertInfo, Document, ExternalAccess, GenerateDocumentsOutput, GenerateSlimDocumentOutput, SlimDocument, TextSection
|
||||
from common.data_source.utils import build_time_range_query, clean_email_and_extract_name, get_message_body, is_mail_service_disabled_error, time_str_to_utc
|
||||
from common.data_source.utils import build_time_range_query, clean_email_and_extract_name, get_message_body, is_mail_service_disabled_error, gmail_time_str_to_utc
|
||||
|
||||
# Constants for Gmail API fields
|
||||
THREAD_LIST_FIELDS = "nextPageToken, threads(id)"
|
||||
@ -67,7 +67,6 @@ def message_to_section(message: dict[str, Any]) -> tuple[TextSection, dict[str,
|
||||
message_data += f"{name}: {value}\n"
|
||||
|
||||
message_body_text: str = get_message_body(payload)
|
||||
|
||||
return TextSection(link=link, text=message_body_text + message_data), metadata
|
||||
|
||||
|
||||
@ -97,13 +96,15 @@ def thread_to_document(full_thread: dict[str, Any], email_used_to_fetch_thread:
|
||||
|
||||
if not semantic_identifier:
|
||||
semantic_identifier = message_metadata.get("subject", "")
|
||||
semantic_identifier = clean_string(semantic_identifier)
|
||||
semantic_identifier = sanitize_filename(semantic_identifier)
|
||||
|
||||
if message_metadata.get("updated_at"):
|
||||
updated_at = message_metadata.get("updated_at")
|
||||
|
||||
updated_at_datetime = None
|
||||
if updated_at:
|
||||
updated_at_datetime = time_str_to_utc(updated_at)
|
||||
updated_at_datetime = gmail_time_str_to_utc(updated_at)
|
||||
|
||||
thread_id = full_thread.get("id")
|
||||
if not thread_id:
|
||||
@ -115,15 +116,24 @@ def thread_to_document(full_thread: dict[str, Any], email_used_to_fetch_thread:
|
||||
if not semantic_identifier:
|
||||
semantic_identifier = "(no subject)"
|
||||
|
||||
combined_sections = "\n\n".join(
|
||||
sec.text for sec in sections if hasattr(sec, "text")
|
||||
)
|
||||
blob = combined_sections
|
||||
size_bytes = len(blob)
|
||||
extension = '.txt'
|
||||
|
||||
return Document(
|
||||
id=thread_id,
|
||||
semantic_identifier=semantic_identifier,
|
||||
sections=sections,
|
||||
blob=blob,
|
||||
size_bytes=size_bytes,
|
||||
extension=extension,
|
||||
source=DocumentSource.GMAIL,
|
||||
primary_owners=primary_owners,
|
||||
secondary_owners=secondary_owners,
|
||||
doc_updated_at=updated_at_datetime,
|
||||
metadata={},
|
||||
metadata=message_metadata,
|
||||
external_access=ExternalAccess(
|
||||
external_user_emails={email_used_to_fetch_thread},
|
||||
external_user_group_ids=set(),
|
||||
@ -214,15 +224,13 @@ class GmailConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
q=query,
|
||||
continue_on_404_or_403=True,
|
||||
):
|
||||
full_threads = _execute_single_retrieval(
|
||||
full_thread = _execute_single_retrieval(
|
||||
retrieval_function=gmail_service.users().threads().get,
|
||||
list_key=None,
|
||||
userId=user_email,
|
||||
fields=THREAD_FIELDS,
|
||||
id=thread["id"],
|
||||
continue_on_404_or_403=True,
|
||||
)
|
||||
full_thread = list(full_threads)[0]
|
||||
doc = thread_to_document(full_thread, user_email)
|
||||
if doc is None:
|
||||
continue
|
||||
@ -310,4 +318,30 @@ class GmailConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pass
|
||||
import time
|
||||
import os
|
||||
from common.data_source.google_util.util import get_credentials_from_env
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
try:
|
||||
email = os.environ.get("GMAIL_TEST_EMAIL", "newyorkupperbay@gmail.com")
|
||||
creds = get_credentials_from_env(email, oauth=True, source="gmail")
|
||||
print("Credentials loaded successfully")
|
||||
print(f"{creds=}")
|
||||
|
||||
connector = GmailConnector(batch_size=2)
|
||||
print("GmailConnector initialized")
|
||||
connector.load_credentials(creds)
|
||||
print("Credentials loaded into connector")
|
||||
|
||||
print("Gmail is ready to use")
|
||||
|
||||
for file in connector._fetch_threads(
|
||||
int(time.time()) - 1 * 24 * 60 * 60,
|
||||
int(time.time()),
|
||||
):
|
||||
print("new batch","-"*80)
|
||||
for f in file:
|
||||
print(f)
|
||||
print("\n\n")
|
||||
except Exception as e:
|
||||
logging.exception(f"Error loading credentials: {e}")
|
||||
@ -1,7 +1,6 @@
|
||||
"""Google Drive connector"""
|
||||
|
||||
import copy
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
@ -32,7 +31,6 @@ from common.data_source.google_drive.file_retrieval import (
|
||||
from common.data_source.google_drive.model import DriveRetrievalStage, GoogleDriveCheckpoint, GoogleDriveFileType, RetrievedDriveFile, StageCompletion
|
||||
from common.data_source.google_util.auth import get_google_creds
|
||||
from common.data_source.google_util.constant import DB_CREDENTIALS_PRIMARY_ADMIN_KEY, MISSING_SCOPES_ERROR_STR, USER_FIELDS
|
||||
from common.data_source.google_util.oauth_flow import ensure_oauth_token_dict
|
||||
from common.data_source.google_util.resource import GoogleDriveService, get_admin_service, get_drive_service
|
||||
from common.data_source.google_util.util import GoogleFields, execute_paginated_retrieval, get_file_owners
|
||||
from common.data_source.google_util.util_threadpool_concurrency import ThreadSafeDict
|
||||
@ -1138,39 +1136,6 @@ class GoogleDriveConnector(SlimConnectorWithPermSync, CheckpointedConnectorWithP
|
||||
return GoogleDriveCheckpoint.model_validate_json(checkpoint_json)
|
||||
|
||||
|
||||
def get_credentials_from_env(email: str, oauth: bool = False) -> dict:
|
||||
try:
|
||||
if oauth:
|
||||
raw_credential_string = os.environ["GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR"]
|
||||
else:
|
||||
raw_credential_string = os.environ["GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR"]
|
||||
except KeyError:
|
||||
raise ValueError("Missing Google Drive credentials in environment variables")
|
||||
|
||||
try:
|
||||
credential_dict = json.loads(raw_credential_string)
|
||||
except json.JSONDecodeError:
|
||||
raise ValueError("Invalid JSON in Google Drive credentials")
|
||||
|
||||
if oauth:
|
||||
credential_dict = ensure_oauth_token_dict(credential_dict, DocumentSource.GOOGLE_DRIVE)
|
||||
|
||||
refried_credential_string = json.dumps(credential_dict)
|
||||
|
||||
DB_CREDENTIALS_DICT_TOKEN_KEY = "google_tokens"
|
||||
DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = "google_service_account_key"
|
||||
DB_CREDENTIALS_PRIMARY_ADMIN_KEY = "google_primary_admin"
|
||||
DB_CREDENTIALS_AUTHENTICATION_METHOD = "authentication_method"
|
||||
|
||||
cred_key = DB_CREDENTIALS_DICT_TOKEN_KEY if oauth else DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY
|
||||
|
||||
return {
|
||||
cred_key: refried_credential_string,
|
||||
DB_CREDENTIALS_PRIMARY_ADMIN_KEY: email,
|
||||
DB_CREDENTIALS_AUTHENTICATION_METHOD: "uploaded",
|
||||
}
|
||||
|
||||
|
||||
class CheckpointOutputWrapper:
|
||||
"""
|
||||
Wraps a CheckpointOutput generator to give things back in a more digestible format.
|
||||
@ -1236,7 +1201,7 @@ def yield_all_docs_from_checkpoint_connector(
|
||||
|
||||
if __name__ == "__main__":
|
||||
import time
|
||||
|
||||
from common.data_source.google_util.util import get_credentials_from_env
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
try:
|
||||
@ -1245,7 +1210,7 @@ if __name__ == "__main__":
|
||||
creds = get_credentials_from_env(email, oauth=True)
|
||||
print("Credentials loaded successfully")
|
||||
print(f"{creds=}")
|
||||
|
||||
sys.exit(0)
|
||||
connector = GoogleDriveConnector(
|
||||
include_shared_drives=False,
|
||||
shared_drive_urls=None,
|
||||
|
||||
@ -49,11 +49,11 @@ MISSING_SCOPES_ERROR_STR = "client not authorized for any of the scopes requeste
|
||||
SCOPE_INSTRUCTIONS = ""
|
||||
|
||||
|
||||
GOOGLE_DRIVE_WEB_OAUTH_POPUP_TEMPLATE = """<!DOCTYPE html>
|
||||
GOOGLE_WEB_OAUTH_POPUP_TEMPLATE = """<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>Google Drive Authorization</title>
|
||||
<title>{title}</title>
|
||||
<style>
|
||||
body {{
|
||||
font-family: Arial, sans-serif;
|
||||
|
||||
@ -1,12 +1,17 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import socket
|
||||
from collections.abc import Callable, Iterator
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
import unicodedata
|
||||
from googleapiclient.errors import HttpError # type: ignore # type: ignore
|
||||
|
||||
from common.data_source.config import DocumentSource
|
||||
from common.data_source.google_drive.model import GoogleDriveFileType
|
||||
from common.data_source.google_util.oauth_flow import ensure_oauth_token_dict
|
||||
|
||||
|
||||
# See https://developers.google.com/drive/api/reference/rest/v3/files/list for more
|
||||
@ -117,6 +122,7 @@ def _execute_single_retrieval(
|
||||
"""Execute a single retrieval from Google Drive API"""
|
||||
try:
|
||||
results = retrieval_function(**request_kwargs).execute()
|
||||
|
||||
except HttpError as e:
|
||||
if e.resp.status >= 500:
|
||||
results = retrieval_function()
|
||||
@ -148,5 +154,110 @@ def _execute_single_retrieval(
|
||||
error,
|
||||
)
|
||||
results = retrieval_function()
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def get_credentials_from_env(email: str, oauth: bool = False, source="drive") -> dict:
    """Build a connector credentials dict from environment variables.

    Args:
        email: Stored in the result under ``google_primary_admin``.
        oauth: When true, read ``GOOGLE_OAUTH_CREDENTIALS_JSON_STR`` and store
            it under ``google_tokens``; otherwise read
            ``GOOGLE_SERVICE_ACCOUNT_JSON_STR`` and store it under
            ``google_service_account_key``.
        source: ``"drive"`` selects DocumentSource.GOOGLE_DRIVE for the OAuth
            normalization step; any other value selects DocumentSource.GMAIL.
            Ignored when ``oauth`` is false.

    Returns:
        A dict with the re-serialized credential JSON, the primary admin
        email, and ``authentication_method`` set to ``"uploaded"``.

    Raises:
        ValueError: If the environment variable is missing or is not valid JSON.
    """
    env_var = "GOOGLE_OAUTH_CREDENTIALS_JSON_STR" if oauth else "GOOGLE_SERVICE_ACCOUNT_JSON_STR"
    try:
        raw_credential_string = os.environ[env_var]
    except KeyError as err:
        raise ValueError("Missing Google Drive credentials in environment variables") from err

    try:
        credential_dict = json.loads(raw_credential_string)
    except json.JSONDecodeError as err:
        raise ValueError("Invalid JSON in Google Drive credentials") from err

    # Only OAuth payloads go through the OAuth token normalization; a
    # service-account key must be passed along untouched.
    if oauth:
        document_source = DocumentSource.GOOGLE_DRIVE if source == "drive" else DocumentSource.GMAIL
        credential_dict = ensure_oauth_token_dict(credential_dict, document_source)

    refried_credential_string = json.dumps(credential_dict)

    DB_CREDENTIALS_DICT_TOKEN_KEY = "google_tokens"
    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = "google_service_account_key"
    DB_CREDENTIALS_PRIMARY_ADMIN_KEY = "google_primary_admin"
    DB_CREDENTIALS_AUTHENTICATION_METHOD = "authentication_method"

    cred_key = DB_CREDENTIALS_DICT_TOKEN_KEY if oauth else DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY

    return {
        cred_key: refried_credential_string,
        DB_CREDENTIALS_PRIMARY_ADMIN_KEY: email,
        DB_CREDENTIALS_AUTHENTICATION_METHOD: "uploaded",
    }
|
||||
|
||||
def sanitize_filename(name: str) -> str:
    """
    Soft sanitize a name for use as a MinIO/S3 object name.

    - Replace prohibited characters and slashes with a space (no underscores,
      to keep the name readable).
    - Collapse runs of whitespace and trim both ends.
    - Cap the length: names longer than 200 characters are cut to 180
      characters plus their extension.
    - Append ".txt" when the name has no extension.

    Returns "file.txt" when ``name`` is None or nothing is left after
    sanitizing.
    """
    if name is None:
        return "file.txt"

    name = str(name).strip()

    # Characters that MUST NOT appear in S3/MinIO object keys
    # Replace them with a space (not underscore)
    forbidden = r'[\\\?\#\%\*\:\|\<\>"]'
    name = re.sub(forbidden, " ", name)

    # Replace slashes "/" (S3 interprets as folder) with space
    name = name.replace("/", " ")

    # Collapse multiple spaces into one, then trim both ends
    name = re.sub(r"\s+", " ", name).strip()

    # Nothing usable left (empty input or only forbidden characters):
    # fall back to the same default as None instead of returning ".txt".
    if not name:
        return "file.txt"

    # Enforce reasonable max length
    if len(name) > 200:
        base, ext = os.path.splitext(name)
        # splitext treats everything after the last dot as the extension. In
        # free text that tail can be arbitrarily long, and re-appending it
        # would defeat the cap, so a long tail is truncated with the rest.
        if len(ext) > 20:
            base, ext = name, ""
        name = base[:180].rstrip() + ext

    # Ensure there is an extension
    if not os.path.splitext(name)[1]:
        name += ".txt"

    return name
|
||||
|
||||
|
||||
def clean_string(text: str | None) -> str | None:
|
||||
"""
|
||||
Clean a string to make it safe for insertion into MySQL (utf8mb4).
|
||||
- Normalize Unicode
|
||||
- Remove control characters / zero-width characters
|
||||
- Optionally remove high-plane emoji and symbols
|
||||
"""
|
||||
if text is None:
|
||||
return None
|
||||
|
||||
# 0. Ensure the value is a string
|
||||
text = str(text)
|
||||
|
||||
# 1. Normalize Unicode (NFC)
|
||||
text = unicodedata.normalize("NFC", text)
|
||||
|
||||
# 2. Remove ASCII control characters (except tab, newline, carriage return)
|
||||
text = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]", "", text)
|
||||
|
||||
# 3. Remove zero-width characters / BOM
|
||||
text = re.sub(r"[\u200b-\u200d\uFEFF]", "", text)
|
||||
|
||||
# 4. Remove high Unicode characters (emoji, special symbols)
|
||||
text = re.sub(r"[\U00010000-\U0010FFFF]", "", text)
|
||||
|
||||
# 5. Final fallback: strip any invalid UTF-8 sequences
|
||||
try:
|
||||
text.encode("utf-8")
|
||||
except UnicodeEncodeError:
|
||||
text = text.encode("utf-8", errors="ignore").decode("utf-8")
|
||||
|
||||
return text
|
||||
@ -30,7 +30,6 @@ class LoadConnector(ABC):
|
||||
"""Load documents from state"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def validate_connector_settings(self) -> None:
|
||||
"""Validate connector settings"""
|
||||
pass
|
||||
|
||||
@ -733,7 +733,7 @@ def build_time_range_query(
|
||||
"""Build time range query for Gmail API"""
|
||||
query = ""
|
||||
if time_range_start is not None and time_range_start != 0:
|
||||
query += f"after:{int(time_range_start)}"
|
||||
query += f"after:{int(time_range_start) + 1}"
|
||||
if time_range_end is not None and time_range_end != 0:
|
||||
query += f" before:{int(time_range_end)}"
|
||||
query = query.strip()
|
||||
@ -778,6 +778,15 @@ def time_str_to_utc(time_str: str):
|
||||
return datetime.fromisoformat(time_str.replace("Z", "+00:00"))
|
||||
|
||||
|
||||
def gmail_time_str_to_utc(time_str: str):
    """Convert a Gmail RFC 2822 date string to a timezone-aware UTC datetime.

    Args:
        time_str: Date header value, e.g. "Mon, 08 Dec 2025 20:42:30 +0800".

    Returns:
        The same instant as a ``datetime`` whose tzinfo is UTC.

    Raises:
        Whatever ``email.utils.parsedate_to_datetime`` raises for a string it
        cannot parse.
    """
    from email.utils import parsedate_to_datetime
    from datetime import timezone

    dt = parsedate_to_datetime(time_str)
    if dt.tzinfo is None:
        # A "-0000" offset yields a naive datetime that represents UTC.
        # astimezone() would interpret it in the host's local zone instead,
        # so attach UTC directly.
        return dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)
|
||||
|
||||
|
||||
# Notion Utilities
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
@ -41,6 +41,7 @@ from common.data_source import BlobStorageConnector, NotionConnector, DiscordCon
|
||||
from common.constants import FileSource, TaskStatus
|
||||
from common.data_source.config import INDEX_BATCH_SIZE
|
||||
from common.data_source.confluence_connector import ConfluenceConnector
|
||||
from common.data_source.gmail_connector import GmailConnector
|
||||
from common.data_source.interfaces import CheckpointOutputWrapper
|
||||
from common.data_source.utils import load_all_docs_from_checkpoint_connector
|
||||
from common.log_utils import init_root_logger
|
||||
@ -230,7 +231,64 @@ class Gmail(SyncBase):
|
||||
SOURCE_NAME: str = FileSource.GMAIL
|
||||
|
||||
async def _generate(self, task: dict):
    """Create the Gmail document generator for a single sync task.

    Gmail sync goes through ``GmailConnector``: ``load_from_state()`` is used
    for a full load and ``poll_source(start, end)`` for an incremental one.

    Config read from ``self.conf``:
        credentials: Gmail / Workspace OAuth JSON (required).
        batch_size: optional, defaults to ``INDEX_BATCH_SIZE``.

    Args:
        task: Sync task dict. Reads ``connector_id``, ``reindex`` and
            ``poll_range_start`` (the latter is used via ``.timestamp()``).

    Returns:
        The document generator produced by the connector.

    Raises:
        ValueError: if the connector config has no credentials.
    """
    batch_size = self.conf.get("batch_size", INDEX_BATCH_SIZE)
    self.connector = GmailConnector(batch_size=batch_size)

    credentials = self.conf.get("credentials")
    if not credentials:
        raise ValueError("Gmail connector is missing credentials.")

    new_credentials = self.connector.load_credentials(credentials)
    if new_credentials:
        # Persist rotated / refreshed credentials back to connector config.
        # Best effort: a failure here is logged but must not abort the sync.
        try:
            updated_conf = copy.deepcopy(self.conf)
            updated_conf["credentials"] = new_credentials
            ConnectorService.update_by_id(task["connector_id"], {"config": updated_conf})
            self.conf = updated_conf
            logging.info(
                "Persisted refreshed Gmail credentials for connector %s",
                task["connector_id"],
            )
        except Exception:
            logging.exception(
                "Failed to persist refreshed Gmail credentials for connector %s",
                task["connector_id"],
            )

    # Decide between full reindex and incremental polling by time range.
    # A missing / empty poll_range_start always means "full load", so no
    # separate None check is needed in the incremental branch.
    poll_start = task.get("poll_range_start")
    if task["reindex"] == "1" or not poll_start:
        begin_info = "totally"
        document_generator = self.connector.load_from_state()
    else:
        start_time = poll_start.timestamp()
        end_time = datetime.now(timezone.utc).timestamp()
        begin_info = f"from {poll_start}"
        document_generator = self.connector.poll_source(start_time, end_time)

    try:
        admin_email = self.connector.primary_admin_email
    except RuntimeError:
        # The connector signals an unavailable admin email with RuntimeError;
        # it is only used for the log line below.
        admin_email = "unknown"
    logging.info("Connect to Gmail as %s %s", admin_email, begin_info)
    return document_generator
|
||||
|
||||
|
||||
class Dropbox(SyncBase):
|
||||
|
||||
7
web/src/assets/svg/data-source/gmail.svg
Normal file
7
web/src/assets/svg/data-source/gmail.svg
Normal file
@ -0,0 +1,7 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="52 42 88 66">
|
||||
<path fill="#4285f4" d="M58 108h14V74L52 59v43c0 3.32 2.69 6 6 6"/>
|
||||
<path fill="#34a853" d="M120 108h14c3.32 0 6-2.69 6-6V59l-20 15"/>
|
||||
<path fill="#fbbc04" d="M120 48v26l20-15v-8c0-7.42-8.47-11.65-14.4-7.2"/>
|
||||
<path fill="#ea4335" d="M72 74V48l24 18 24-18v26L96 92"/>
|
||||
<path fill="#c5221f" d="M52 51v8l20 15V48l-5.6-4.2c-5.94-4.45-14.4-.22-14.4 7.2"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 419 B |
@ -739,6 +739,7 @@ Example: Virtual Hosted Style`,
|
||||
'Sync pages and databases from Notion for knowledge retrieval.',
|
||||
google_driveDescription:
|
||||
'Connect your Google Drive via OAuth and sync specific folders or drives.',
|
||||
gmailDescription: 'Connect your Gmail via OAuth to sync emails.',
|
||||
webdavDescription: 'Connect to WebDAV servers to sync files.',
|
||||
webdavRemotePathTip:
|
||||
'Optional: Specify a folder path on the WebDAV server (e.g., /Documents). Leave empty to sync from root.',
|
||||
@ -750,6 +751,10 @@ Example: Virtual Hosted Style`,
|
||||
'Comma-separated emails whose "My Drive" contents should be indexed (include the primary admin).',
|
||||
google_driveSharedFoldersTip:
|
||||
'Comma-separated Google Drive folder links to crawl.',
|
||||
gmailPrimaryAdminTip:
|
||||
'Primary admin email with Gmail / Workspace access, used to enumerate domain users and as the default sync account.',
|
||||
gmailTokenTip:
|
||||
'Upload the OAuth JSON generated from Google Console. If it only contains client credentials, run the browser-based verification once to mint long-lived refresh tokens.',
|
||||
dropboxDescription:
|
||||
'Connect your Dropbox to sync files and folders from a chosen account.',
|
||||
dropboxAccessTokenTip:
|
||||
|
||||
@ -736,6 +736,8 @@ export default {
|
||||
'Синхронизируйте страницы и базы данных из Notion для извлечения знаний.',
|
||||
google_driveDescription:
|
||||
'Подключите ваш Google Drive через OAuth и синхронизируйте определенные папки или диски.',
|
||||
gmailDescription:
|
||||
'Подключите ваш Gmail / Google Workspace аккаунт для синхронизации писем и их метаданных, чтобы построить корпоративную почтовую базу знаний и поиск с учетом прав доступа.',
|
||||
google_driveTokenTip:
|
||||
'Загрузите JSON токена OAuth, сгенерированный из помощника OAuth или Google Cloud Console. Вы также можете загрузить client_secret JSON из "установленного" или "веб" приложения. Если это ваша первая синхронизация, откроется окно браузера для завершения согласия OAuth. Если JSON уже содержит токен обновления, он будет автоматически повторно использован.',
|
||||
google_drivePrimaryAdminTip:
|
||||
@ -744,6 +746,10 @@ export default {
|
||||
'Электронные почты через запятую, чье содержимое "Мой диск" должно индексироваться (включите основного администратора).',
|
||||
google_driveSharedFoldersTip:
|
||||
'Ссылки на папки Google Drive через запятую для обхода.',
|
||||
gmailPrimaryAdminTip:
|
||||
'Основной административный email с доступом к Gmail / Workspace, используется для перечисления пользователей домена и как аккаунт синхронизации по умолчанию.',
|
||||
gmailTokenTip:
|
||||
'Загрузите OAuth JSON, сгенерированный в Google Console. Если он содержит только учетные данные клиента, выполните одноразовое подтверждение в браузере, чтобы получить долгоживущие токены обновления.',
|
||||
jiraDescription:
|
||||
'Подключите ваше рабочее пространство Jira для синхронизации задач, комментариев и вложений.',
|
||||
jiraBaseUrlTip:
|
||||
|
||||
@ -718,6 +718,7 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
||||
notionDescription: ' 同步 Notion 页面与数据库,用于知识检索。',
|
||||
google_driveDescription:
|
||||
'通过 OAuth 连接 Google Drive,并同步指定的文件夹或云端硬盘。',
|
||||
gmailDescription: '通过 OAuth 连接 Gmail,用于同步邮件。',
|
||||
google_driveTokenTip:
|
||||
'请上传由 OAuth helper 或 Google Cloud Console 导出的 OAuth token JSON。也支持上传 “installed” 或 “web” 类型的 client_secret JSON。若为首次同步,将自动弹出浏览器完成 OAuth 授权流程;如果该 JSON 已包含 refresh token,将会被自动复用。',
|
||||
google_drivePrimaryAdminTip: '拥有相应 Drive 访问权限的管理员邮箱。',
|
||||
@ -725,6 +726,10 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
||||
'需要索引其 “我的云端硬盘” 的邮箱,多个邮箱用逗号分隔(建议包含管理员)。',
|
||||
google_driveSharedFoldersTip:
|
||||
'需要同步的 Google Drive 文件夹链接,多个链接用逗号分隔。',
|
||||
gmailPrimaryAdminTip:
|
||||
'拥有 Gmail / Workspace 访问权限的主要管理员邮箱,用于列出域内用户并作为默认同步账号。',
|
||||
gmailTokenTip:
|
||||
'请上传由 Google Console 生成的 OAuth JSON。如果仅包含 client credentials,请通过浏览器授权一次以获取长期有效的刷新 Token。',
|
||||
dropboxDescription: '连接 Dropbox,同步指定账号下的文件与文件夹。',
|
||||
dropboxAccessTokenTip:
|
||||
'请在 Dropbox App Console 生成 Access Token,并勾选 files.metadata.read、files.content.read、sharing.read 等必要权限。',
|
||||
|
||||
@ -47,6 +47,7 @@ const AddDataSourceModal = ({
|
||||
}
|
||||
open={visible || false}
|
||||
onOpenChange={(open) => !open && hideModal?.()}
|
||||
maskClosable={false}
|
||||
// onOk={() => handleOk()}
|
||||
okText={t('common.confirm')}
|
||||
cancelText={t('common.cancel')}
|
||||
|
||||
@ -0,0 +1,391 @@
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
|
||||
|
||||
import { FileUploader } from '@/components/file-uploader';
|
||||
import { Button } from '@/components/ui/button';
|
||||
import {
|
||||
Dialog,
|
||||
DialogContent,
|
||||
DialogDescription,
|
||||
DialogFooter,
|
||||
DialogHeader,
|
||||
DialogTitle,
|
||||
} from '@/components/ui/dialog';
|
||||
import message from '@/components/ui/message';
|
||||
import { FileMimeType } from '@/constants/common';
|
||||
import {
|
||||
pollGmailWebAuthResult,
|
||||
startGmailWebAuth,
|
||||
} from '@/services/data-source-service';
|
||||
import { Loader2 } from 'lucide-react';
|
||||
|
||||
export type GmailTokenFieldProps = {
|
||||
value?: string;
|
||||
onChange: (value: any) => void;
|
||||
placeholder?: string;
|
||||
};
|
||||
|
||||
/**
 * Report whether a credential JSON string carries a truthy `refresh_token`.
 * Anything that is not valid JSON counts as "no refresh token".
 */
const credentialHasRefreshToken = (content: string) => {
  let parsed: any;
  try {
    parsed = JSON.parse(content);
  } catch {
    return false;
  }
  return Boolean(parsed?.refresh_token);
};
|
||||
|
||||
/**
 * Produce a short human-readable summary of the stored credential JSON.
 * Returns an empty string when there is no content or it is not valid JSON.
 */
const describeCredentials = (content?: string) => {
  if (!content) {
    return '';
  }

  let parsed: any;
  try {
    parsed = JSON.parse(content);
  } catch {
    return '';
  }

  // Token files (with a refresh token) take precedence over client configs.
  if (parsed?.refresh_token) {
    return 'Uploaded OAuth tokens with a refresh token.';
  }

  const isClientConfig = Boolean(parsed?.installed || parsed?.web);
  return isClientConfig
    ? 'Client credentials detected. Complete verification to mint long-lived tokens.'
    : 'Stored Google credential JSON.';
};
|
||||
|
||||
/**
 * Form field that lets the user upload a Gmail OAuth JSON file.
 *
 * - If the uploaded JSON already has a `refresh_token`, it is passed straight
 *   to `onChange`.
 * - Otherwise the JSON is kept as "pending" and a dialog walks the user
 *   through a popup-based Google consent flow; the resulting credentials are
 *   fetched from the backend by flow id and then passed to `onChange`.
 *
 * Results are only applied for the flow that is currently active: messages or
 * poll responses belonging to a cancelled / replaced flow are ignored.
 */
const GmailTokenField = ({
  value,
  onChange,
  placeholder,
}: GmailTokenFieldProps) => {
  const [files, setFiles] = useState<File[]>([]);
  const [pendingCredentials, setPendingCredentials] = useState<string>('');
  const [dialogOpen, setDialogOpen] = useState(false);
  const [webAuthLoading, setWebAuthLoading] = useState(false);
  const [webFlowId, setWebFlowId] = useState<string | null>(null);
  const [webStatus, setWebStatus] = useState<
    'idle' | 'waiting' | 'success' | 'error'
  >('idle');
  const [webStatusMessage, setWebStatusMessage] = useState('');
  // Mirror of webFlowId readable from event handlers / async callbacks
  // without waiting for a re-render.
  const webFlowIdRef = useRef<string | null>(null);
  const webPollTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  // Stop any scheduled poll and forget the active flow.
  const clearWebState = useCallback(() => {
    if (webPollTimerRef.current) {
      clearTimeout(webPollTimerRef.current);
      webPollTimerRef.current = null;
    }
    webFlowIdRef.current = null;
    setWebFlowId(null);
    setWebStatus('idle');
    setWebStatusMessage('');
  }, []);

  useEffect(() => {
    return () => {
      if (webPollTimerRef.current) {
        clearTimeout(webPollTimerRef.current);
      }
      // Invalidate the flow so an in-flight poll cannot apply its result
      // after the component has unmounted.
      webFlowIdRef.current = null;
    };
  }, []);

  useEffect(() => {
    webFlowIdRef.current = webFlowId;
  }, [webFlowId]);

  const credentialSummary = useMemo(() => describeCredentials(value), [value]);
  const hasVerifiedTokens = useMemo(
    () => Boolean(value && credentialHasRefreshToken(value)),
    [value],
  );
  const hasUploadedButUnverified = useMemo(
    () => Boolean(value && !hasVerifiedTokens),
    [hasVerifiedTokens, value],
  );

  // Close the dialog; optionally also drop the pending upload.
  const resetDialog = useCallback(
    (shouldResetState: boolean) => {
      setDialogOpen(false);
      clearWebState();
      if (shouldResetState) {
        setPendingCredentials('');
        setFiles([]);
      }
    },
    [clearWebState],
  );

  // Ask the backend for the outcome of the given flow. Code 106 is treated
  // as "not ready yet" and re-polled after 1.5s.
  const fetchWebResult = useCallback(
    async (flowId: string) => {
      try {
        const { data } = await pollGmailWebAuthResult({
          flow_id: flowId,
        });
        // The flow may have been cancelled or replaced while the request was
        // in flight; never apply credentials for a flow that is not active.
        if (webFlowIdRef.current !== flowId) {
          return;
        }
        if (data.code === 0 && data.data?.credentials) {
          onChange(data.data.credentials);
          setPendingCredentials('');
          message.success('Gmail credentials verified.');
          resetDialog(false);
          return;
        }
        if (data.code === 106) {
          setWebStatus('waiting');
          setWebStatusMessage('Authorization confirmed. Finalizing tokens...');
          if (webPollTimerRef.current) {
            clearTimeout(webPollTimerRef.current);
          }
          webPollTimerRef.current = setTimeout(
            () => fetchWebResult(flowId),
            1500,
          );
          return;
        }
        message.error(data.message || 'Authorization failed.');
        clearWebState();
      } catch {
        if (webFlowIdRef.current !== flowId) {
          return;
        }
        message.error('Unable to retrieve authorization result.');
        clearWebState();
      }
    },
    [clearWebState, onChange, resetDialog],
  );

  // Listen for the completion message posted by the OAuth popup.
  useEffect(() => {
    const handler = (event: MessageEvent) => {
      const payload = event.data;
      if (!payload || payload.type !== 'ragflow-gmail-oauth') {
        return;
      }
      if (!payload.flowId) {
        return;
      }
      // Only react to the flow this component started; with no active flow
      // (e.g. after cancel) every message is ignored.
      if (webFlowIdRef.current !== payload.flowId) {
        return;
      }

      if (payload.status === 'success') {
        setWebStatus('waiting');
        setWebStatusMessage('Authorization confirmed. Finalizing tokens...');
        fetchWebResult(payload.flowId);
      } else {
        message.error(
          payload.message || 'Authorization window reported an error.',
        );
        clearWebState();
      }
    };

    window.addEventListener('message', handler);
    return () => window.removeEventListener('message', handler);
  }, [clearWebState, fetchWebResult]);

  // Handle a change of the uploaded file list (only the last file is used).
  const handleValueChange = useCallback(
    (nextFiles: File[]) => {
      if (!nextFiles.length) {
        setFiles([]);
        onChange('');
        setPendingCredentials('');
        clearWebState();
        return;
      }
      const file = nextFiles[nextFiles.length - 1];
      file
        .text()
        .then((text) => {
          try {
            JSON.parse(text);
          } catch {
            message.error('Invalid JSON file.');
            setFiles([]);
            clearWebState();
            return;
          }
          setFiles([file]);
          clearWebState();
          if (credentialHasRefreshToken(text)) {
            onChange(text);
            setPendingCredentials('');
            message.success('Gmail OAuth credentials uploaded.');
            return;
          }
          // No refresh token yet: keep the JSON aside and start verification.
          setPendingCredentials(text);
          setDialogOpen(true);
          message.info(
            'Client configuration uploaded. Verification is required to finish setup.',
          );
        })
        .catch(() => {
          message.error('Unable to read the uploaded file.');
          setFiles([]);
        });
    },
    [clearWebState, onChange],
  );

  // Start the browser consent flow and open the Google popup.
  const handleStartWebAuthorization = useCallback(async () => {
    if (!pendingCredentials) {
      message.error('No Google credential file detected.');
      return;
    }
    setWebAuthLoading(true);
    clearWebState();
    try {
      const { data } = await startGmailWebAuth({
        credentials: pendingCredentials,
      });
      if (data.code === 0 && data.data?.authorization_url) {
        const flowId = data.data.flow_id;
        const popup = window.open(
          data.data.authorization_url,
          'ragflow-gmail-oauth',
          'width=600,height=720',
        );
        if (!popup) {
          message.error(
            'Popup was blocked. Please allow popups for this site.',
          );
          return;
        }
        popup.focus();
        // Set the ref immediately so a fast popup message is not dropped
        // before the state update is committed.
        webFlowIdRef.current = flowId;
        setWebFlowId(flowId);
        setWebStatus('waiting');
        setWebStatusMessage('Complete the Google consent in the popup window.');
      } else {
        message.error(data.message || 'Failed to start browser authorization.');
      }
    } catch {
      message.error('Failed to start browser authorization.');
    } finally {
      setWebAuthLoading(false);
    }
  }, [clearWebState, pendingCredentials]);

  const handleManualWebCheck = useCallback(() => {
    if (!webFlowId) {
      message.info('Start browser authorization first.');
      return;
    }
    setWebStatus('waiting');
    setWebStatusMessage('Checking authorization status...');
    fetchWebResult(webFlowId);
  }, [fetchWebResult, webFlowId]);

  const handleCancel = useCallback(() => {
    message.warning(
      'Verification canceled. Upload the credential again to restart.',
    );
    resetDialog(true);
  }, [resetDialog]);

  return (
    <div className="flex flex-col gap-3">
      {(credentialSummary ||
        hasVerifiedTokens ||
        hasUploadedButUnverified ||
        pendingCredentials) && (
        <div className="flex flex-wrap items-center gap-3 rounded-md border border-dashed border-muted-foreground/40 bg-muted/20 px-3 py-2 text-xs text-muted-foreground">
          <div className="flex flex-wrap items-center gap-2">
            {hasVerifiedTokens ? (
              <span className="rounded-full bg-emerald-100 px-2 py-0.5 text-[11px] font-semibold uppercase tracking-wide text-emerald-700">
                Verified
              </span>
            ) : null}
            {hasUploadedButUnverified ? (
              <span className="rounded-full bg-amber-100 px-2 py-0.5 text-[11px] font-semibold uppercase tracking-wide text-amber-700">
                Needs authorization
              </span>
            ) : null}
            {pendingCredentials && !hasVerifiedTokens ? (
              <span className="rounded-full bg-blue-100 px-2 py-0.5 text-[11px] font-semibold uppercase tracking-wide text-blue-700">
                Uploaded (pending)
              </span>
            ) : null}
          </div>
          {credentialSummary ? (
            <p className="m-0">{credentialSummary}</p>
          ) : null}
        </div>
      )}
      <FileUploader
        className="py-4 border-[0.5px] bg-bg-card text-text-secondary"
        value={files}
        onValueChange={handleValueChange}
        accept={{ '*.json': [FileMimeType.Json] }}
        maxFileCount={1}
        description={'Upload your Gmail OAuth JSON file.'}
      />

      <Dialog
        open={dialogOpen}
        onOpenChange={(open) => {
          if (!open && dialogOpen) {
            handleCancel();
          }
        }}
      >
        {/* The dialog can only be dismissed through the Cancel button. */}
        <DialogContent
          onPointerDownOutside={(e) => e.preventDefault()}
          onInteractOutside={(e) => e.preventDefault()}
          onEscapeKeyDown={(e) => e.preventDefault()}
        >
          <DialogHeader>
            <DialogTitle>Complete Gmail verification</DialogTitle>
            <DialogDescription>
              The uploaded client credentials do not contain a refresh token.
              Run the verification flow once to mint reusable tokens.
            </DialogDescription>
          </DialogHeader>

          <div className="space-y-4">
            <div className="rounded-md border border-dashed border-muted-foreground/40 bg-muted/10 px-4 py-4 text-sm text-muted-foreground">
              <div className="text-sm font-semibold text-foreground">
                Authorize in browser
              </div>
              <p className="mt-2">
                We will open Google's consent page in a new window. Sign in
                with the admin account, grant access, and return here. Your
                credentials will update automatically.
              </p>
              {webStatus !== 'idle' && (
                <p
                  className={`mt-2 text-xs ${
                    webStatus === 'error'
                      ? 'text-destructive'
                      : 'text-muted-foreground'
                  }`}
                >
                  {webStatusMessage}
                </p>
              )}
              <div className="mt-3 flex flex-wrap gap-2">
                <Button
                  onClick={handleStartWebAuthorization}
                  disabled={webAuthLoading}
                >
                  {webAuthLoading && (
                    <Loader2 className="mr-2 size-4 animate-spin" />
                  )}
                  Authorize with Google
                </Button>
                {webFlowId ? (
                  <Button
                    variant="outline"
                    onClick={handleManualWebCheck}
                    disabled={webStatus === 'success'}
                  >
                    Refresh status
                  </Button>
                ) : null}
              </div>
            </div>
          </div>

          <DialogFooter className="pt-2">
            <Button variant="ghost" onClick={handleCancel}>
              Cancel
            </Button>
          </DialogFooter>
        </DialogContent>
      </Dialog>
    </div>
  );
};
|
||||
|
||||
export default GmailTokenField;
|
||||
@ -1,5 +1,3 @@
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
|
||||
|
||||
import { FileUploader } from '@/components/file-uploader';
|
||||
import { Button } from '@/components/ui/button';
|
||||
import {
|
||||
@ -17,6 +15,7 @@ import {
|
||||
startGoogleDriveWebAuth,
|
||||
} from '@/services/data-source-service';
|
||||
import { Loader2 } from 'lucide-react';
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
|
||||
|
||||
type GoogleDriveTokenFieldProps = {
|
||||
value?: string;
|
||||
@ -313,12 +312,16 @@ const GoogleDriveTokenField = ({
|
||||
<Dialog
|
||||
open={dialogOpen}
|
||||
onOpenChange={(open) => {
|
||||
if (!open) {
|
||||
if (!open && dialogOpen) {
|
||||
handleCancel();
|
||||
}
|
||||
}}
|
||||
>
|
||||
<DialogContent>
|
||||
<DialogContent
|
||||
onPointerDownOutside={(e) => e.preventDefault()}
|
||||
onInteractOutside={(e) => e.preventDefault()}
|
||||
onEscapeKeyDown={(e) => e.preventDefault()}
|
||||
>
|
||||
<DialogHeader>
|
||||
<DialogTitle>Complete Google verification</DialogTitle>
|
||||
<DialogDescription>
|
||||
@ -326,7 +329,6 @@ const GoogleDriveTokenField = ({
|
||||
Run the verification flow once to mint reusable tokens.
|
||||
</DialogDescription>
|
||||
</DialogHeader>
|
||||
|
||||
<div className="space-y-4">
|
||||
<div className="rounded-md border border-dashed border-muted-foreground/40 bg-muted/10 px-4 py-4 text-sm text-muted-foreground">
|
||||
<div className="text-sm font-semibold text-foreground">
|
||||
@ -370,7 +372,6 @@ const GoogleDriveTokenField = ({
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<DialogFooter className="pt-2">
|
||||
<Button variant="ghost" onClick={handleCancel}>
|
||||
Cancel
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
import { FormFieldType } from '@/components/dynamic-form';
|
||||
import SvgIcon from '@/components/svg-icon';
|
||||
import { t } from 'i18next';
|
||||
import GmailTokenField from './component/gmail-token-field';
|
||||
import GoogleDriveTokenField from './component/google-drive-token-field';
|
||||
|
||||
export enum DataSourceKey {
|
||||
@ -10,7 +11,7 @@ export enum DataSourceKey {
|
||||
DISCORD = 'discord',
|
||||
GOOGLE_DRIVE = 'google_drive',
|
||||
MOODLE = 'moodle',
|
||||
// GMAIL = 'gmail',
|
||||
GMAIL = 'gmail',
|
||||
JIRA = 'jira',
|
||||
WEBDAV = 'webdav',
|
||||
DROPBOX = 'dropbox',
|
||||
@ -45,6 +46,11 @@ export const DataSourceInfo = {
|
||||
description: t(`setting.${DataSourceKey.GOOGLE_DRIVE}Description`),
|
||||
icon: <SvgIcon name={'data-source/google-drive'} width={38} />,
|
||||
},
|
||||
[DataSourceKey.GMAIL]: {
|
||||
name: 'Gmail',
|
||||
description: t(`setting.${DataSourceKey.GMAIL}Description`),
|
||||
icon: <SvgIcon name={'data-source/gmail'} width={38} />,
|
||||
},
|
||||
[DataSourceKey.MOODLE]: {
|
||||
name: 'Moodle',
|
||||
description: t(`setting.${DataSourceKey.MOODLE}Description`),
|
||||
@ -320,6 +326,38 @@ export const DataSourceFormFields = {
|
||||
defaultValue: 'uploaded',
|
||||
},
|
||||
],
|
||||
[DataSourceKey.GMAIL]: [
|
||||
{
|
||||
label: 'Primary Admin Email',
|
||||
name: 'config.credentials.google_primary_admin',
|
||||
type: FormFieldType.Text,
|
||||
required: true,
|
||||
placeholder: 'admin@example.com',
|
||||
tooltip: t('setting.gmailPrimaryAdminTip'),
|
||||
},
|
||||
{
|
||||
label: 'OAuth Token JSON',
|
||||
name: 'config.credentials.google_tokens',
|
||||
type: FormFieldType.Textarea,
|
||||
required: true,
|
||||
render: (fieldProps: any) => (
|
||||
<GmailTokenField
|
||||
value={fieldProps.value}
|
||||
onChange={fieldProps.onChange}
|
||||
placeholder='{ "token": "...", "refresh_token": "...", ... }'
|
||||
/>
|
||||
),
|
||||
tooltip: t('setting.gmailTokenTip'),
|
||||
},
|
||||
{
|
||||
label: '',
|
||||
name: 'config.credentials.authentication_method',
|
||||
type: FormFieldType.Text,
|
||||
required: false,
|
||||
hidden: true,
|
||||
defaultValue: 'uploaded',
|
||||
},
|
||||
],
|
||||
[DataSourceKey.MOODLE]: [
|
||||
{
|
||||
label: 'Moodle URL',
|
||||
@ -550,6 +588,17 @@ export const DataSourceFormDefaultValues = {
|
||||
},
|
||||
},
|
||||
},
|
||||
[DataSourceKey.GMAIL]: {
|
||||
name: '',
|
||||
source: DataSourceKey.GMAIL,
|
||||
config: {
|
||||
credentials: {
|
||||
google_primary_admin: '',
|
||||
google_tokens: '',
|
||||
authentication_method: 'uploaded',
|
||||
},
|
||||
},
|
||||
},
|
||||
[DataSourceKey.MOODLE]: {
|
||||
name: '',
|
||||
source: DataSourceKey.MOODLE,
|
||||
|
||||
@ -34,9 +34,17 @@ export const featchDataSourceDetail = (id: string) =>
|
||||
request.get(api.dataSourceDetail(id));
|
||||
|
||||
export const startGoogleDriveWebAuth = (payload: { credentials: string }) =>
|
||||
request.post(api.googleDriveWebAuthStart, { data: payload });
|
||||
request.post(api.googleWebAuthStart('google-drive'), { data: payload });
|
||||
|
||||
export const pollGoogleDriveWebAuthResult = (payload: { flow_id: string }) =>
|
||||
request.post(api.googleDriveWebAuthResult, { data: payload });
|
||||
request.post(api.googleWebAuthResult('google-drive'), { data: payload });
|
||||
|
||||
// Gmail web auth follows the same pattern as Google Drive, but uses
|
||||
// Gmail-specific endpoints and is consumed by the GmailTokenField UI.
|
||||
/**
 * Start the Gmail web OAuth flow by POSTing the uploaded credential JSON
 * to the Google web-auth "start" endpoint (type `gmail`).
 */
export const startGmailWebAuth = (payload: { credentials: string }) => {
  const url = api.googleWebAuthStart('gmail');
  return request.post(url, { data: payload });
};
|
||||
|
||||
/**
 * Query the outcome of a Gmail web OAuth flow by POSTing its flow id
 * to the Google web-auth "result" endpoint (type `gmail`).
 */
export const pollGmailWebAuthResult = (payload: { flow_id: string }) => {
  const url = api.googleWebAuthResult('gmail');
  return request.post(url, { data: payload });
};
|
||||
|
||||
export default dataSourceService;
|
||||
|
||||
@ -42,8 +42,10 @@ export default {
|
||||
dataSourceRebuild: (id: string) => `${api_host}/connector/${id}/rebuild`,
|
||||
dataSourceLogs: (id: string) => `${api_host}/connector/${id}/logs`,
|
||||
dataSourceDetail: (id: string) => `${api_host}/connector/${id}`,
|
||||
googleDriveWebAuthStart: `${api_host}/connector/google-drive/oauth/web/start`,
|
||||
googleDriveWebAuthResult: `${api_host}/connector/google-drive/oauth/web/result`,
|
||||
googleWebAuthStart: (type: 'google-drive' | 'gmail') =>
|
||||
`${api_host}/connector/google/oauth/web/start?type=${type}`,
|
||||
googleWebAuthResult: (type: 'google-drive' | 'gmail') =>
|
||||
`${api_host}/connector/google/oauth/web/result?type=${type}`,
|
||||
|
||||
// plugin
|
||||
llm_tools: `${api_host}/plugin/llm_tools`,
|
||||
|
||||
Reference in New Issue
Block a user