mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Feat: add gmail connector (#11549)
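### What problem does this PR solve?

Adds a Gmail connector. The Google web OAuth flow is generalized so that the start and result-polling endpoints accept a `type` of `google-drive` or `gmail`, a dedicated Gmail OAuth callback route and `GMAIL_WEB_OAUTH_REDIRECT_URI` setting are added, and Gmail flows use their own Redis key prefixes so they do not clash with Google Drive flows. Gmail threads are now emitted as `.txt` blob documents with the message metadata attached.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)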
This commit is contained in:
@ -28,8 +28,8 @@ from api.db import InputType
|
|||||||
from api.db.services.connector_service import ConnectorService, SyncLogsService
|
from api.db.services.connector_service import ConnectorService, SyncLogsService
|
||||||
from api.utils.api_utils import get_data_error_result, get_json_result, validate_request
|
from api.utils.api_utils import get_data_error_result, get_json_result, validate_request
|
||||||
from common.constants import RetCode, TaskStatus
|
from common.constants import RetCode, TaskStatus
|
||||||
from common.data_source.config import GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI, DocumentSource
|
from common.data_source.config import GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI, GMAIL_WEB_OAUTH_REDIRECT_URI, DocumentSource
|
||||||
from common.data_source.google_util.constant import GOOGLE_DRIVE_WEB_OAUTH_POPUP_TEMPLATE, GOOGLE_SCOPES
|
from common.data_source.google_util.constant import GOOGLE_WEB_OAUTH_POPUP_TEMPLATE, GOOGLE_SCOPES
|
||||||
from common.misc_utils import get_uuid
|
from common.misc_utils import get_uuid
|
||||||
from rag.utils.redis_conn import REDIS_CONN
|
from rag.utils.redis_conn import REDIS_CONN
|
||||||
from api.apps import login_required, current_user
|
from api.apps import login_required, current_user
|
||||||
@ -122,12 +122,30 @@ GOOGLE_WEB_FLOW_RESULT_PREFIX = "google_drive_web_flow_result"
|
|||||||
WEB_FLOW_TTL_SECS = 15 * 60
|
WEB_FLOW_TTL_SECS = 15 * 60
|
||||||
|
|
||||||
|
|
||||||
def _web_state_cache_key(flow_id: str) -> str:
|
def _web_state_cache_key(flow_id: str, source_type: str | None = None) -> str:
|
||||||
return f"{GOOGLE_WEB_FLOW_STATE_PREFIX}:{flow_id}"
|
"""Return Redis key for web OAuth state.
|
||||||
|
|
||||||
|
The default prefix keeps backward compatibility for Google Drive.
|
||||||
|
When source_type == "gmail", a different prefix is used so that
|
||||||
|
Drive/Gmail flows don't clash in Redis.
|
||||||
|
"""
|
||||||
|
if source_type == "gmail":
|
||||||
|
prefix = "gmail_web_flow_state"
|
||||||
|
else:
|
||||||
|
prefix = GOOGLE_WEB_FLOW_STATE_PREFIX
|
||||||
|
return f"{prefix}:{flow_id}"
|
||||||
|
|
||||||
|
|
||||||
def _web_result_cache_key(flow_id: str) -> str:
|
def _web_result_cache_key(flow_id: str, source_type: str | None = None) -> str:
|
||||||
return f"{GOOGLE_WEB_FLOW_RESULT_PREFIX}:{flow_id}"
|
"""Return Redis key for web OAuth result.
|
||||||
|
|
||||||
|
Mirrors _web_state_cache_key logic for result storage.
|
||||||
|
"""
|
||||||
|
if source_type == "gmail":
|
||||||
|
prefix = "gmail_web_flow_result"
|
||||||
|
else:
|
||||||
|
prefix = GOOGLE_WEB_FLOW_RESULT_PREFIX
|
||||||
|
return f"{prefix}:{flow_id}"
|
||||||
|
|
||||||
|
|
||||||
def _load_credentials(payload: str | dict[str, Any]) -> dict[str, Any]:
|
def _load_credentials(payload: str | dict[str, Any]) -> dict[str, Any]:
|
||||||
@ -146,19 +164,22 @@ def _get_web_client_config(credentials: dict[str, Any]) -> dict[str, Any]:
|
|||||||
return {"web": web_section}
|
return {"web": web_section}
|
||||||
|
|
||||||
|
|
||||||
async def _render_web_oauth_popup(flow_id: str, success: bool, message: str):
|
async def _render_web_oauth_popup(flow_id: str, success: bool, message: str, source="drive"):
|
||||||
status = "success" if success else "error"
|
status = "success" if success else "error"
|
||||||
auto_close = "window.close();" if success else ""
|
auto_close = "window.close();" if success else ""
|
||||||
escaped_message = escape(message)
|
escaped_message = escape(message)
|
||||||
payload_json = json.dumps(
|
payload_json = json.dumps(
|
||||||
{
|
{
|
||||||
"type": "ragflow-google-drive-oauth",
|
# TODO(google-oauth): include connector type (drive/gmail) in payload type if needed
|
||||||
|
"type": f"ragflow-google-{source}-oauth",
|
||||||
"status": status,
|
"status": status,
|
||||||
"flowId": flow_id or "",
|
"flowId": flow_id or "",
|
||||||
"message": message,
|
"message": message,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
html = GOOGLE_DRIVE_WEB_OAUTH_POPUP_TEMPLATE.format(
|
# TODO(google-oauth): title/heading/message may need to reflect drive/gmail based on cached type
|
||||||
|
html = GOOGLE_WEB_OAUTH_POPUP_TEMPLATE.format(
|
||||||
|
title=f"Google {source.capitalize()} Authorization",
|
||||||
heading="Authorization complete" if success else "Authorization failed",
|
heading="Authorization complete" if success else "Authorization failed",
|
||||||
message=escaped_message,
|
message=escaped_message,
|
||||||
payload_json=payload_json,
|
payload_json=payload_json,
|
||||||
@ -169,20 +190,33 @@ async def _render_web_oauth_popup(flow_id: str, success: bool, message: str):
|
|||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
@manager.route("/google-drive/oauth/web/start", methods=["POST"]) # noqa: F821
|
@manager.route("/google/oauth/web/start", methods=["POST"]) # noqa: F821
|
||||||
@login_required
|
@login_required
|
||||||
@validate_request("credentials")
|
@validate_request("credentials")
|
||||||
async def start_google_drive_web_oauth():
|
async def start_google_web_oauth():
|
||||||
if not GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI:
|
source = request.args.get("type", "google-drive")
|
||||||
|
if source not in ("google-drive", "gmail"):
|
||||||
|
return get_json_result(code=RetCode.ARGUMENT_ERROR, message="Invalid Google OAuth type.")
|
||||||
|
|
||||||
|
if source == "gmail":
|
||||||
|
redirect_uri = GMAIL_WEB_OAUTH_REDIRECT_URI
|
||||||
|
scopes = GOOGLE_SCOPES[DocumentSource.GMAIL]
|
||||||
|
else:
|
||||||
|
redirect_uri = GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI if source == "google-drive" else GMAIL_WEB_OAUTH_REDIRECT_URI
|
||||||
|
scopes = GOOGLE_SCOPES[DocumentSource.GOOGLE_DRIVE if source == "google-drive" else DocumentSource.GMAIL]
|
||||||
|
|
||||||
|
if not redirect_uri:
|
||||||
return get_json_result(
|
return get_json_result(
|
||||||
code=RetCode.SERVER_ERROR,
|
code=RetCode.SERVER_ERROR,
|
||||||
message="Google Drive OAuth redirect URI is not configured on the server.",
|
message="Google OAuth redirect URI is not configured on the server.",
|
||||||
)
|
)
|
||||||
|
|
||||||
req = await request.json or {}
|
req = await request.json or {}
|
||||||
raw_credentials = req.get("credentials", "")
|
raw_credentials = req.get("credentials", "")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
credentials = _load_credentials(raw_credentials)
|
credentials = _load_credentials(raw_credentials)
|
||||||
|
print(credentials)
|
||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
return get_json_result(code=RetCode.ARGUMENT_ERROR, message=str(exc))
|
return get_json_result(code=RetCode.ARGUMENT_ERROR, message=str(exc))
|
||||||
|
|
||||||
@ -199,8 +233,8 @@ async def start_google_drive_web_oauth():
|
|||||||
|
|
||||||
flow_id = str(uuid.uuid4())
|
flow_id = str(uuid.uuid4())
|
||||||
try:
|
try:
|
||||||
flow = Flow.from_client_config(client_config, scopes=GOOGLE_SCOPES[DocumentSource.GOOGLE_DRIVE])
|
flow = Flow.from_client_config(client_config, scopes=scopes)
|
||||||
flow.redirect_uri = GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI
|
flow.redirect_uri = redirect_uri
|
||||||
authorization_url, _ = flow.authorization_url(
|
authorization_url, _ = flow.authorization_url(
|
||||||
access_type="offline",
|
access_type="offline",
|
||||||
include_granted_scopes="true",
|
include_granted_scopes="true",
|
||||||
@ -219,7 +253,7 @@ async def start_google_drive_web_oauth():
|
|||||||
"client_config": client_config,
|
"client_config": client_config,
|
||||||
"created_at": int(time.time()),
|
"created_at": int(time.time()),
|
||||||
}
|
}
|
||||||
REDIS_CONN.set_obj(_web_state_cache_key(flow_id), cache_payload, WEB_FLOW_TTL_SECS)
|
REDIS_CONN.set_obj(_web_state_cache_key(flow_id, source), cache_payload, WEB_FLOW_TTL_SECS)
|
||||||
|
|
||||||
return get_json_result(
|
return get_json_result(
|
||||||
data={
|
data={
|
||||||
@ -230,60 +264,122 @@ async def start_google_drive_web_oauth():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@manager.route("/google-drive/oauth/web/callback", methods=["GET"]) # noqa: F821
|
@manager.route("/gmail/oauth/web/callback", methods=["GET"]) # noqa: F821
|
||||||
async def google_drive_web_oauth_callback():
|
async def google_gmail_web_oauth_callback():
|
||||||
state_id = request.args.get("state")
|
state_id = request.args.get("state")
|
||||||
error = request.args.get("error")
|
error = request.args.get("error")
|
||||||
|
source = "gmail"
|
||||||
|
if source != 'gmail':
|
||||||
|
return await _render_web_oauth_popup("", False, "Invalid Google OAuth type.", source)
|
||||||
|
|
||||||
error_description = request.args.get("error_description") or error
|
error_description = request.args.get("error_description") or error
|
||||||
|
|
||||||
if not state_id:
|
if not state_id:
|
||||||
return await _render_web_oauth_popup("", False, "Missing OAuth state parameter.")
|
return await _render_web_oauth_popup("", False, "Missing OAuth state parameter.", source)
|
||||||
|
|
||||||
state_cache = REDIS_CONN.get(_web_state_cache_key(state_id))
|
state_cache = REDIS_CONN.get(_web_state_cache_key(state_id, source))
|
||||||
if not state_cache:
|
if not state_cache:
|
||||||
return await _render_web_oauth_popup(state_id, False, "Authorization session expired. Please restart from the main window.")
|
return await _render_web_oauth_popup(state_id, False, "Authorization session expired. Please restart from the main window.", source)
|
||||||
|
|
||||||
state_obj = json.loads(state_cache)
|
state_obj = json.loads(state_cache)
|
||||||
client_config = state_obj.get("client_config")
|
client_config = state_obj.get("client_config")
|
||||||
if not client_config:
|
if not client_config:
|
||||||
REDIS_CONN.delete(_web_state_cache_key(state_id))
|
REDIS_CONN.delete(_web_state_cache_key(state_id, source))
|
||||||
return await _render_web_oauth_popup(state_id, False, "Authorization session was invalid. Please retry.")
|
return await _render_web_oauth_popup(state_id, False, "Authorization session was invalid. Please retry.", source)
|
||||||
|
|
||||||
if error:
|
if error:
|
||||||
REDIS_CONN.delete(_web_state_cache_key(state_id))
|
REDIS_CONN.delete(_web_state_cache_key(state_id, source))
|
||||||
return await _render_web_oauth_popup(state_id, False, error_description or "Authorization was cancelled.")
|
return await _render_web_oauth_popup(state_id, False, error_description or "Authorization was cancelled.", source)
|
||||||
|
|
||||||
code = request.args.get("code")
|
code = request.args.get("code")
|
||||||
if not code:
|
if not code:
|
||||||
return await _render_web_oauth_popup(state_id, False, "Missing authorization code from Google.")
|
return await _render_web_oauth_popup(state_id, False, "Missing authorization code from Google.", source)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
flow = Flow.from_client_config(client_config, scopes=GOOGLE_SCOPES[DocumentSource.GOOGLE_DRIVE])
|
# TODO(google-oauth): branch scopes/redirect_uri based on source_type (drive vs gmail)
|
||||||
flow.redirect_uri = GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI
|
flow = Flow.from_client_config(client_config, scopes=GOOGLE_SCOPES[DocumentSource.GMAIL])
|
||||||
|
flow.redirect_uri = GMAIL_WEB_OAUTH_REDIRECT_URI
|
||||||
flow.fetch_token(code=code)
|
flow.fetch_token(code=code)
|
||||||
except Exception as exc: # pragma: no cover - defensive
|
except Exception as exc: # pragma: no cover - defensive
|
||||||
logging.exception("Failed to exchange Google OAuth code: %s", exc)
|
logging.exception("Failed to exchange Google OAuth code: %s", exc)
|
||||||
REDIS_CONN.delete(_web_state_cache_key(state_id))
|
REDIS_CONN.delete(_web_state_cache_key(state_id, source))
|
||||||
return await _render_web_oauth_popup(state_id, False, "Failed to exchange tokens with Google. Please retry.")
|
return await _render_web_oauth_popup(state_id, False, "Failed to exchange tokens with Google. Please retry.", source)
|
||||||
|
|
||||||
creds_json = flow.credentials.to_json()
|
creds_json = flow.credentials.to_json()
|
||||||
result_payload = {
|
result_payload = {
|
||||||
"user_id": state_obj.get("user_id"),
|
"user_id": state_obj.get("user_id"),
|
||||||
"credentials": creds_json,
|
"credentials": creds_json,
|
||||||
}
|
}
|
||||||
REDIS_CONN.set_obj(_web_result_cache_key(state_id), result_payload, WEB_FLOW_TTL_SECS)
|
REDIS_CONN.set_obj(_web_result_cache_key(state_id, source), result_payload, WEB_FLOW_TTL_SECS)
|
||||||
REDIS_CONN.delete(_web_state_cache_key(state_id))
|
|
||||||
|
|
||||||
return await _render_web_oauth_popup(state_id, True, "Authorization completed successfully.")
|
print("\n\n", _web_result_cache_key(state_id, source), "\n\n")
|
||||||
|
|
||||||
|
REDIS_CONN.delete(_web_state_cache_key(state_id, source))
|
||||||
|
|
||||||
|
return await _render_web_oauth_popup(state_id, True, "Authorization completed successfully.", source)
|
||||||
|
|
||||||
|
|
||||||
@manager.route("/google-drive/oauth/web/result", methods=["POST"]) # noqa: F821
|
@manager.route("/google-drive/oauth/web/callback", methods=["GET"]) # noqa: F821
|
||||||
|
async def google_drive_web_oauth_callback():
|
||||||
|
state_id = request.args.get("state")
|
||||||
|
error = request.args.get("error")
|
||||||
|
source = "google-drive"
|
||||||
|
if source not in ("google-drive", "gmail"):
|
||||||
|
return await _render_web_oauth_popup("", False, "Invalid Google OAuth type.", source)
|
||||||
|
|
||||||
|
error_description = request.args.get("error_description") or error
|
||||||
|
|
||||||
|
if not state_id:
|
||||||
|
return await _render_web_oauth_popup("", False, "Missing OAuth state parameter.", source)
|
||||||
|
|
||||||
|
state_cache = REDIS_CONN.get(_web_state_cache_key(state_id, source))
|
||||||
|
if not state_cache:
|
||||||
|
return await _render_web_oauth_popup(state_id, False, "Authorization session expired. Please restart from the main window.", source)
|
||||||
|
|
||||||
|
state_obj = json.loads(state_cache)
|
||||||
|
client_config = state_obj.get("client_config")
|
||||||
|
if not client_config:
|
||||||
|
REDIS_CONN.delete(_web_state_cache_key(state_id, source))
|
||||||
|
return await _render_web_oauth_popup(state_id, False, "Authorization session was invalid. Please retry.", source)
|
||||||
|
|
||||||
|
if error:
|
||||||
|
REDIS_CONN.delete(_web_state_cache_key(state_id, source))
|
||||||
|
return await _render_web_oauth_popup(state_id, False, error_description or "Authorization was cancelled.", source)
|
||||||
|
|
||||||
|
code = request.args.get("code")
|
||||||
|
if not code:
|
||||||
|
return await _render_web_oauth_popup(state_id, False, "Missing authorization code from Google.", source)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# TODO(google-oauth): branch scopes/redirect_uri based on source_type (drive vs gmail)
|
||||||
|
flow = Flow.from_client_config(client_config, scopes=GOOGLE_SCOPES[DocumentSource.GOOGLE_DRIVE])
|
||||||
|
flow.redirect_uri = GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI
|
||||||
|
flow.fetch_token(code=code)
|
||||||
|
except Exception as exc: # pragma: no cover - defensive
|
||||||
|
logging.exception("Failed to exchange Google OAuth code: %s", exc)
|
||||||
|
REDIS_CONN.delete(_web_state_cache_key(state_id, source))
|
||||||
|
return await _render_web_oauth_popup(state_id, False, "Failed to exchange tokens with Google. Please retry.", source)
|
||||||
|
|
||||||
|
creds_json = flow.credentials.to_json()
|
||||||
|
result_payload = {
|
||||||
|
"user_id": state_obj.get("user_id"),
|
||||||
|
"credentials": creds_json,
|
||||||
|
}
|
||||||
|
REDIS_CONN.set_obj(_web_result_cache_key(state_id, source), result_payload, WEB_FLOW_TTL_SECS)
|
||||||
|
REDIS_CONN.delete(_web_state_cache_key(state_id, source))
|
||||||
|
|
||||||
|
return await _render_web_oauth_popup(state_id, True, "Authorization completed successfully.", source)
|
||||||
|
|
||||||
|
@manager.route("/google/oauth/web/result", methods=["POST"]) # noqa: F821
|
||||||
@login_required
|
@login_required
|
||||||
@validate_request("flow_id")
|
@validate_request("flow_id")
|
||||||
async def poll_google_drive_web_result():
|
async def poll_google_web_result():
|
||||||
req = await request.json or {}
|
req = await request.json or {}
|
||||||
|
source = request.args.get("type")
|
||||||
|
if source not in ("google-drive", "gmail"):
|
||||||
|
return get_json_result(code=RetCode.ARGUMENT_ERROR, message="Invalid Google OAuth type.")
|
||||||
flow_id = req.get("flow_id")
|
flow_id = req.get("flow_id")
|
||||||
cache_raw = REDIS_CONN.get(_web_result_cache_key(flow_id))
|
cache_raw = REDIS_CONN.get(_web_result_cache_key(flow_id, source))
|
||||||
if not cache_raw:
|
if not cache_raw:
|
||||||
return get_json_result(code=RetCode.RUNNING, message="Authorization is still pending.")
|
return get_json_result(code=RetCode.RUNNING, message="Authorization is still pending.")
|
||||||
|
|
||||||
@ -291,5 +387,5 @@ async def poll_google_drive_web_result():
|
|||||||
if result.get("user_id") != current_user.id:
|
if result.get("user_id") != current_user.id:
|
||||||
return get_json_result(code=RetCode.PERMISSION_ERROR, message="You are not allowed to access this authorization result.")
|
return get_json_result(code=RetCode.PERMISSION_ERROR, message="You are not allowed to access this authorization result.")
|
||||||
|
|
||||||
REDIS_CONN.delete(_web_result_cache_key(flow_id))
|
REDIS_CONN.delete(_web_result_cache_key(flow_id, source))
|
||||||
return get_json_result(data={"credentials": result.get("credentials")})
|
return get_json_result(data={"credentials": result.get("credentials")})
|
||||||
|
|||||||
@ -121,8 +121,8 @@ async def login():
|
|||||||
response_data = user.to_json()
|
response_data = user.to_json()
|
||||||
user.access_token = get_uuid()
|
user.access_token = get_uuid()
|
||||||
login_user(user)
|
login_user(user)
|
||||||
user.update_time = (current_timestamp(),)
|
user.update_time = current_timestamp()
|
||||||
user.update_date = (datetime_format(datetime.now()),)
|
user.update_date = datetime_format(datetime.now())
|
||||||
user.save()
|
user.save()
|
||||||
msg = "Welcome back!"
|
msg = "Welcome back!"
|
||||||
|
|
||||||
@ -1002,8 +1002,8 @@ async def forget():
|
|||||||
# Auto login (reuse login flow)
|
# Auto login (reuse login flow)
|
||||||
user.access_token = get_uuid()
|
user.access_token = get_uuid()
|
||||||
login_user(user)
|
login_user(user)
|
||||||
user.update_time = (current_timestamp(),)
|
user.update_time = current_timestamp()
|
||||||
user.update_date = (datetime_format(datetime.now()),)
|
user.update_date = datetime_format(datetime.now())
|
||||||
user.save()
|
user.save()
|
||||||
msg = "Password reset successful. Logged in."
|
msg = "Password reset successful. Logged in."
|
||||||
return construct_response(data=user.to_json(), auth=user.get_id(), message=msg)
|
return construct_response(data=user.to_json(), auth=user.get_id(), message=msg)
|
||||||
|
|||||||
@ -217,6 +217,7 @@ OAUTH_GOOGLE_DRIVE_CLIENT_SECRET = os.environ.get(
|
|||||||
"OAUTH_GOOGLE_DRIVE_CLIENT_SECRET", ""
|
"OAUTH_GOOGLE_DRIVE_CLIENT_SECRET", ""
|
||||||
)
|
)
|
||||||
GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI = os.environ.get("GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI", "http://localhost:9380/v1/connector/google-drive/oauth/web/callback")
|
GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI = os.environ.get("GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI", "http://localhost:9380/v1/connector/google-drive/oauth/web/callback")
|
||||||
|
GMAIL_WEB_OAUTH_REDIRECT_URI = os.environ.get("GMAIL_WEB_OAUTH_REDIRECT_URI", "http://localhost:9380/v1/connector/gmail/oauth/web/callback")
|
||||||
|
|
||||||
CONFLUENCE_OAUTH_TOKEN_URL = "https://auth.atlassian.com/oauth/token"
|
CONFLUENCE_OAUTH_TOKEN_URL = "https://auth.atlassian.com/oauth/token"
|
||||||
RATE_LIMIT_MESSAGE_LOWERCASE = "Rate limit exceeded".lower()
|
RATE_LIMIT_MESSAGE_LOWERCASE = "Rate limit exceeded".lower()
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from google.oauth2.credentials import Credentials as OAuthCredentials
|
from google.oauth2.credentials import Credentials as OAuthCredentials
|
||||||
from google.oauth2.service_account import Credentials as ServiceAccountCredentials
|
from google.oauth2.service_account import Credentials as ServiceAccountCredentials
|
||||||
from googleapiclient.errors import HttpError
|
from googleapiclient.errors import HttpError
|
||||||
@ -9,10 +9,10 @@ from common.data_source.config import INDEX_BATCH_SIZE, SLIM_BATCH_SIZE, Documen
|
|||||||
from common.data_source.google_util.auth import get_google_creds
|
from common.data_source.google_util.auth import get_google_creds
|
||||||
from common.data_source.google_util.constant import DB_CREDENTIALS_PRIMARY_ADMIN_KEY, MISSING_SCOPES_ERROR_STR, SCOPE_INSTRUCTIONS, USER_FIELDS
|
from common.data_source.google_util.constant import DB_CREDENTIALS_PRIMARY_ADMIN_KEY, MISSING_SCOPES_ERROR_STR, SCOPE_INSTRUCTIONS, USER_FIELDS
|
||||||
from common.data_source.google_util.resource import get_admin_service, get_gmail_service
|
from common.data_source.google_util.resource import get_admin_service, get_gmail_service
|
||||||
from common.data_source.google_util.util import _execute_single_retrieval, execute_paginated_retrieval
|
from common.data_source.google_util.util import _execute_single_retrieval, execute_paginated_retrieval, sanitize_filename, clean_string
|
||||||
from common.data_source.interfaces import LoadConnector, PollConnector, SecondsSinceUnixEpoch, SlimConnectorWithPermSync
|
from common.data_source.interfaces import LoadConnector, PollConnector, SecondsSinceUnixEpoch, SlimConnectorWithPermSync
|
||||||
from common.data_source.models import BasicExpertInfo, Document, ExternalAccess, GenerateDocumentsOutput, GenerateSlimDocumentOutput, SlimDocument, TextSection
|
from common.data_source.models import BasicExpertInfo, Document, ExternalAccess, GenerateDocumentsOutput, GenerateSlimDocumentOutput, SlimDocument, TextSection
|
||||||
from common.data_source.utils import build_time_range_query, clean_email_and_extract_name, get_message_body, is_mail_service_disabled_error, time_str_to_utc
|
from common.data_source.utils import build_time_range_query, clean_email_and_extract_name, get_message_body, is_mail_service_disabled_error, gmail_time_str_to_utc
|
||||||
|
|
||||||
# Constants for Gmail API fields
|
# Constants for Gmail API fields
|
||||||
THREAD_LIST_FIELDS = "nextPageToken, threads(id)"
|
THREAD_LIST_FIELDS = "nextPageToken, threads(id)"
|
||||||
@ -67,7 +67,6 @@ def message_to_section(message: dict[str, Any]) -> tuple[TextSection, dict[str,
|
|||||||
message_data += f"{name}: {value}\n"
|
message_data += f"{name}: {value}\n"
|
||||||
|
|
||||||
message_body_text: str = get_message_body(payload)
|
message_body_text: str = get_message_body(payload)
|
||||||
|
|
||||||
return TextSection(link=link, text=message_body_text + message_data), metadata
|
return TextSection(link=link, text=message_body_text + message_data), metadata
|
||||||
|
|
||||||
|
|
||||||
@ -97,13 +96,15 @@ def thread_to_document(full_thread: dict[str, Any], email_used_to_fetch_thread:
|
|||||||
|
|
||||||
if not semantic_identifier:
|
if not semantic_identifier:
|
||||||
semantic_identifier = message_metadata.get("subject", "")
|
semantic_identifier = message_metadata.get("subject", "")
|
||||||
|
semantic_identifier = clean_string(semantic_identifier)
|
||||||
|
semantic_identifier = sanitize_filename(semantic_identifier)
|
||||||
|
|
||||||
if message_metadata.get("updated_at"):
|
if message_metadata.get("updated_at"):
|
||||||
updated_at = message_metadata.get("updated_at")
|
updated_at = message_metadata.get("updated_at")
|
||||||
|
|
||||||
updated_at_datetime = None
|
updated_at_datetime = None
|
||||||
if updated_at:
|
if updated_at:
|
||||||
updated_at_datetime = time_str_to_utc(updated_at)
|
updated_at_datetime = gmail_time_str_to_utc(updated_at)
|
||||||
|
|
||||||
thread_id = full_thread.get("id")
|
thread_id = full_thread.get("id")
|
||||||
if not thread_id:
|
if not thread_id:
|
||||||
@ -115,15 +116,24 @@ def thread_to_document(full_thread: dict[str, Any], email_used_to_fetch_thread:
|
|||||||
if not semantic_identifier:
|
if not semantic_identifier:
|
||||||
semantic_identifier = "(no subject)"
|
semantic_identifier = "(no subject)"
|
||||||
|
|
||||||
|
combined_sections = "\n\n".join(
|
||||||
|
sec.text for sec in sections if hasattr(sec, "text")
|
||||||
|
)
|
||||||
|
blob = combined_sections
|
||||||
|
size_bytes = len(blob)
|
||||||
|
extension = '.txt'
|
||||||
|
|
||||||
return Document(
|
return Document(
|
||||||
id=thread_id,
|
id=thread_id,
|
||||||
semantic_identifier=semantic_identifier,
|
semantic_identifier=semantic_identifier,
|
||||||
sections=sections,
|
blob=blob,
|
||||||
|
size_bytes=size_bytes,
|
||||||
|
extension=extension,
|
||||||
source=DocumentSource.GMAIL,
|
source=DocumentSource.GMAIL,
|
||||||
primary_owners=primary_owners,
|
primary_owners=primary_owners,
|
||||||
secondary_owners=secondary_owners,
|
secondary_owners=secondary_owners,
|
||||||
doc_updated_at=updated_at_datetime,
|
doc_updated_at=updated_at_datetime,
|
||||||
metadata={},
|
metadata=message_metadata,
|
||||||
external_access=ExternalAccess(
|
external_access=ExternalAccess(
|
||||||
external_user_emails={email_used_to_fetch_thread},
|
external_user_emails={email_used_to_fetch_thread},
|
||||||
external_user_group_ids=set(),
|
external_user_group_ids=set(),
|
||||||
@ -214,15 +224,13 @@ class GmailConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
|||||||
q=query,
|
q=query,
|
||||||
continue_on_404_or_403=True,
|
continue_on_404_or_403=True,
|
||||||
):
|
):
|
||||||
full_threads = _execute_single_retrieval(
|
full_thread = _execute_single_retrieval(
|
||||||
retrieval_function=gmail_service.users().threads().get,
|
retrieval_function=gmail_service.users().threads().get,
|
||||||
list_key=None,
|
|
||||||
userId=user_email,
|
userId=user_email,
|
||||||
fields=THREAD_FIELDS,
|
fields=THREAD_FIELDS,
|
||||||
id=thread["id"],
|
id=thread["id"],
|
||||||
continue_on_404_or_403=True,
|
continue_on_404_or_403=True,
|
||||||
)
|
)
|
||||||
full_thread = list(full_threads)[0]
|
|
||||||
doc = thread_to_document(full_thread, user_email)
|
doc = thread_to_document(full_thread, user_email)
|
||||||
if doc is None:
|
if doc is None:
|
||||||
continue
|
continue
|
||||||
@ -310,4 +318,30 @@ class GmailConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
pass
|
import time
|
||||||
|
import os
|
||||||
|
from common.data_source.google_util.util import get_credentials_from_env
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
try:
|
||||||
|
email = os.environ.get("GMAIL_TEST_EMAIL", "newyorkupperbay@gmail.com")
|
||||||
|
creds = get_credentials_from_env(email, oauth=True, source="gmail")
|
||||||
|
print("Credentials loaded successfully")
|
||||||
|
print(f"{creds=}")
|
||||||
|
|
||||||
|
connector = GmailConnector(batch_size=2)
|
||||||
|
print("GmailConnector initialized")
|
||||||
|
connector.load_credentials(creds)
|
||||||
|
print("Credentials loaded into connector")
|
||||||
|
|
||||||
|
print("Gmail is ready to use")
|
||||||
|
|
||||||
|
for file in connector._fetch_threads(
|
||||||
|
int(time.time()) - 1 * 24 * 60 * 60,
|
||||||
|
int(time.time()),
|
||||||
|
):
|
||||||
|
print("new batch","-"*80)
|
||||||
|
for f in file:
|
||||||
|
print(f)
|
||||||
|
print("\n\n")
|
||||||
|
except Exception as e:
|
||||||
|
logging.exception(f"Error loading credentials: {e}")
|
||||||
@ -1,7 +1,6 @@
|
|||||||
"""Google Drive connector"""
|
"""Google Drive connector"""
|
||||||
|
|
||||||
import copy
|
import copy
|
||||||
import json
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
@ -32,7 +31,6 @@ from common.data_source.google_drive.file_retrieval import (
|
|||||||
from common.data_source.google_drive.model import DriveRetrievalStage, GoogleDriveCheckpoint, GoogleDriveFileType, RetrievedDriveFile, StageCompletion
|
from common.data_source.google_drive.model import DriveRetrievalStage, GoogleDriveCheckpoint, GoogleDriveFileType, RetrievedDriveFile, StageCompletion
|
||||||
from common.data_source.google_util.auth import get_google_creds
|
from common.data_source.google_util.auth import get_google_creds
|
||||||
from common.data_source.google_util.constant import DB_CREDENTIALS_PRIMARY_ADMIN_KEY, MISSING_SCOPES_ERROR_STR, USER_FIELDS
|
from common.data_source.google_util.constant import DB_CREDENTIALS_PRIMARY_ADMIN_KEY, MISSING_SCOPES_ERROR_STR, USER_FIELDS
|
||||||
from common.data_source.google_util.oauth_flow import ensure_oauth_token_dict
|
|
||||||
from common.data_source.google_util.resource import GoogleDriveService, get_admin_service, get_drive_service
|
from common.data_source.google_util.resource import GoogleDriveService, get_admin_service, get_drive_service
|
||||||
from common.data_source.google_util.util import GoogleFields, execute_paginated_retrieval, get_file_owners
|
from common.data_source.google_util.util import GoogleFields, execute_paginated_retrieval, get_file_owners
|
||||||
from common.data_source.google_util.util_threadpool_concurrency import ThreadSafeDict
|
from common.data_source.google_util.util_threadpool_concurrency import ThreadSafeDict
|
||||||
@ -1138,39 +1136,6 @@ class GoogleDriveConnector(SlimConnectorWithPermSync, CheckpointedConnectorWithP
|
|||||||
return GoogleDriveCheckpoint.model_validate_json(checkpoint_json)
|
return GoogleDriveCheckpoint.model_validate_json(checkpoint_json)
|
||||||
|
|
||||||
|
|
||||||
def get_credentials_from_env(email: str, oauth: bool = False) -> dict:
|
|
||||||
try:
|
|
||||||
if oauth:
|
|
||||||
raw_credential_string = os.environ["GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR"]
|
|
||||||
else:
|
|
||||||
raw_credential_string = os.environ["GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR"]
|
|
||||||
except KeyError:
|
|
||||||
raise ValueError("Missing Google Drive credentials in environment variables")
|
|
||||||
|
|
||||||
try:
|
|
||||||
credential_dict = json.loads(raw_credential_string)
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
raise ValueError("Invalid JSON in Google Drive credentials")
|
|
||||||
|
|
||||||
if oauth:
|
|
||||||
credential_dict = ensure_oauth_token_dict(credential_dict, DocumentSource.GOOGLE_DRIVE)
|
|
||||||
|
|
||||||
refried_credential_string = json.dumps(credential_dict)
|
|
||||||
|
|
||||||
DB_CREDENTIALS_DICT_TOKEN_KEY = "google_tokens"
|
|
||||||
DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = "google_service_account_key"
|
|
||||||
DB_CREDENTIALS_PRIMARY_ADMIN_KEY = "google_primary_admin"
|
|
||||||
DB_CREDENTIALS_AUTHENTICATION_METHOD = "authentication_method"
|
|
||||||
|
|
||||||
cred_key = DB_CREDENTIALS_DICT_TOKEN_KEY if oauth else DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY
|
|
||||||
|
|
||||||
return {
|
|
||||||
cred_key: refried_credential_string,
|
|
||||||
DB_CREDENTIALS_PRIMARY_ADMIN_KEY: email,
|
|
||||||
DB_CREDENTIALS_AUTHENTICATION_METHOD: "uploaded",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class CheckpointOutputWrapper:
|
class CheckpointOutputWrapper:
|
||||||
"""
|
"""
|
||||||
Wraps a CheckpointOutput generator to give things back in a more digestible format.
|
Wraps a CheckpointOutput generator to give things back in a more digestible format.
|
||||||
@ -1236,7 +1201,7 @@ def yield_all_docs_from_checkpoint_connector(
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import time
|
import time
|
||||||
|
from common.data_source.google_util.util import get_credentials_from_env
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -1245,7 +1210,7 @@ if __name__ == "__main__":
|
|||||||
creds = get_credentials_from_env(email, oauth=True)
|
creds = get_credentials_from_env(email, oauth=True)
|
||||||
print("Credentials loaded successfully")
|
print("Credentials loaded successfully")
|
||||||
print(f"{creds=}")
|
print(f"{creds=}")
|
||||||
|
sys.exit(0)
|
||||||
connector = GoogleDriveConnector(
|
connector = GoogleDriveConnector(
|
||||||
include_shared_drives=False,
|
include_shared_drives=False,
|
||||||
shared_drive_urls=None,
|
shared_drive_urls=None,
|
||||||
|
|||||||
@ -49,11 +49,11 @@ MISSING_SCOPES_ERROR_STR = "client not authorized for any of the scopes requeste
|
|||||||
SCOPE_INSTRUCTIONS = ""
|
SCOPE_INSTRUCTIONS = ""
|
||||||
|
|
||||||
|
|
||||||
GOOGLE_DRIVE_WEB_OAUTH_POPUP_TEMPLATE = """<!DOCTYPE html>
|
GOOGLE_WEB_OAUTH_POPUP_TEMPLATE = """<!DOCTYPE html>
|
||||||
<html lang="en">
|
<html lang="en">
|
||||||
<head>
|
<head>
|
||||||
<meta charset="utf-8" />
|
<meta charset="utf-8" />
|
||||||
<title>Google Drive Authorization</title>
|
<title>{title}</title>
|
||||||
<style>
|
<style>
|
||||||
body {{
|
body {{
|
||||||
font-family: Arial, sans-serif;
|
font-family: Arial, sans-serif;
|
||||||
|
|||||||
@ -1,12 +1,17 @@
|
|||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
import socket
|
import socket
|
||||||
from collections.abc import Callable, Iterator
|
from collections.abc import Callable, Iterator
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
import unicodedata
|
||||||
from googleapiclient.errors import HttpError # type: ignore # type: ignore
|
from googleapiclient.errors import HttpError # type: ignore # type: ignore
|
||||||
|
|
||||||
|
from common.data_source.config import DocumentSource
|
||||||
from common.data_source.google_drive.model import GoogleDriveFileType
|
from common.data_source.google_drive.model import GoogleDriveFileType
|
||||||
|
from common.data_source.google_util.oauth_flow import ensure_oauth_token_dict
|
||||||
|
|
||||||
|
|
||||||
# See https://developers.google.com/drive/api/reference/rest/v3/files/list for more
|
# See https://developers.google.com/drive/api/reference/rest/v3/files/list for more
|
||||||
@ -117,6 +122,7 @@ def _execute_single_retrieval(
|
|||||||
"""Execute a single retrieval from Google Drive API"""
|
"""Execute a single retrieval from Google Drive API"""
|
||||||
try:
|
try:
|
||||||
results = retrieval_function(**request_kwargs).execute()
|
results = retrieval_function(**request_kwargs).execute()
|
||||||
|
|
||||||
except HttpError as e:
|
except HttpError as e:
|
||||||
if e.resp.status >= 500:
|
if e.resp.status >= 500:
|
||||||
results = retrieval_function()
|
results = retrieval_function()
|
||||||
@ -148,5 +154,110 @@ def _execute_single_retrieval(
|
|||||||
error,
|
error,
|
||||||
)
|
)
|
||||||
results = retrieval_function()
|
results = retrieval_function()
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def get_credentials_from_env(email: str, oauth: bool = False, source="drive") -> dict:
    """Build a Google connector credential dict from environment variables.

    Args:
        email: Stored under the ``google_primary_admin`` key of the result.
        oauth: When true, read ``GOOGLE_OAUTH_CREDENTIALS_JSON_STR`` and pass
            the parsed JSON through ``ensure_oauth_token_dict``; otherwise
            read ``GOOGLE_SERVICE_ACCOUNT_JSON_STR`` and use it unchanged.
        source: ``"drive"`` selects ``DocumentSource.GOOGLE_DRIVE`` for the
            OAuth normalisation; any other value selects
            ``DocumentSource.GMAIL``. Ignored when ``oauth`` is false.

    Returns:
        A dict holding the re-serialised credential JSON (under
        ``google_tokens`` or ``google_service_account_key``), the admin email
        and the authentication method ``"uploaded"``.

    Raises:
        ValueError: If the environment variable is missing or is not valid JSON.
    """
    env_var = "GOOGLE_OAUTH_CREDENTIALS_JSON_STR" if oauth else "GOOGLE_SERVICE_ACCOUNT_JSON_STR"
    try:
        raw_credential_string = os.environ[env_var]
    except KeyError as err:
        raise ValueError("Missing Google Drive credentials in environment variables") from err

    try:
        credential_dict = json.loads(raw_credential_string)
    except json.JSONDecodeError as err:
        raise ValueError("Invalid JSON in Google Drive credentials") from err

    # Only OAuth payloads go through token normalisation. Previously the
    # ``else`` branch also caught ``oauth=False``, so service-account JSON was
    # wrongly treated as Gmail OAuth credentials.
    if oauth:
        document_source = DocumentSource.GOOGLE_DRIVE if source == "drive" else DocumentSource.GMAIL
        credential_dict = ensure_oauth_token_dict(credential_dict, document_source)

    refried_credential_string = json.dumps(credential_dict)

    # Keys of the returned credential dict.
    DB_CREDENTIALS_DICT_TOKEN_KEY = "google_tokens"
    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = "google_service_account_key"
    DB_CREDENTIALS_PRIMARY_ADMIN_KEY = "google_primary_admin"
    DB_CREDENTIALS_AUTHENTICATION_METHOD = "authentication_method"

    cred_key = DB_CREDENTIALS_DICT_TOKEN_KEY if oauth else DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY

    return {
        cred_key: refried_credential_string,
        DB_CREDENTIALS_PRIMARY_ADMIN_KEY: email,
        DB_CREDENTIALS_AUTHENTICATION_METHOD: "uploaded",
    }
|
||||||
|
|
||||||
|
def sanitize_filename(name: str) -> str:
    """Softly sanitize a file name for use as a MinIO/S3 object key.

    - Prohibited characters (``\\ ? # % * : | < > "`` and ``/``) become a
      space rather than an underscore, to keep the name readable.
    - Runs of whitespace collapse to a single space and the ends are trimmed.
    - Names longer than 200 characters are shortened, keeping the extension.
    - A ``.txt`` extension is appended when the name has none.

    Args:
        name: Proposed file name; non-string values are converted with ``str``.

    Returns:
        The sanitized name, or ``"file.txt"`` when ``name`` is ``None`` or
        nothing remains after sanitizing.
    """
    if name is None:
        return "file.txt"

    # "/" is included because S3 treats it as a folder separator.
    name = re.sub(r'[\\?#%*:|<>"/]', " ", str(name))
    name = re.sub(r"\s+", " ", name).strip()

    # Without this, an empty result would become the bare name ".txt",
    # inconsistent with the ``None`` fallback above.
    if not name:
        return "file.txt"

    if len(name) > 200:
        base, ext = os.path.splitext(name)
        # Text after the last dot that is this long is not a real extension;
        # keeping it whole would defeat the length cap.
        if len(ext) > 20:
            base, ext = name, ""
        name = base[:180].rstrip() + ext

    if not os.path.splitext(name)[1]:
        name += ".txt"

    return name
|
||||||
|
|
||||||
|
|
||||||
|
def clean_string(text: str | None) -> str | None:
|
||||||
|
"""
|
||||||
|
Clean a string to make it safe for insertion into MySQL (utf8mb4).
|
||||||
|
- Normalize Unicode
|
||||||
|
- Remove control characters / zero-width characters
|
||||||
|
- Optionally remove high-plane emoji and symbols
|
||||||
|
"""
|
||||||
|
if text is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# 0. Ensure the value is a string
|
||||||
|
text = str(text)
|
||||||
|
|
||||||
|
# 1. Normalize Unicode (NFC)
|
||||||
|
text = unicodedata.normalize("NFC", text)
|
||||||
|
|
||||||
|
# 2. Remove ASCII control characters (except tab, newline, carriage return)
|
||||||
|
text = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]", "", text)
|
||||||
|
|
||||||
|
# 3. Remove zero-width characters / BOM
|
||||||
|
text = re.sub(r"[\u200b-\u200d\uFEFF]", "", text)
|
||||||
|
|
||||||
|
# 4. Remove high Unicode characters (emoji, special symbols)
|
||||||
|
text = re.sub(r"[\U00010000-\U0010FFFF]", "", text)
|
||||||
|
|
||||||
|
# 5. Final fallback: strip any invalid UTF-8 sequences
|
||||||
|
try:
|
||||||
|
text.encode("utf-8")
|
||||||
|
except UnicodeEncodeError:
|
||||||
|
text = text.encode("utf-8", errors="ignore").decode("utf-8")
|
||||||
|
|
||||||
|
return text
|
||||||
@ -30,7 +30,6 @@ class LoadConnector(ABC):
|
|||||||
"""Load documents from state"""
|
"""Load documents from state"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def validate_connector_settings(self) -> None:
|
def validate_connector_settings(self) -> None:
|
||||||
"""Validate connector settings"""
|
"""Validate connector settings"""
|
||||||
pass
|
pass
|
||||||
|
|||||||
@ -733,7 +733,7 @@ def build_time_range_query(
|
|||||||
"""Build time range query for Gmail API"""
|
"""Build time range query for Gmail API"""
|
||||||
query = ""
|
query = ""
|
||||||
if time_range_start is not None and time_range_start != 0:
|
if time_range_start is not None and time_range_start != 0:
|
||||||
query += f"after:{int(time_range_start)}"
|
query += f"after:{int(time_range_start) + 1}"
|
||||||
if time_range_end is not None and time_range_end != 0:
|
if time_range_end is not None and time_range_end != 0:
|
||||||
query += f" before:{int(time_range_end)}"
|
query += f" before:{int(time_range_end)}"
|
||||||
query = query.strip()
|
query = query.strip()
|
||||||
@ -778,6 +778,15 @@ def time_str_to_utc(time_str: str):
|
|||||||
return datetime.fromisoformat(time_str.replace("Z", "+00:00"))
|
return datetime.fromisoformat(time_str.replace("Z", "+00:00"))
|
||||||
|
|
||||||
|
|
||||||
|
def gmail_time_str_to_utc(time_str: str):
    """Convert a Gmail RFC 2822 time string to a timezone-aware UTC datetime.

    Args:
        time_str: Date string such as ``"Mon, 20 Nov 1995 19:12:08 -0500"``.

    Returns:
        The same instant as an aware ``datetime`` in UTC.
    """
    from datetime import timezone
    from email.utils import parsedate_to_datetime

    dt = parsedate_to_datetime(time_str)
    # A "-0000" offset yields a naive datetime that represents UTC. Calling
    # astimezone() on it directly would treat it as host-local time and shift
    # the result on any non-UTC machine.
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)
|
||||||
|
|
||||||
|
|
||||||
# Notion Utilities
|
# Notion Utilities
|
||||||
T = TypeVar("T")
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
|||||||
@ -41,6 +41,7 @@ from common.data_source import BlobStorageConnector, NotionConnector, DiscordCon
|
|||||||
from common.constants import FileSource, TaskStatus
|
from common.constants import FileSource, TaskStatus
|
||||||
from common.data_source.config import INDEX_BATCH_SIZE
|
from common.data_source.config import INDEX_BATCH_SIZE
|
||||||
from common.data_source.confluence_connector import ConfluenceConnector
|
from common.data_source.confluence_connector import ConfluenceConnector
|
||||||
|
from common.data_source.gmail_connector import GmailConnector
|
||||||
from common.data_source.interfaces import CheckpointOutputWrapper
|
from common.data_source.interfaces import CheckpointOutputWrapper
|
||||||
from common.data_source.utils import load_all_docs_from_checkpoint_connector
|
from common.data_source.utils import load_all_docs_from_checkpoint_connector
|
||||||
from common.log_utils import init_root_logger
|
from common.log_utils import init_root_logger
|
||||||
@ -230,7 +231,64 @@ class Gmail(SyncBase):
|
|||||||
SOURCE_NAME: str = FileSource.GMAIL
|
SOURCE_NAME: str = FileSource.GMAIL
|
||||||
|
|
||||||
async def _generate(self, task: dict):
    """Create the Gmail connector and return its document generator.

    Gmail sync reuses the generic LoadConnector/PollConnector interface
    implemented by ``common.data_source.gmail_connector.GmailConnector``.

    Config expectations (``self.conf``):
        credentials: Gmail / Workspace OAuth JSON (with primary admin email).
        batch_size: optional, defaults to ``INDEX_BATCH_SIZE``.

    Args:
        task: Sync task dict. Reads ``connector_id``, ``reindex`` and
            ``poll_range_start``.

    Returns:
        The result of ``load_from_state()`` for a full load, or of
        ``poll_source(start, end)`` for an incremental poll.

    Raises:
        ValueError: If ``self.conf`` has no credentials.
    """
    batch_size = self.conf.get("batch_size", INDEX_BATCH_SIZE)
    self.connector = GmailConnector(batch_size=batch_size)

    credentials = self.conf.get("credentials")
    if not credentials:
        raise ValueError("Gmail connector is missing credentials.")

    new_credentials = self.connector.load_credentials(credentials)
    if new_credentials:
        # Persist rotated / refreshed credentials back to connector config.
        # Best effort: a failure is logged and the sync continues.
        try:
            updated_conf = copy.deepcopy(self.conf)
            updated_conf["credentials"] = new_credentials
            ConnectorService.update_by_id(task["connector_id"], {"config": updated_conf})
            self.conf = updated_conf
            logging.info(
                "Persisted refreshed Gmail credentials for connector %s",
                task["connector_id"],
            )
        except Exception:
            logging.exception(
                "Failed to persist refreshed Gmail credentials for connector %s",
                task["connector_id"],
            )

    # Decide between full reindex and incremental polling by time range.
    # A missing or None poll_range_start falls back to a full load, so no
    # separate None check is needed in the polling branch.
    poll_start = task.get("poll_range_start")
    if task["reindex"] == "1" or not poll_start:
        begin_info = "totally"
        document_generator = self.connector.load_from_state()
    else:
        start_time = poll_start.timestamp()
        end_time = datetime.now(timezone.utc).timestamp()
        begin_info = f"from {poll_start}"
        document_generator = self.connector.poll_source(start_time, end_time)

    try:
        admin_email = self.connector.primary_admin_email
    except RuntimeError:
        admin_email = "unknown"
    logging.info(f"Connect to Gmail as {admin_email} {begin_info}")
    return document_generator
|
||||||
|
|
||||||
|
|
||||||
class Dropbox(SyncBase):
|
class Dropbox(SyncBase):
|
||||||
|
|||||||
7
web/src/assets/svg/data-source/gmail.svg
Normal file
7
web/src/assets/svg/data-source/gmail.svg
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="52 42 88 66">
|
||||||
|
<path fill="#4285f4" d="M58 108h14V74L52 59v43c0 3.32 2.69 6 6 6"/>
|
||||||
|
<path fill="#34a853" d="M120 108h14c3.32 0 6-2.69 6-6V59l-20 15"/>
|
||||||
|
<path fill="#fbbc04" d="M120 48v26l20-15v-8c0-7.42-8.47-11.65-14.4-7.2"/>
|
||||||
|
<path fill="#ea4335" d="M72 74V48l24 18 24-18v26L96 92"/>
|
||||||
|
<path fill="#c5221f" d="M52 51v8l20 15V48l-5.6-4.2c-5.94-4.45-14.4-.22-14.4 7.2"/>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 419 B |
@ -739,6 +739,7 @@ Example: Virtual Hosted Style`,
|
|||||||
'Sync pages and databases from Notion for knowledge retrieval.',
|
'Sync pages and databases from Notion for knowledge retrieval.',
|
||||||
google_driveDescription:
|
google_driveDescription:
|
||||||
'Connect your Google Drive via OAuth and sync specific folders or drives.',
|
'Connect your Google Drive via OAuth and sync specific folders or drives.',
|
||||||
|
gmailDescription: 'Connect your Gmail via OAuth to sync emails.',
|
||||||
webdavDescription: 'Connect to WebDAV servers to sync files.',
|
webdavDescription: 'Connect to WebDAV servers to sync files.',
|
||||||
webdavRemotePathTip:
|
webdavRemotePathTip:
|
||||||
'Optional: Specify a folder path on the WebDAV server (e.g., /Documents). Leave empty to sync from root.',
|
'Optional: Specify a folder path on the WebDAV server (e.g., /Documents). Leave empty to sync from root.',
|
||||||
@ -750,6 +751,10 @@ Example: Virtual Hosted Style`,
|
|||||||
'Comma-separated emails whose "My Drive" contents should be indexed (include the primary admin).',
|
'Comma-separated emails whose "My Drive" contents should be indexed (include the primary admin).',
|
||||||
google_driveSharedFoldersTip:
|
google_driveSharedFoldersTip:
|
||||||
'Comma-separated Google Drive folder links to crawl.',
|
'Comma-separated Google Drive folder links to crawl.',
|
||||||
|
gmailPrimaryAdminTip:
|
||||||
|
'Primary admin email with Gmail / Workspace access, used to enumerate domain users and as the default sync account.',
|
||||||
|
gmailTokenTip:
|
||||||
|
'Upload the OAuth JSON generated from Google Console. If it only contains client credentials, run the browser-based verification once to mint long-lived refresh tokens.',
|
||||||
dropboxDescription:
|
dropboxDescription:
|
||||||
'Connect your Dropbox to sync files and folders from a chosen account.',
|
'Connect your Dropbox to sync files and folders from a chosen account.',
|
||||||
dropboxAccessTokenTip:
|
dropboxAccessTokenTip:
|
||||||
|
|||||||
@ -736,6 +736,8 @@ export default {
|
|||||||
'Синхронизируйте страницы и базы данных из Notion для извлечения знаний.',
|
'Синхронизируйте страницы и базы данных из Notion для извлечения знаний.',
|
||||||
google_driveDescription:
|
google_driveDescription:
|
||||||
'Подключите ваш Google Drive через OAuth и синхронизируйте определенные папки или диски.',
|
'Подключите ваш Google Drive через OAuth и синхронизируйте определенные папки или диски.',
|
||||||
|
gmailDescription:
|
||||||
|
'Подключите ваш Gmail / Google Workspace аккаунт для синхронизации писем и их метаданных, чтобы построить корпоративную почтовую базу знаний и поиск с учетом прав доступа.',
|
||||||
google_driveTokenTip:
|
google_driveTokenTip:
|
||||||
'Загрузите JSON токена OAuth, сгенерированный из помощника OAuth или Google Cloud Console. Вы также можете загрузить client_secret JSON из "установленного" или "веб" приложения. Если это ваша первая синхронизация, откроется окно браузера для завершения согласия OAuth. Если JSON уже содержит токен обновления, он будет автоматически повторно использован.',
|
'Загрузите JSON токена OAuth, сгенерированный из помощника OAuth или Google Cloud Console. Вы также можете загрузить client_secret JSON из "установленного" или "веб" приложения. Если это ваша первая синхронизация, откроется окно браузера для завершения согласия OAuth. Если JSON уже содержит токен обновления, он будет автоматически повторно использован.',
|
||||||
google_drivePrimaryAdminTip:
|
google_drivePrimaryAdminTip:
|
||||||
@ -744,6 +746,10 @@ export default {
|
|||||||
'Электронные почты через запятую, чье содержимое "Мой диск" должно индексироваться (включите основного администратора).',
|
'Электронные почты через запятую, чье содержимое "Мой диск" должно индексироваться (включите основного администратора).',
|
||||||
google_driveSharedFoldersTip:
|
google_driveSharedFoldersTip:
|
||||||
'Ссылки на папки Google Drive через запятую для обхода.',
|
'Ссылки на папки Google Drive через запятую для обхода.',
|
||||||
|
gmailPrimaryAdminTip:
|
||||||
|
'Основной административный email с доступом к Gmail / Workspace, используется для перечисления пользователей домена и как аккаунт синхронизации по умолчанию.',
|
||||||
|
gmailTokenTip:
|
||||||
|
'Загрузите OAuth JSON, сгенерированный в Google Console. Если он содержит только учетные данные клиента, выполните одноразовое подтверждение в браузере, чтобы получить долгоживущие токены обновления.',
|
||||||
jiraDescription:
|
jiraDescription:
|
||||||
'Подключите ваше рабочее пространство Jira для синхронизации задач, комментариев и вложений.',
|
'Подключите ваше рабочее пространство Jira для синхронизации задач, комментариев и вложений.',
|
||||||
jiraBaseUrlTip:
|
jiraBaseUrlTip:
|
||||||
|
|||||||
@ -718,6 +718,7 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
|||||||
notionDescription: ' 同步 Notion 页面与数据库,用于知识检索。',
|
notionDescription: ' 同步 Notion 页面与数据库,用于知识检索。',
|
||||||
google_driveDescription:
|
google_driveDescription:
|
||||||
'通过 OAuth 连接 Google Drive,并同步指定的文件夹或云端硬盘。',
|
'通过 OAuth 连接 Google Drive,并同步指定的文件夹或云端硬盘。',
|
||||||
|
gmailDescription: '通过 OAuth 连接 Gmail,用于同步邮件。',
|
||||||
google_driveTokenTip:
|
google_driveTokenTip:
|
||||||
'请上传由 OAuth helper 或 Google Cloud Console 导出的 OAuth token JSON。也支持上传 “installed” 或 “web” 类型的 client_secret JSON。若为首次同步,将自动弹出浏览器完成 OAuth 授权流程;如果该 JSON 已包含 refresh token,将会被自动复用。',
|
'请上传由 OAuth helper 或 Google Cloud Console 导出的 OAuth token JSON。也支持上传 “installed” 或 “web” 类型的 client_secret JSON。若为首次同步,将自动弹出浏览器完成 OAuth 授权流程;如果该 JSON 已包含 refresh token,将会被自动复用。',
|
||||||
google_drivePrimaryAdminTip: '拥有相应 Drive 访问权限的管理员邮箱。',
|
google_drivePrimaryAdminTip: '拥有相应 Drive 访问权限的管理员邮箱。',
|
||||||
@ -725,6 +726,10 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
|||||||
'需要索引其 “我的云端硬盘” 的邮箱,多个邮箱用逗号分隔(建议包含管理员)。',
|
'需要索引其 “我的云端硬盘” 的邮箱,多个邮箱用逗号分隔(建议包含管理员)。',
|
||||||
google_driveSharedFoldersTip:
|
google_driveSharedFoldersTip:
|
||||||
'需要同步的 Google Drive 文件夹链接,多个链接用逗号分隔。',
|
'需要同步的 Google Drive 文件夹链接,多个链接用逗号分隔。',
|
||||||
|
gmailPrimaryAdminTip:
|
||||||
|
'拥有 Gmail / Workspace 访问权限的主要管理员邮箱,用于列出域内用户并作为默认同步账号。',
|
||||||
|
gmailTokenTip:
|
||||||
|
'请上传由 Google Console 生成的 OAuth JSON。如果仅包含 client credentials,请通过浏览器授权一次以获取长期有效的刷新 Token。',
|
||||||
dropboxDescription: '连接 Dropbox,同步指定账号下的文件与文件夹。',
|
dropboxDescription: '连接 Dropbox,同步指定账号下的文件与文件夹。',
|
||||||
dropboxAccessTokenTip:
|
dropboxAccessTokenTip:
|
||||||
'请在 Dropbox App Console 生成 Access Token,并勾选 files.metadata.read、files.content.read、sharing.read 等必要权限。',
|
'请在 Dropbox App Console 生成 Access Token,并勾选 files.metadata.read、files.content.read、sharing.read 等必要权限。',
|
||||||
|
|||||||
@ -47,6 +47,7 @@ const AddDataSourceModal = ({
|
|||||||
}
|
}
|
||||||
open={visible || false}
|
open={visible || false}
|
||||||
onOpenChange={(open) => !open && hideModal?.()}
|
onOpenChange={(open) => !open && hideModal?.()}
|
||||||
|
maskClosable={false}
|
||||||
// onOk={() => handleOk()}
|
// onOk={() => handleOk()}
|
||||||
okText={t('common.confirm')}
|
okText={t('common.confirm')}
|
||||||
cancelText={t('common.cancel')}
|
cancelText={t('common.cancel')}
|
||||||
|
|||||||
@ -0,0 +1,391 @@
|
|||||||
|
import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
|
||||||
|
|
||||||
|
import { FileUploader } from '@/components/file-uploader';
|
||||||
|
import { Button } from '@/components/ui/button';
|
||||||
|
import {
|
||||||
|
Dialog,
|
||||||
|
DialogContent,
|
||||||
|
DialogDescription,
|
||||||
|
DialogFooter,
|
||||||
|
DialogHeader,
|
||||||
|
DialogTitle,
|
||||||
|
} from '@/components/ui/dialog';
|
||||||
|
import message from '@/components/ui/message';
|
||||||
|
import { FileMimeType } from '@/constants/common';
|
||||||
|
import {
|
||||||
|
pollGmailWebAuthResult,
|
||||||
|
startGmailWebAuth,
|
||||||
|
} from '@/services/data-source-service';
|
||||||
|
import { Loader2 } from 'lucide-react';
|
||||||
|
|
||||||
|
// Props for the Gmail credential upload / verification field.
export type GmailTokenFieldProps = {
  // Current credential text; the component JSON-parses it to derive its
  // status badges and summary line.
  value?: string;
  // Called with the new credential value: the uploaded JSON text, the
  // credentials returned by the web authorization flow, or '' when cleared.
  onChange: (value: any) => void;
  // NOTE(review): destructured by the component, but its use is not visible
  // in this part of the file; confirm.
  placeholder?: string;
};
|
||||||
|
|
||||||
|
// Returns true when `content` is valid JSON whose top-level `refresh_token`
// is truthy; invalid JSON yields false.
const credentialHasRefreshToken = (content: string) => {
  let parsed: any;
  try {
    parsed = JSON.parse(content);
  } catch {
    return false;
  }
  return !!parsed?.refresh_token;
};
|
||||||
|
|
||||||
|
// Produces the one-line summary for the stored credential JSON.
// Empty or unparsable content yields an empty string.
const describeCredentials = (content?: string) => {
  if (!content) {
    return '';
  }

  let parsed: any;
  try {
    parsed = JSON.parse(content);
  } catch {
    return '';
  }

  if (parsed?.refresh_token) {
    return 'Uploaded OAuth tokens with a refresh token.';
  }

  // A client-secret file carries an `installed` or `web` section.
  const isClientConfig = parsed?.installed || parsed?.web;
  return isClientConfig
    ? 'Client credentials detected. Complete verification to mint long-lived tokens.'
    : 'Stored Google credential JSON.';
};
|
||||||
|
|
||||||
|
const GmailTokenField = ({
|
||||||
|
value,
|
||||||
|
onChange,
|
||||||
|
placeholder,
|
||||||
|
}: GmailTokenFieldProps) => {
|
||||||
|
const [files, setFiles] = useState<File[]>([]);
|
||||||
|
const [pendingCredentials, setPendingCredentials] = useState<string>('');
|
||||||
|
const [dialogOpen, setDialogOpen] = useState(false);
|
||||||
|
const [webAuthLoading, setWebAuthLoading] = useState(false);
|
||||||
|
const [webFlowId, setWebFlowId] = useState<string | null>(null);
|
||||||
|
const [webStatus, setWebStatus] = useState<
|
||||||
|
'idle' | 'waiting' | 'success' | 'error'
|
||||||
|
>('idle');
|
||||||
|
const [webStatusMessage, setWebStatusMessage] = useState('');
|
||||||
|
const webFlowIdRef = useRef<string | null>(null);
|
||||||
|
const webPollTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
|
||||||
|
|
||||||
|
const clearWebState = useCallback(() => {
|
||||||
|
if (webPollTimerRef.current) {
|
||||||
|
clearTimeout(webPollTimerRef.current);
|
||||||
|
webPollTimerRef.current = null;
|
||||||
|
}
|
||||||
|
webFlowIdRef.current = null;
|
||||||
|
setWebFlowId(null);
|
||||||
|
setWebStatus('idle');
|
||||||
|
setWebStatusMessage('');
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
return () => {
|
||||||
|
if (webPollTimerRef.current) {
|
||||||
|
clearTimeout(webPollTimerRef.current);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
webFlowIdRef.current = webFlowId;
|
||||||
|
}, [webFlowId]);
|
||||||
|
|
||||||
|
const credentialSummary = useMemo(() => describeCredentials(value), [value]);
|
||||||
|
const hasVerifiedTokens = useMemo(
|
||||||
|
() => Boolean(value && credentialHasRefreshToken(value)),
|
||||||
|
[value],
|
||||||
|
);
|
||||||
|
const hasUploadedButUnverified = useMemo(
|
||||||
|
() => Boolean(value && !hasVerifiedTokens),
|
||||||
|
[hasVerifiedTokens, value],
|
||||||
|
);
|
||||||
|
|
||||||
|
const resetDialog = useCallback(
|
||||||
|
(shouldResetState: boolean) => {
|
||||||
|
setDialogOpen(false);
|
||||||
|
clearWebState();
|
||||||
|
if (shouldResetState) {
|
||||||
|
setPendingCredentials('');
|
||||||
|
setFiles([]);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
[clearWebState],
|
||||||
|
);
|
||||||
|
|
||||||
|
const fetchWebResult = useCallback(
|
||||||
|
async (flowId: string) => {
|
||||||
|
try {
|
||||||
|
const { data } = await pollGmailWebAuthResult({
|
||||||
|
flow_id: flowId,
|
||||||
|
});
|
||||||
|
if (data.code === 0 && data.data?.credentials) {
|
||||||
|
onChange(data.data.credentials);
|
||||||
|
setPendingCredentials('');
|
||||||
|
message.success('Gmail credentials verified.');
|
||||||
|
resetDialog(false);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (data.code === 106) {
|
||||||
|
setWebStatus('waiting');
|
||||||
|
setWebStatusMessage('Authorization confirmed. Finalizing tokens...');
|
||||||
|
if (webPollTimerRef.current) {
|
||||||
|
clearTimeout(webPollTimerRef.current);
|
||||||
|
}
|
||||||
|
webPollTimerRef.current = setTimeout(
|
||||||
|
() => fetchWebResult(flowId),
|
||||||
|
1500,
|
||||||
|
);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
message.error(data.message || 'Authorization failed.');
|
||||||
|
clearWebState();
|
||||||
|
} catch (err) {
|
||||||
|
message.error('Unable to retrieve authorization result.');
|
||||||
|
clearWebState();
|
||||||
|
}
|
||||||
|
},
|
||||||
|
[clearWebState, onChange, resetDialog],
|
||||||
|
);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
const handler = (event: MessageEvent) => {
|
||||||
|
const payload = event.data;
|
||||||
|
if (!payload || payload.type !== 'ragflow-gmail-oauth') {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!payload.flowId) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (webFlowIdRef.current && webFlowIdRef.current !== payload.flowId) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (payload.status === 'success') {
|
||||||
|
setWebStatus('waiting');
|
||||||
|
setWebStatusMessage('Authorization confirmed. Finalizing tokens...');
|
||||||
|
fetchWebResult(payload.flowId);
|
||||||
|
} else {
|
||||||
|
message.error(
|
||||||
|
payload.message || 'Authorization window reported an error.',
|
||||||
|
);
|
||||||
|
clearWebState();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
window.addEventListener('message', handler);
|
||||||
|
return () => window.removeEventListener('message', handler);
|
||||||
|
}, [clearWebState, fetchWebResult]);
|
||||||
|
|
||||||
|
const handleValueChange = useCallback(
|
||||||
|
(nextFiles: File[]) => {
|
||||||
|
if (!nextFiles.length) {
|
||||||
|
setFiles([]);
|
||||||
|
onChange('');
|
||||||
|
setPendingCredentials('');
|
||||||
|
clearWebState();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const file = nextFiles[nextFiles.length - 1];
|
||||||
|
file
|
||||||
|
.text()
|
||||||
|
.then((text) => {
|
||||||
|
try {
|
||||||
|
JSON.parse(text);
|
||||||
|
} catch {
|
||||||
|
message.error('Invalid JSON file.');
|
||||||
|
setFiles([]);
|
||||||
|
clearWebState();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
setFiles([file]);
|
||||||
|
clearWebState();
|
||||||
|
if (credentialHasRefreshToken(text)) {
|
||||||
|
onChange(text);
|
||||||
|
setPendingCredentials('');
|
||||||
|
message.success('Gmail OAuth credentials uploaded.');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
setPendingCredentials(text);
|
||||||
|
setDialogOpen(true);
|
||||||
|
message.info(
|
||||||
|
'Client configuration uploaded. Verification is required to finish setup.',
|
||||||
|
);
|
||||||
|
})
|
||||||
|
.catch(() => {
|
||||||
|
message.error('Unable to read the uploaded file.');
|
||||||
|
setFiles([]);
|
||||||
|
});
|
||||||
|
},
|
||||||
|
[clearWebState, onChange],
|
||||||
|
);
|
||||||
|
|
||||||
|
const handleStartWebAuthorization = useCallback(async () => {
|
||||||
|
if (!pendingCredentials) {
|
||||||
|
message.error('No Google credential file detected.');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
setWebAuthLoading(true);
|
||||||
|
clearWebState();
|
||||||
|
try {
|
||||||
|
const { data } = await startGmailWebAuth({
|
||||||
|
credentials: pendingCredentials,
|
||||||
|
});
|
||||||
|
if (data.code === 0 && data.data?.authorization_url) {
|
||||||
|
const flowId = data.data.flow_id;
|
||||||
|
const popup = window.open(
|
||||||
|
data.data.authorization_url,
|
||||||
|
'ragflow-gmail-oauth',
|
||||||
|
'width=600,height=720',
|
||||||
|
);
|
||||||
|
if (!popup) {
|
||||||
|
message.error(
|
||||||
|
'Popup was blocked. Please allow popups for this site.',
|
||||||
|
);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
popup.focus();
|
||||||
|
webFlowIdRef.current = flowId;
|
||||||
|
setWebFlowId(flowId);
|
||||||
|
setWebStatus('waiting');
|
||||||
|
setWebStatusMessage('Complete the Google consent in the popup window.');
|
||||||
|
} else {
|
||||||
|
message.error(data.message || 'Failed to start browser authorization.');
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
message.error('Failed to start browser authorization.');
|
||||||
|
} finally {
|
||||||
|
setWebAuthLoading(false);
|
||||||
|
}
|
||||||
|
}, [clearWebState, pendingCredentials]);
|
||||||
|
|
||||||
|
const handleManualWebCheck = useCallback(() => {
|
||||||
|
if (!webFlowId) {
|
||||||
|
message.info('Start browser authorization first.');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
setWebStatus('waiting');
|
||||||
|
setWebStatusMessage('Checking authorization status...');
|
||||||
|
fetchWebResult(webFlowId);
|
||||||
|
}, [fetchWebResult, webFlowId]);
|
||||||
|
|
||||||
|
// Cancel action for the verification dialog: warn the user, then reset the
// dialog through resetDialog(true).
// NOTE(review): the meaning of the `true` argument is defined by resetDialog,
// which is outside this block — confirm before changing it.
const handleCancel = useCallback(() => {
  const notice =
    'Verification canceled. Upload the credential again to restart.';
  message.warning(notice);
  resetDialog(true);
}, [resetDialog]);
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="flex flex-col gap-3">
|
||||||
|
{(credentialSummary ||
|
||||||
|
hasVerifiedTokens ||
|
||||||
|
hasUploadedButUnverified ||
|
||||||
|
pendingCredentials) && (
|
||||||
|
<div className="flex flex-wrap items-center gap-3 rounded-md border border-dashed border-muted-foreground/40 bg-muted/20 px-3 py-2 text-xs text-muted-foreground">
|
||||||
|
<div className="flex flex-wrap items-center gap-2">
|
||||||
|
{hasVerifiedTokens ? (
|
||||||
|
<span className="rounded-full bg-emerald-100 px-2 py-0.5 text-[11px] font-semibold uppercase tracking-wide text-emerald-700">
|
||||||
|
Verified
|
||||||
|
</span>
|
||||||
|
) : null}
|
||||||
|
{hasUploadedButUnverified ? (
|
||||||
|
<span className="rounded-full bg-amber-100 px-2 py-0.5 text-[11px] font-semibold uppercase tracking-wide text-amber-700">
|
||||||
|
Needs authorization
|
||||||
|
</span>
|
||||||
|
) : null}
|
||||||
|
{pendingCredentials && !hasVerifiedTokens ? (
|
||||||
|
<span className="rounded-full bg-blue-100 px-2 py-0.5 text-[11px] font-semibold uppercase tracking-wide text-blue-700">
|
||||||
|
Uploaded (pending)
|
||||||
|
</span>
|
||||||
|
) : null}
|
||||||
|
</div>
|
||||||
|
{credentialSummary ? (
|
||||||
|
<p className="m-0">{credentialSummary}</p>
|
||||||
|
) : null}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
<FileUploader
|
||||||
|
className="py-4 border-[0.5px] bg-bg-card text-text-secondary"
|
||||||
|
value={files}
|
||||||
|
onValueChange={handleValueChange}
|
||||||
|
accept={{ '*.json': [FileMimeType.Json] }}
|
||||||
|
maxFileCount={1}
|
||||||
|
description={'Upload your Gmail OAuth JSON file.'}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<Dialog
|
||||||
|
open={dialogOpen}
|
||||||
|
onOpenChange={(open) => {
|
||||||
|
if (!open && dialogOpen) {
|
||||||
|
handleCancel();
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<DialogContent
|
||||||
|
onPointerDownOutside={(e) => e.preventDefault()}
|
||||||
|
onInteractOutside={(e) => e.preventDefault()}
|
||||||
|
onEscapeKeyDown={(e) => e.preventDefault()}
|
||||||
|
>
|
||||||
|
<DialogHeader>
|
||||||
|
<DialogTitle>Complete Gmail verification</DialogTitle>
|
||||||
|
<DialogDescription>
|
||||||
|
The uploaded client credentials do not contain a refresh token.
|
||||||
|
Run the verification flow once to mint reusable tokens.
|
||||||
|
</DialogDescription>
|
||||||
|
</DialogHeader>
|
||||||
|
|
||||||
|
<div className="space-y-4">
|
||||||
|
<div className="rounded-md border border-dashed border-muted-foreground/40 bg-muted/10 px-4 py-4 text-sm text-muted-foreground">
|
||||||
|
<div className="text-sm font-semibold text-foreground">
|
||||||
|
Authorize in browser
|
||||||
|
</div>
|
||||||
|
<p className="mt-2">
|
||||||
|
We will open Google's consent page in a new window. Sign in
|
||||||
|
with the admin account, grant access, and return here. Your
|
||||||
|
credentials will update automatically.
|
||||||
|
</p>
|
||||||
|
{webStatus !== 'idle' && (
|
||||||
|
<p
|
||||||
|
className={`mt-2 text-xs ${
|
||||||
|
webStatus === 'error'
|
||||||
|
? 'text-destructive'
|
||||||
|
: 'text-muted-foreground'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
{webStatusMessage}
|
||||||
|
</p>
|
||||||
|
)}
|
||||||
|
<div className="mt-3 flex flex-wrap gap-2">
|
||||||
|
<Button
|
||||||
|
onClick={handleStartWebAuthorization}
|
||||||
|
disabled={webAuthLoading}
|
||||||
|
>
|
||||||
|
{webAuthLoading && (
|
||||||
|
<Loader2 className="mr-2 size-4 animate-spin" />
|
||||||
|
)}
|
||||||
|
Authorize with Google
|
||||||
|
</Button>
|
||||||
|
{webFlowId ? (
|
||||||
|
<Button
|
||||||
|
variant="outline"
|
||||||
|
onClick={handleManualWebCheck}
|
||||||
|
disabled={webStatus === 'success'}
|
||||||
|
>
|
||||||
|
Refresh status
|
||||||
|
</Button>
|
||||||
|
) : null}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<DialogFooter className="pt-2">
|
||||||
|
<Button variant="ghost" onClick={handleCancel}>
|
||||||
|
Cancel
|
||||||
|
</Button>
|
||||||
|
</DialogFooter>
|
||||||
|
</DialogContent>
|
||||||
|
</Dialog>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
||||||
|
export default GmailTokenField;
|
||||||
@ -1,5 +1,3 @@
|
|||||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
|
|
||||||
|
|
||||||
import { FileUploader } from '@/components/file-uploader';
|
import { FileUploader } from '@/components/file-uploader';
|
||||||
import { Button } from '@/components/ui/button';
|
import { Button } from '@/components/ui/button';
|
||||||
import {
|
import {
|
||||||
@ -17,6 +15,7 @@ import {
|
|||||||
startGoogleDriveWebAuth,
|
startGoogleDriveWebAuth,
|
||||||
} from '@/services/data-source-service';
|
} from '@/services/data-source-service';
|
||||||
import { Loader2 } from 'lucide-react';
|
import { Loader2 } from 'lucide-react';
|
||||||
|
import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
|
||||||
|
|
||||||
type GoogleDriveTokenFieldProps = {
|
type GoogleDriveTokenFieldProps = {
|
||||||
value?: string;
|
value?: string;
|
||||||
@ -313,12 +312,16 @@ const GoogleDriveTokenField = ({
|
|||||||
<Dialog
|
<Dialog
|
||||||
open={dialogOpen}
|
open={dialogOpen}
|
||||||
onOpenChange={(open) => {
|
onOpenChange={(open) => {
|
||||||
if (!open) {
|
if (!open && dialogOpen) {
|
||||||
handleCancel();
|
handleCancel();
|
||||||
}
|
}
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
<DialogContent>
|
<DialogContent
|
||||||
|
onPointerDownOutside={(e) => e.preventDefault()}
|
||||||
|
onInteractOutside={(e) => e.preventDefault()}
|
||||||
|
onEscapeKeyDown={(e) => e.preventDefault()}
|
||||||
|
>
|
||||||
<DialogHeader>
|
<DialogHeader>
|
||||||
<DialogTitle>Complete Google verification</DialogTitle>
|
<DialogTitle>Complete Google verification</DialogTitle>
|
||||||
<DialogDescription>
|
<DialogDescription>
|
||||||
@ -326,7 +329,6 @@ const GoogleDriveTokenField = ({
|
|||||||
Run the verification flow once to mint reusable tokens.
|
Run the verification flow once to mint reusable tokens.
|
||||||
</DialogDescription>
|
</DialogDescription>
|
||||||
</DialogHeader>
|
</DialogHeader>
|
||||||
|
|
||||||
<div className="space-y-4">
|
<div className="space-y-4">
|
||||||
<div className="rounded-md border border-dashed border-muted-foreground/40 bg-muted/10 px-4 py-4 text-sm text-muted-foreground">
|
<div className="rounded-md border border-dashed border-muted-foreground/40 bg-muted/10 px-4 py-4 text-sm text-muted-foreground">
|
||||||
<div className="text-sm font-semibold text-foreground">
|
<div className="text-sm font-semibold text-foreground">
|
||||||
@ -370,7 +372,6 @@ const GoogleDriveTokenField = ({
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<DialogFooter className="pt-2">
|
<DialogFooter className="pt-2">
|
||||||
<Button variant="ghost" onClick={handleCancel}>
|
<Button variant="ghost" onClick={handleCancel}>
|
||||||
Cancel
|
Cancel
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
import { FormFieldType } from '@/components/dynamic-form';
|
import { FormFieldType } from '@/components/dynamic-form';
|
||||||
import SvgIcon from '@/components/svg-icon';
|
import SvgIcon from '@/components/svg-icon';
|
||||||
import { t } from 'i18next';
|
import { t } from 'i18next';
|
||||||
|
import GmailTokenField from './component/gmail-token-field';
|
||||||
import GoogleDriveTokenField from './component/google-drive-token-field';
|
import GoogleDriveTokenField from './component/google-drive-token-field';
|
||||||
|
|
||||||
export enum DataSourceKey {
|
export enum DataSourceKey {
|
||||||
@ -10,7 +11,7 @@ export enum DataSourceKey {
|
|||||||
DISCORD = 'discord',
|
DISCORD = 'discord',
|
||||||
GOOGLE_DRIVE = 'google_drive',
|
GOOGLE_DRIVE = 'google_drive',
|
||||||
MOODLE = 'moodle',
|
MOODLE = 'moodle',
|
||||||
// GMAIL = 'gmail',
|
GMAIL = 'gmail',
|
||||||
JIRA = 'jira',
|
JIRA = 'jira',
|
||||||
WEBDAV = 'webdav',
|
WEBDAV = 'webdav',
|
||||||
DROPBOX = 'dropbox',
|
DROPBOX = 'dropbox',
|
||||||
@ -45,6 +46,11 @@ export const DataSourceInfo = {
|
|||||||
description: t(`setting.${DataSourceKey.GOOGLE_DRIVE}Description`),
|
description: t(`setting.${DataSourceKey.GOOGLE_DRIVE}Description`),
|
||||||
icon: <SvgIcon name={'data-source/google-drive'} width={38} />,
|
icon: <SvgIcon name={'data-source/google-drive'} width={38} />,
|
||||||
},
|
},
|
||||||
|
[DataSourceKey.GMAIL]: {
|
||||||
|
name: 'Gmail',
|
||||||
|
description: t(`setting.${DataSourceKey.GMAIL}Description`),
|
||||||
|
icon: <SvgIcon name={'data-source/gmail'} width={38} />,
|
||||||
|
},
|
||||||
[DataSourceKey.MOODLE]: {
|
[DataSourceKey.MOODLE]: {
|
||||||
name: 'Moodle',
|
name: 'Moodle',
|
||||||
description: t(`setting.${DataSourceKey.MOODLE}Description`),
|
description: t(`setting.${DataSourceKey.MOODLE}Description`),
|
||||||
@ -320,6 +326,38 @@ export const DataSourceFormFields = {
|
|||||||
defaultValue: 'uploaded',
|
defaultValue: 'uploaded',
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
[DataSourceKey.GMAIL]: [
|
||||||
|
{
|
||||||
|
label: 'Primary Admin Email',
|
||||||
|
name: 'config.credentials.google_primary_admin',
|
||||||
|
type: FormFieldType.Text,
|
||||||
|
required: true,
|
||||||
|
placeholder: 'admin@example.com',
|
||||||
|
tooltip: t('setting.gmailPrimaryAdminTip'),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: 'OAuth Token JSON',
|
||||||
|
name: 'config.credentials.google_tokens',
|
||||||
|
type: FormFieldType.Textarea,
|
||||||
|
required: true,
|
||||||
|
render: (fieldProps: any) => (
|
||||||
|
<GmailTokenField
|
||||||
|
value={fieldProps.value}
|
||||||
|
onChange={fieldProps.onChange}
|
||||||
|
placeholder='{ "token": "...", "refresh_token": "...", ... }'
|
||||||
|
/>
|
||||||
|
),
|
||||||
|
tooltip: t('setting.gmailTokenTip'),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: '',
|
||||||
|
name: 'config.credentials.authentication_method',
|
||||||
|
type: FormFieldType.Text,
|
||||||
|
required: false,
|
||||||
|
hidden: true,
|
||||||
|
defaultValue: 'uploaded',
|
||||||
|
},
|
||||||
|
],
|
||||||
[DataSourceKey.MOODLE]: [
|
[DataSourceKey.MOODLE]: [
|
||||||
{
|
{
|
||||||
label: 'Moodle URL',
|
label: 'Moodle URL',
|
||||||
@ -550,6 +588,17 @@ export const DataSourceFormDefaultValues = {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
[DataSourceKey.GMAIL]: {
|
||||||
|
name: '',
|
||||||
|
source: DataSourceKey.GMAIL,
|
||||||
|
config: {
|
||||||
|
credentials: {
|
||||||
|
google_primary_admin: '',
|
||||||
|
google_tokens: '',
|
||||||
|
authentication_method: 'uploaded',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
[DataSourceKey.MOODLE]: {
|
[DataSourceKey.MOODLE]: {
|
||||||
name: '',
|
name: '',
|
||||||
source: DataSourceKey.MOODLE,
|
source: DataSourceKey.MOODLE,
|
||||||
|
|||||||
@ -34,9 +34,17 @@ export const featchDataSourceDetail = (id: string) =>
|
|||||||
request.get(api.dataSourceDetail(id));
|
request.get(api.dataSourceDetail(id));
|
||||||
|
|
||||||
export const startGoogleDriveWebAuth = (payload: { credentials: string }) =>
|
export const startGoogleDriveWebAuth = (payload: { credentials: string }) =>
|
||||||
request.post(api.googleDriveWebAuthStart, { data: payload });
|
request.post(api.googleWebAuthStart('google-drive'), { data: payload });
|
||||||
|
|
||||||
export const pollGoogleDriveWebAuthResult = (payload: { flow_id: string }) =>
|
export const pollGoogleDriveWebAuthResult = (payload: { flow_id: string }) =>
|
||||||
request.post(api.googleDriveWebAuthResult, { data: payload });
|
request.post(api.googleWebAuthResult('google-drive'), { data: payload });
|
||||||
|
|
||||||
|
// Gmail web auth follows the same pattern as Google Drive: both call the
// shared Google web-auth endpoints, and the Gmail variants pass 'gmail' as the
// type. These helpers are consumed by the GmailTokenField UI.
|
||||||
|
/**
 * POST the uploaded credentials to the Google web-auth "start" endpoint,
 * built for the 'gmail' type.
 */
export const startGmailWebAuth = (payload: { credentials: string }) => {
  const url = api.googleWebAuthStart('gmail');
  return request.post(url, { data: payload });
};
|
||||||
|
|
||||||
|
/**
 * POST a flow id to the Google web-auth "result" endpoint, built for the
 * 'gmail' type.
 */
export const pollGmailWebAuthResult = (payload: { flow_id: string }) => {
  const url = api.googleWebAuthResult('gmail');
  return request.post(url, { data: payload });
};
|
||||||
|
|
||||||
export default dataSourceService;
|
export default dataSourceService;
|
||||||
|
|||||||
@ -42,8 +42,10 @@ export default {
|
|||||||
dataSourceRebuild: (id: string) => `${api_host}/connector/${id}/rebuild`,
|
dataSourceRebuild: (id: string) => `${api_host}/connector/${id}/rebuild`,
|
||||||
dataSourceLogs: (id: string) => `${api_host}/connector/${id}/logs`,
|
dataSourceLogs: (id: string) => `${api_host}/connector/${id}/logs`,
|
||||||
dataSourceDetail: (id: string) => `${api_host}/connector/${id}`,
|
dataSourceDetail: (id: string) => `${api_host}/connector/${id}`,
|
||||||
googleDriveWebAuthStart: `${api_host}/connector/google-drive/oauth/web/start`,
|
googleWebAuthStart: (type: 'google-drive' | 'gmail') =>
|
||||||
googleDriveWebAuthResult: `${api_host}/connector/google-drive/oauth/web/result`,
|
`${api_host}/connector/google/oauth/web/start?type=${type}`,
|
||||||
|
googleWebAuthResult: (type: 'google-drive' | 'gmail') =>
|
||||||
|
`${api_host}/connector/google/oauth/web/result?type=${type}`,
|
||||||
|
|
||||||
// plugin
|
// plugin
|
||||||
llm_tools: `${api_host}/plugin/llm_tools`,
|
llm_tools: `${api_host}/plugin/llm_tools`,
|
||||||
|
|||||||
Reference in New Issue
Block a user