Feat: add IMAP data source integration with configuration and sync capabilities (#12316)

### What problem does this PR solve?
issue:
#12217 [#12313](https://github.com/infiniflow/ragflow/issues/12313)
change:
add IMAP data source integration with configuration and sync
capabilities

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
buua436
2025-12-30 17:09:13 +08:00
committed by GitHub
parent 109e782493
commit bffdb5fb11
11 changed files with 906 additions and 16 deletions

View File

@ -1,6 +1,6 @@
from datetime import datetime, timezone
import logging
from typing import Any
from typing import Any, Generator
import requests
@ -8,8 +8,8 @@ from pyairtable import Api as AirtableApi
from common.data_source.config import AIRTABLE_CONNECTOR_SIZE_THRESHOLD, INDEX_BATCH_SIZE, DocumentSource
from common.data_source.exceptions import ConnectorMissingCredentialError
from common.data_source.interfaces import LoadConnector
from common.data_source.models import Document, GenerateDocumentsOutput
from common.data_source.interfaces import LoadConnector, PollConnector
from common.data_source.models import Document, GenerateDocumentsOutput, SecondsSinceUnixEpoch
from common.data_source.utils import extract_size_bytes, get_file_ext
class AirtableClientNotSetUpError(PermissionError):
@ -19,7 +19,7 @@ class AirtableClientNotSetUpError(PermissionError):
)
class AirtableConnector(LoadConnector):
class AirtableConnector(LoadConnector, PollConnector):
"""
Lightweight Airtable connector.
@ -132,6 +132,26 @@ class AirtableConnector(LoadConnector):
if batch:
yield batch
def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> Generator[list[Document], None, None]:
    """Yield batches of documents updated inside the window ``[start, end)``.

    Each batch produced by ``load_from_state`` is filtered in turn. Documents
    without a ``doc_updated_at`` value are dropped, and a batch that ends up
    empty after filtering is not yielded.

    Args:
        start: Inclusive lower bound, as a POSIX timestamp in seconds.
        end: Exclusive upper bound, as a POSIX timestamp in seconds.

    Yields:
        Non-empty lists of documents whose update time, converted to UTC,
        satisfies ``start <= updated_at < end``.
    """
    window_open = datetime.fromtimestamp(start, tz=timezone.utc)
    window_close = datetime.fromtimestamp(end, tz=timezone.utc)

    for batch in self.load_from_state():
        # Compare in UTC so the bounds and the document timestamps line up.
        in_window = [
            doc
            for doc in batch
            if doc.doc_updated_at
            and window_open <= doc.doc_updated_at.astimezone(timezone.utc) < window_close
        ]
        if in_window:
            yield in_window
if __name__ == "__main__":
import os