mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Feat: add auto parse to connector. (#11099)
### What problem does this PR solve?

#10953

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -63,7 +63,7 @@ def _convert_message_to_document(
|
||||
semantic_identifier=semantic_identifier,
|
||||
doc_updated_at=doc_updated_at,
|
||||
blob=message.content.encode("utf-8"),
|
||||
extension="txt",
|
||||
extension=".txt",
|
||||
size_bytes=len(message.content.encode("utf-8")),
|
||||
)
|
||||
|
||||
@ -275,7 +275,7 @@ class DiscordConnector(LoadConnector, PollConnector):
|
||||
semantic_identifier=f"{min_updated_at} -> {max_updated_at}",
|
||||
doc_updated_at=max_updated_at,
|
||||
blob=blob,
|
||||
extension="txt",
|
||||
extension=".txt",
|
||||
size_bytes=size_bytes,
|
||||
)
|
||||
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
import logging
|
||||
from collections.abc import Generator
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Optional
|
||||
from retry import retry
|
||||
|
||||
@ -33,7 +32,7 @@ from common.data_source.utils import (
|
||||
batch_generator,
|
||||
fetch_notion_data,
|
||||
properties_to_str,
|
||||
filter_pages_by_time
|
||||
filter_pages_by_time, datetime_from_string
|
||||
)
|
||||
|
||||
|
||||
@ -293,9 +292,9 @@ class NotionConnector(LoadConnector, PollConnector):
|
||||
blob=blob,
|
||||
source=DocumentSource.NOTION,
|
||||
semantic_identifier=page_title,
|
||||
extension="txt",
|
||||
extension=".txt",
|
||||
size_bytes=len(blob),
|
||||
doc_updated_at=datetime.fromisoformat(page.last_edited_time).astimezone(timezone.utc)
|
||||
doc_updated_at=datetime_from_string(page.last_edited_time)
|
||||
)
|
||||
|
||||
if self.recursive_index_enabled and all_child_page_ids:
|
||||
|
||||
Reference in New Issue
Block a user