mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 12:32:30 +08:00
feat: improve Moodle connector functionality (#11665)
### What problem does this PR solve? Add metadata from moodle data source. ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -17,7 +17,11 @@ from common.data_source.exceptions import (
|
|||||||
InsufficientPermissionsError,
|
InsufficientPermissionsError,
|
||||||
ConnectorValidationError,
|
ConnectorValidationError,
|
||||||
)
|
)
|
||||||
from common.data_source.interfaces import LoadConnector, PollConnector, SecondsSinceUnixEpoch
|
from common.data_source.interfaces import (
|
||||||
|
LoadConnector,
|
||||||
|
PollConnector,
|
||||||
|
SecondsSinceUnixEpoch,
|
||||||
|
)
|
||||||
from common.data_source.models import Document
|
from common.data_source.models import Document
|
||||||
from common.data_source.utils import batch_generator, rl_requests
|
from common.data_source.utils import batch_generator, rl_requests
|
||||||
|
|
||||||
@ -42,7 +46,9 @@ class MoodleConnector(LoadConnector, PollConnector):
|
|||||||
delimiter = "&" if "?" in file_url else "?"
|
delimiter = "&" if "?" in file_url else "?"
|
||||||
return f"{file_url}{delimiter}token={token}"
|
return f"{file_url}{delimiter}token={token}"
|
||||||
|
|
||||||
def _log_error(self, context: str, error: Exception, level: str = "warning") -> None:
|
def _log_error(
|
||||||
|
self, context: str, error: Exception, level: str = "warning"
|
||||||
|
) -> None:
|
||||||
"""Simplified logging wrapper"""
|
"""Simplified logging wrapper"""
|
||||||
msg = f"{context}: {error}"
|
msg = f"{context}: {error}"
|
||||||
if level == "error":
|
if level == "error":
|
||||||
@ -73,7 +79,9 @@ class MoodleConnector(LoadConnector, PollConnector):
|
|||||||
except MoodleException as e:
|
except MoodleException as e:
|
||||||
if "invalidtoken" in str(e).lower():
|
if "invalidtoken" in str(e).lower():
|
||||||
raise CredentialExpiredError("Moodle token is invalid or expired")
|
raise CredentialExpiredError("Moodle token is invalid or expired")
|
||||||
raise ConnectorMissingCredentialError(f"Failed to initialize Moodle client: {e}")
|
raise ConnectorMissingCredentialError(
|
||||||
|
f"Failed to initialize Moodle client: {e}"
|
||||||
|
)
|
||||||
|
|
||||||
def validate_connector_settings(self) -> None:
|
def validate_connector_settings(self) -> None:
|
||||||
if not self.moodle_client:
|
if not self.moodle_client:
|
||||||
@ -125,7 +133,9 @@ class MoodleConnector(LoadConnector, PollConnector):
|
|||||||
logger.warning("No courses found to poll")
|
logger.warning("No courses found to poll")
|
||||||
return
|
return
|
||||||
|
|
||||||
yield from self._yield_in_batches(self._get_updated_content(courses, start, end))
|
yield from self._yield_in_batches(
|
||||||
|
self._get_updated_content(courses, start, end)
|
||||||
|
)
|
||||||
|
|
||||||
@retry(tries=3, delay=1, backoff=2)
|
@retry(tries=3, delay=1, backoff=2)
|
||||||
def _get_enrolled_courses(self) -> list:
|
def _get_enrolled_courses(self) -> list:
|
||||||
@ -187,9 +197,7 @@ class MoodleConnector(LoadConnector, PollConnector):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
self._log_error(f"polling course {course.fullname}", e)
|
self._log_error(f"polling course {course.fullname}", e)
|
||||||
|
|
||||||
def _process_module(
|
def _process_module(self, course, section, module) -> Optional[Document]:
|
||||||
self, course, section, module
|
|
||||||
) -> Optional[Document]:
|
|
||||||
try:
|
try:
|
||||||
mtype = module.modname
|
mtype = module.modname
|
||||||
if mtype in ["label", "url"]:
|
if mtype in ["label", "url"]:
|
||||||
@ -224,11 +232,37 @@ class MoodleConnector(LoadConnector, PollConnector):
|
|||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
resp = rl_requests.get(self._add_token_to_url(file_info.fileurl), timeout=60)
|
resp = rl_requests.get(
|
||||||
|
self._add_token_to_url(file_info.fileurl), timeout=60
|
||||||
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
blob = resp.content
|
blob = resp.content
|
||||||
ext = os.path.splitext(file_name)[1] or ".bin"
|
ext = os.path.splitext(file_name)[1] or ".bin"
|
||||||
semantic_id = f"{course.fullname} / {section.name} / {file_name}"
|
semantic_id = f"{course.fullname} / {section.name} / {file_name}"
|
||||||
|
|
||||||
|
# Create metadata dictionary with relevant information
|
||||||
|
metadata = {
|
||||||
|
"moodle_url": self.moodle_url,
|
||||||
|
"course_id": getattr(course, "id", None),
|
||||||
|
"course_name": getattr(course, "fullname", None),
|
||||||
|
"course_shortname": getattr(course, "shortname", None),
|
||||||
|
"section_id": getattr(section, "id", None),
|
||||||
|
"section_name": getattr(section, "name", None),
|
||||||
|
"section_number": getattr(section, "section", None),
|
||||||
|
"module_id": getattr(module, "id", None),
|
||||||
|
"module_name": getattr(module, "name", None),
|
||||||
|
"module_type": getattr(module, "modname", None),
|
||||||
|
"module_instance": getattr(module, "instance", None),
|
||||||
|
"file_url": getattr(file_info, "fileurl", None),
|
||||||
|
"file_name": file_name,
|
||||||
|
"file_size": getattr(file_info, "filesize", len(blob)),
|
||||||
|
"file_type": getattr(file_info, "mimetype", None),
|
||||||
|
"time_created": getattr(module, "timecreated", None),
|
||||||
|
"time_modified": getattr(module, "timemodified", None),
|
||||||
|
"visible": getattr(module, "visible", None),
|
||||||
|
"groupmode": getattr(module, "groupmode", None),
|
||||||
|
}
|
||||||
|
|
||||||
return Document(
|
return Document(
|
||||||
id=f"moodle_resource_{module.id}",
|
id=f"moodle_resource_{module.id}",
|
||||||
source="moodle",
|
source="moodle",
|
||||||
@ -237,6 +271,7 @@ class MoodleConnector(LoadConnector, PollConnector):
|
|||||||
blob=blob,
|
blob=blob,
|
||||||
doc_updated_at=datetime.fromtimestamp(ts or 0, tz=timezone.utc),
|
doc_updated_at=datetime.fromtimestamp(ts or 0, tz=timezone.utc),
|
||||||
size_bytes=len(blob),
|
size_bytes=len(blob),
|
||||||
|
metadata=metadata,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self._log_error(f"downloading resource {file_name}", e, "error")
|
self._log_error(f"downloading resource {file_name}", e, "error")
|
||||||
@ -247,7 +282,9 @@ class MoodleConnector(LoadConnector, PollConnector):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = self.moodle_client.mod.forum.get_forum_discussions(forumid=module.instance)
|
result = self.moodle_client.mod.forum.get_forum_discussions(
|
||||||
|
forumid=module.instance
|
||||||
|
)
|
||||||
disc_list = getattr(result, "discussions", [])
|
disc_list = getattr(result, "discussions", [])
|
||||||
if not disc_list:
|
if not disc_list:
|
||||||
return None
|
return None
|
||||||
@ -264,6 +301,38 @@ class MoodleConnector(LoadConnector, PollConnector):
|
|||||||
|
|
||||||
blob = "\n".join(markdown).encode("utf-8")
|
blob = "\n".join(markdown).encode("utf-8")
|
||||||
semantic_id = f"{course.fullname} / {section.name} / {module.name}"
|
semantic_id = f"{course.fullname} / {section.name} / {module.name}"
|
||||||
|
|
||||||
|
# Create metadata dictionary with relevant information
|
||||||
|
metadata = {
|
||||||
|
"moodle_url": self.moodle_url,
|
||||||
|
"course_id": getattr(course, "id", None),
|
||||||
|
"course_name": getattr(course, "fullname", None),
|
||||||
|
"course_shortname": getattr(course, "shortname", None),
|
||||||
|
"section_id": getattr(section, "id", None),
|
||||||
|
"section_name": getattr(section, "name", None),
|
||||||
|
"section_number": getattr(section, "section", None),
|
||||||
|
"module_id": getattr(module, "id", None),
|
||||||
|
"module_name": getattr(module, "name", None),
|
||||||
|
"module_type": getattr(module, "modname", None),
|
||||||
|
"forum_id": getattr(module, "instance", None),
|
||||||
|
"discussion_count": len(disc_list),
|
||||||
|
"time_created": getattr(module, "timecreated", None),
|
||||||
|
"time_modified": getattr(module, "timemodified", None),
|
||||||
|
"visible": getattr(module, "visible", None),
|
||||||
|
"groupmode": getattr(module, "groupmode", None),
|
||||||
|
"discussions": [
|
||||||
|
{
|
||||||
|
"id": getattr(d, "id", None),
|
||||||
|
"name": getattr(d, "name", None),
|
||||||
|
"user_id": getattr(d, "userid", None),
|
||||||
|
"user_fullname": getattr(d, "userfullname", None),
|
||||||
|
"time_created": getattr(d, "timecreated", None),
|
||||||
|
"time_modified": getattr(d, "timemodified", None),
|
||||||
|
}
|
||||||
|
for d in disc_list
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
return Document(
|
return Document(
|
||||||
id=f"moodle_forum_{module.id}",
|
id=f"moodle_forum_{module.id}",
|
||||||
source="moodle",
|
source="moodle",
|
||||||
@ -272,6 +341,7 @@ class MoodleConnector(LoadConnector, PollConnector):
|
|||||||
blob=blob,
|
blob=blob,
|
||||||
doc_updated_at=datetime.fromtimestamp(latest_ts or 0, tz=timezone.utc),
|
doc_updated_at=datetime.fromtimestamp(latest_ts or 0, tz=timezone.utc),
|
||||||
size_bytes=len(blob),
|
size_bytes=len(blob),
|
||||||
|
metadata=metadata,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self._log_error(f"processing forum {module.name}", e)
|
self._log_error(f"processing forum {module.name}", e)
|
||||||
@ -293,11 +363,37 @@ class MoodleConnector(LoadConnector, PollConnector):
|
|||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
resp = rl_requests.get(self._add_token_to_url(file_info.fileurl), timeout=60)
|
resp = rl_requests.get(
|
||||||
|
self._add_token_to_url(file_info.fileurl), timeout=60
|
||||||
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
blob = resp.content
|
blob = resp.content
|
||||||
ext = os.path.splitext(file_name)[1] or ".html"
|
ext = os.path.splitext(file_name)[1] or ".html"
|
||||||
semantic_id = f"{course.fullname} / {section.name} / {module.name}"
|
semantic_id = f"{course.fullname} / {section.name} / {module.name}"
|
||||||
|
|
||||||
|
# Create metadata dictionary with relevant information
|
||||||
|
metadata = {
|
||||||
|
"moodle_url": self.moodle_url,
|
||||||
|
"course_id": getattr(course, "id", None),
|
||||||
|
"course_name": getattr(course, "fullname", None),
|
||||||
|
"course_shortname": getattr(course, "shortname", None),
|
||||||
|
"section_id": getattr(section, "id", None),
|
||||||
|
"section_name": getattr(section, "name", None),
|
||||||
|
"section_number": getattr(section, "section", None),
|
||||||
|
"module_id": getattr(module, "id", None),
|
||||||
|
"module_name": getattr(module, "name", None),
|
||||||
|
"module_type": getattr(module, "modname", None),
|
||||||
|
"module_instance": getattr(module, "instance", None),
|
||||||
|
"page_url": getattr(file_info, "fileurl", None),
|
||||||
|
"file_name": file_name,
|
||||||
|
"file_size": getattr(file_info, "filesize", len(blob)),
|
||||||
|
"file_type": getattr(file_info, "mimetype", None),
|
||||||
|
"time_created": getattr(module, "timecreated", None),
|
||||||
|
"time_modified": getattr(module, "timemodified", None),
|
||||||
|
"visible": getattr(module, "visible", None),
|
||||||
|
"groupmode": getattr(module, "groupmode", None),
|
||||||
|
}
|
||||||
|
|
||||||
return Document(
|
return Document(
|
||||||
id=f"moodle_page_{module.id}",
|
id=f"moodle_page_{module.id}",
|
||||||
source="moodle",
|
source="moodle",
|
||||||
@ -306,6 +402,7 @@ class MoodleConnector(LoadConnector, PollConnector):
|
|||||||
blob=blob,
|
blob=blob,
|
||||||
doc_updated_at=datetime.fromtimestamp(ts or 0, tz=timezone.utc),
|
doc_updated_at=datetime.fromtimestamp(ts or 0, tz=timezone.utc),
|
||||||
size_bytes=len(blob),
|
size_bytes=len(blob),
|
||||||
|
metadata=metadata,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self._log_error(f"processing page {file_name}", e, "error")
|
self._log_error(f"processing page {file_name}", e, "error")
|
||||||
@ -326,6 +423,29 @@ class MoodleConnector(LoadConnector, PollConnector):
|
|||||||
|
|
||||||
semantic_id = f"{course.fullname} / {section.name} / {mname}"
|
semantic_id = f"{course.fullname} / {section.name} / {mname}"
|
||||||
blob = markdown.encode("utf-8")
|
blob = markdown.encode("utf-8")
|
||||||
|
|
||||||
|
# Create metadata dictionary with relevant information
|
||||||
|
metadata = {
|
||||||
|
"moodle_url": self.moodle_url,
|
||||||
|
"course_id": getattr(course, "id", None),
|
||||||
|
"course_name": getattr(course, "fullname", None),
|
||||||
|
"course_shortname": getattr(course, "shortname", None),
|
||||||
|
"section_id": getattr(section, "id", None),
|
||||||
|
"section_name": getattr(section, "name", None),
|
||||||
|
"section_number": getattr(section, "section", None),
|
||||||
|
"module_id": getattr(module, "id", None),
|
||||||
|
"module_name": getattr(module, "name", None),
|
||||||
|
"module_type": getattr(module, "modname", None),
|
||||||
|
"activity_type": mtype,
|
||||||
|
"activity_instance": getattr(module, "instance", None),
|
||||||
|
"description": desc,
|
||||||
|
"time_created": getattr(module, "timecreated", None),
|
||||||
|
"time_modified": getattr(module, "timemodified", None),
|
||||||
|
"added": getattr(module, "added", None),
|
||||||
|
"visible": getattr(module, "visible", None),
|
||||||
|
"groupmode": getattr(module, "groupmode", None),
|
||||||
|
}
|
||||||
|
|
||||||
return Document(
|
return Document(
|
||||||
id=f"moodle_{mtype}_{module.id}",
|
id=f"moodle_{mtype}_{module.id}",
|
||||||
source="moodle",
|
source="moodle",
|
||||||
@ -334,6 +454,7 @@ class MoodleConnector(LoadConnector, PollConnector):
|
|||||||
blob=blob,
|
blob=blob,
|
||||||
doc_updated_at=datetime.fromtimestamp(ts or 0, tz=timezone.utc),
|
doc_updated_at=datetime.fromtimestamp(ts or 0, tz=timezone.utc),
|
||||||
size_bytes=len(blob),
|
size_bytes=len(blob),
|
||||||
|
metadata=metadata,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _process_book(self, course, section, module) -> Optional[Document]:
|
def _process_book(self, course, section, module) -> Optional[Document]:
|
||||||
@ -342,8 +463,10 @@ class MoodleConnector(LoadConnector, PollConnector):
|
|||||||
|
|
||||||
contents = module.contents
|
contents = module.contents
|
||||||
chapters = [
|
chapters = [
|
||||||
c for c in contents
|
c
|
||||||
if getattr(c, "fileurl", None) and os.path.basename(c.filename) == "index.html"
|
for c in contents
|
||||||
|
if getattr(c, "fileurl", None)
|
||||||
|
and os.path.basename(c.filename) == "index.html"
|
||||||
]
|
]
|
||||||
if not chapters:
|
if not chapters:
|
||||||
return None
|
return None
|
||||||
@ -356,17 +479,54 @@ class MoodleConnector(LoadConnector, PollConnector):
|
|||||||
)
|
)
|
||||||
|
|
||||||
markdown_parts = [f"# {module.name}\n"]
|
markdown_parts = [f"# {module.name}\n"]
|
||||||
|
chapter_info = []
|
||||||
|
|
||||||
for ch in chapters:
|
for ch in chapters:
|
||||||
try:
|
try:
|
||||||
resp = rl_requests.get(self._add_token_to_url(ch.fileurl), timeout=60)
|
resp = rl_requests.get(self._add_token_to_url(ch.fileurl), timeout=60)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
html = resp.content.decode("utf-8", errors="ignore")
|
html = resp.content.decode("utf-8", errors="ignore")
|
||||||
markdown_parts.append(md(html) + "\n\n---\n")
|
markdown_parts.append(md(html) + "\n\n---\n")
|
||||||
|
|
||||||
|
# Collect chapter information for metadata
|
||||||
|
chapter_info.append(
|
||||||
|
{
|
||||||
|
"chapter_id": getattr(ch, "chapterid", None),
|
||||||
|
"title": getattr(ch, "title", None),
|
||||||
|
"filename": getattr(ch, "filename", None),
|
||||||
|
"fileurl": getattr(ch, "fileurl", None),
|
||||||
|
"time_created": getattr(ch, "timecreated", None),
|
||||||
|
"time_modified": getattr(ch, "timemodified", None),
|
||||||
|
"size": getattr(ch, "filesize", None),
|
||||||
|
}
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self._log_error(f"processing book chapter {ch.filename}", e)
|
self._log_error(f"processing book chapter {ch.filename}", e)
|
||||||
|
|
||||||
blob = "\n".join(markdown_parts).encode("utf-8")
|
blob = "\n".join(markdown_parts).encode("utf-8")
|
||||||
semantic_id = f"{course.fullname} / {section.name} / {module.name}"
|
semantic_id = f"{course.fullname} / {section.name} / {module.name}"
|
||||||
|
|
||||||
|
# Create metadata dictionary with relevant information
|
||||||
|
metadata = {
|
||||||
|
"moodle_url": self.moodle_url,
|
||||||
|
"course_id": getattr(course, "id", None),
|
||||||
|
"course_name": getattr(course, "fullname", None),
|
||||||
|
"course_shortname": getattr(course, "shortname", None),
|
||||||
|
"section_id": getattr(section, "id", None),
|
||||||
|
"section_name": getattr(section, "name", None),
|
||||||
|
"section_number": getattr(section, "section", None),
|
||||||
|
"module_id": getattr(module, "id", None),
|
||||||
|
"module_name": getattr(module, "name", None),
|
||||||
|
"module_type": getattr(module, "modname", None),
|
||||||
|
"book_id": getattr(module, "instance", None),
|
||||||
|
"chapter_count": len(chapters),
|
||||||
|
"chapters": chapter_info,
|
||||||
|
"time_created": getattr(module, "timecreated", None),
|
||||||
|
"time_modified": getattr(module, "timemodified", None),
|
||||||
|
"visible": getattr(module, "visible", None),
|
||||||
|
"groupmode": getattr(module, "groupmode", None),
|
||||||
|
}
|
||||||
|
|
||||||
return Document(
|
return Document(
|
||||||
id=f"moodle_book_{module.id}",
|
id=f"moodle_book_{module.id}",
|
||||||
source="moodle",
|
source="moodle",
|
||||||
@ -375,4 +535,5 @@ class MoodleConnector(LoadConnector, PollConnector):
|
|||||||
blob=blob,
|
blob=blob,
|
||||||
doc_updated_at=datetime.fromtimestamp(latest_ts or 0, tz=timezone.utc),
|
doc_updated_at=datetime.fromtimestamp(latest_ts or 0, tz=timezone.utc),
|
||||||
size_bytes=len(blob),
|
size_bytes=len(blob),
|
||||||
|
metadata=metadata,
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user