Mirror of https://github.com/infiniflow/ragflow.git (synced 2025-12-30 00:32:30 +08:00)
Revert "Feat: github connector (#12292)"
This reverts commit f099bc1236.
@@ -234,8 +234,6 @@ _REPLACEMENT_EXPANSIONS = "body.view.value"

BOX_WEB_OAUTH_REDIRECT_URI = os.environ.get("BOX_WEB_OAUTH_REDIRECT_URI", "http://localhost:9380/v1/connector/box/oauth/web/callback")

GITHUB_CONNECTOR_BASE_URL = os.environ.get("GITHUB_CONNECTOR_BASE_URL") or None


class HtmlBasedConnectorTransformLinksStrategy(str, Enum):
    # remove links entirely
    STRIP = "strip"
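The `or None` on GITHUB_CONNECTOR_BASE_URL means that an unset and an empty environment variable both come through as None, which is what the connector later checks before passing a base_url to PyGithub. A minimal sketch of just that idiom (the variable value here is illustrative only):

import os

os.environ["GITHUB_CONNECTOR_BASE_URL"] = ""  # unset and "" behave the same way

base_url = os.environ.get("GITHUB_CONNECTOR_BASE_URL") or None
print(base_url)  # None -> PyGithub is constructed without a custom base_url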
@@ -1,217 +0,0 @@
import sys
import time
import logging
from collections.abc import Generator
from datetime import datetime
from typing import Generic
from typing import TypeVar
from common.data_source.interfaces import (
    BaseConnector,
    CheckpointedConnector,
    CheckpointedConnectorWithPermSync,
    CheckpointOutput,
    LoadConnector,
    PollConnector,
)
from common.data_source.models import ConnectorCheckpoint, ConnectorFailure, Document


TimeRange = tuple[datetime, datetime]

CT = TypeVar("CT", bound=ConnectorCheckpoint)


def batched_doc_ids(
    checkpoint_connector_generator: CheckpointOutput[CT],
    batch_size: int,
) -> Generator[set[str], None, None]:
    batch: set[str] = set()
    for document, failure, next_checkpoint in CheckpointOutputWrapper[CT]()(
        checkpoint_connector_generator
    ):
        if document is not None:
            batch.add(document.id)
        elif (
            failure and failure.failed_document and failure.failed_document.document_id
        ):
            batch.add(failure.failed_document.document_id)

        if len(batch) >= batch_size:
            yield batch
            batch = set()
    if len(batch) > 0:
        yield batch


class CheckpointOutputWrapper(Generic[CT]):
    """
    Wraps a CheckpointOutput generator to give things back in a more digestible format,
    specifically for Document outputs.
    The connector format is easier for the connector implementor (e.g. it enforces exactly
    one new checkpoint is returned AND that the checkpoint is at the end), thus the different
    formats.
    """

    def __init__(self) -> None:
        self.next_checkpoint: CT | None = None

    def __call__(
        self,
        checkpoint_connector_generator: CheckpointOutput[CT],
    ) -> Generator[
        tuple[Document | None, ConnectorFailure | None, CT | None],
        None,
        None,
    ]:
        # grabs the final return value and stores it in the `next_checkpoint` variable
        def _inner_wrapper(
            checkpoint_connector_generator: CheckpointOutput[CT],
        ) -> CheckpointOutput[CT]:
            self.next_checkpoint = yield from checkpoint_connector_generator
            return self.next_checkpoint  # not used

        for document_or_failure in _inner_wrapper(checkpoint_connector_generator):
            if isinstance(document_or_failure, Document):
                yield document_or_failure, None, None
            elif isinstance(document_or_failure, ConnectorFailure):
                yield None, document_or_failure, None
            else:
                raise ValueError(
                    f"Invalid document_or_failure type: {type(document_or_failure)}"
                )

        if self.next_checkpoint is None:
            raise RuntimeError(
                "Checkpoint is None. This should never happen - the connector should always return a checkpoint."
            )

        yield None, None, self.next_checkpoint


class ConnectorRunner(Generic[CT]):
    """
    Handles:
    - Batching
    - Additional exception logging
    - Combining different connector types to a single interface
    """

    def __init__(
        self,
        connector: BaseConnector,
        batch_size: int,
        # cannot be True for non-checkpointed connectors
        include_permissions: bool,
        time_range: TimeRange | None = None,
    ):
        if not isinstance(connector, CheckpointedConnector) and include_permissions:
            raise ValueError(
                "include_permissions cannot be True for non-checkpointed connectors"
            )

        self.connector = connector
        self.time_range = time_range
        self.batch_size = batch_size
        self.include_permissions = include_permissions

        self.doc_batch: list[Document] = []

    def run(self, checkpoint: CT) -> Generator[
        tuple[list[Document] | None, ConnectorFailure | None, CT | None],
        None,
        None,
    ]:
        """Adds additional exception logging to the connector."""
        try:
            if isinstance(self.connector, CheckpointedConnector):
                if self.time_range is None:
                    raise ValueError("time_range is required for CheckpointedConnector")

                start = time.monotonic()
                if self.include_permissions:
                    if not isinstance(
                        self.connector, CheckpointedConnectorWithPermSync
                    ):
                        raise ValueError(
                            "Connector does not support permission syncing"
                        )
                    load_from_checkpoint = (
                        self.connector.load_from_checkpoint_with_perm_sync
                    )
                else:
                    load_from_checkpoint = self.connector.load_from_checkpoint
                checkpoint_connector_generator = load_from_checkpoint(
                    start=self.time_range[0].timestamp(),
                    end=self.time_range[1].timestamp(),
                    checkpoint=checkpoint,
                )
                next_checkpoint: CT | None = None
                # this is guaranteed to always run at least once with next_checkpoint being non-None
                for document, failure, next_checkpoint in CheckpointOutputWrapper[CT]()(
                    checkpoint_connector_generator
                ):
                    if document is not None and isinstance(document, Document):
                        self.doc_batch.append(document)

                    if failure is not None:
                        yield None, failure, None

                    if len(self.doc_batch) >= self.batch_size:
                        yield self.doc_batch, None, None
                        self.doc_batch = []

                # yield remaining documents
                if len(self.doc_batch) > 0:
                    yield self.doc_batch, None, None
                    self.doc_batch = []

                yield None, None, next_checkpoint

                logging.debug(
                    f"Connector took {time.monotonic() - start} seconds to get to the next checkpoint."
                )

            else:
                finished_checkpoint = self.connector.build_dummy_checkpoint()
                finished_checkpoint.has_more = False

                if isinstance(self.connector, PollConnector):
                    if self.time_range is None:
                        raise ValueError("time_range is required for PollConnector")

                    for document_batch in self.connector.poll_source(
                        start=self.time_range[0].timestamp(),
                        end=self.time_range[1].timestamp(),
                    ):
                        yield document_batch, None, None

                    yield None, None, finished_checkpoint
                elif isinstance(self.connector, LoadConnector):
                    for document_batch in self.connector.load_from_state():
                        yield document_batch, None, None

                    yield None, None, finished_checkpoint
                else:
                    raise ValueError(f"Invalid connector. type: {type(self.connector)}")
        except Exception:
            exc_type, _, exc_traceback = sys.exc_info()

            # Traverse the traceback to find the last frame where the exception was raised
            tb = exc_traceback
            if tb is None:
                logging.error("No traceback found for exception")
                raise

            while tb.tb_next:
                tb = tb.tb_next  # Move to the next frame in the traceback

            # Get the local variables from the frame where the exception occurred
            local_vars = tb.tb_frame.f_locals
            local_vars_str = "\n".join(
                f"{key}: {value}" for key, value in local_vars.items()
            )
            logging.error(
                f"Error in connector. type: {exc_type};\n"
                f"local_vars below -> \n{local_vars_str[:1024]}"
            )
            raise
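CheckpointOutputWrapper above relies on the fact that `yield from` evaluates to the wrapped generator's return value (the final checkpoint), while the yielded documents and failures stream through unchanged. A minimal, self-contained sketch of that mechanism; the names below are illustrative and not part of the commit:

from collections.abc import Generator


def produce() -> Generator[str, None, int]:
    # yields items, then *returns* a checkpoint-like value
    yield "doc-1"
    yield "doc-2"
    return 42


def consume() -> Generator[str, None, None]:
    checkpoint = yield from produce()  # re-yields "doc-1" and "doc-2"
    print(f"final checkpoint: {checkpoint}")  # 42


for item in consume():
    print(item)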
@@ -1,954 +0,0 @@
import copy
import logging
from collections.abc import Callable
from collections.abc import Generator
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from enum import Enum
from typing import Any
from typing import cast

from github import Github
from github import RateLimitExceededException
from github import Repository
from github.GithubException import GithubException
from github.Issue import Issue
from github.NamedUser import NamedUser
from github.PaginatedList import PaginatedList
from github.PullRequest import PullRequest
from pydantic import BaseModel
from typing_extensions import override

from common.data_source.config import DocumentSource, GITHUB_CONNECTOR_BASE_URL
from common.data_source.exceptions import (
    ConnectorMissingCredentialError,
    ConnectorValidationError,
    CredentialExpiredError,
    InsufficientPermissionsError,
    UnexpectedValidationError,
)
from common.data_source.interfaces import CheckpointedConnectorWithPermSync, CheckpointOutput
from common.data_source.models import (
    ConnectorCheckpoint,
    ConnectorFailure,
    Document,
    DocumentFailure,
    ExternalAccess,
    SecondsSinceUnixEpoch,
    TextSection,
)
from common.data_source.connector_runner import ConnectorRunner
from .models import SerializedRepository
from .rate_limit_utils import sleep_after_rate_limit_exception
from .utils import deserialize_repository
from .utils import get_external_access_permission

ITEMS_PER_PAGE = 100
CURSOR_LOG_FREQUENCY = 50

_MAX_NUM_RATE_LIMIT_RETRIES = 5

ONE_DAY = timedelta(days=1)
SLIM_BATCH_SIZE = 100
# Cases
# X (from start) standard run, no fallback to cursor-based pagination
# X (from start) standard run errors, fallback to cursor-based pagination
# X error in the middle of a page
# X no errors: run to completion
# X (from checkpoint) standard run, no fallback to cursor-based pagination
# X (from checkpoint) continue from cursor-based pagination
# - retrying
# - no retrying

# things to check:
# checkpoint state on return
# checkpoint progress (no infinite loop)


class DocMetadata(BaseModel):
    repo: str


def get_nextUrl_key(pag_list: PaginatedList[PullRequest | Issue]) -> str:
    if "_PaginatedList__nextUrl" in pag_list.__dict__:
        return "_PaginatedList__nextUrl"
    for key in pag_list.__dict__:
        if "__nextUrl" in key:
            return key
    for key in pag_list.__dict__:
        if "nextUrl" in key:
            return key
    return ""


def get_nextUrl(
    pag_list: PaginatedList[PullRequest | Issue], nextUrl_key: str
) -> str | None:
    return getattr(pag_list, nextUrl_key) if nextUrl_key else None


def set_nextUrl(
    pag_list: PaginatedList[PullRequest | Issue], nextUrl_key: str, nextUrl: str
) -> None:
    if nextUrl_key:
        setattr(pag_list, nextUrl_key, nextUrl)
    elif nextUrl:
        raise ValueError("Next URL key not found: " + str(pag_list.__dict__))


def _paginate_until_error(
    git_objs: Callable[[], PaginatedList[PullRequest | Issue]],
    cursor_url: str | None,
    prev_num_objs: int,
    cursor_url_callback: Callable[[str | None, int], None],
    retrying: bool = False,
) -> Generator[PullRequest | Issue, None, None]:
    num_objs = prev_num_objs
    pag_list = git_objs()
    nextUrl_key = get_nextUrl_key(pag_list)
    if cursor_url:
        set_nextUrl(pag_list, nextUrl_key, cursor_url)
    elif retrying:
        # if we are retrying, we want to skip the objects retrieved
        # over previous calls. Unfortunately, this WILL retrieve all
        # pages before the one we are resuming from, so we really
        # don't want this case to be hit often
        logging.warning(
            "Retrying from a previous cursor-based pagination call. "
            "This will retrieve all pages before the one we are resuming from, "
            "which may take a while and consume many API calls."
        )
        pag_list = cast(PaginatedList[PullRequest | Issue], pag_list[prev_num_objs:])
        num_objs = 0

    try:
        # this for loop handles cursor-based pagination
        for issue_or_pr in pag_list:
            num_objs += 1
            yield issue_or_pr
            # used to store the current cursor url in the checkpoint. This value
            # is updated during iteration over pag_list.
            cursor_url_callback(get_nextUrl(pag_list, nextUrl_key), num_objs)

            if num_objs % CURSOR_LOG_FREQUENCY == 0:
                logging.info(
                    f"Retrieved {num_objs} objects with current cursor url: {get_nextUrl(pag_list, nextUrl_key)}"
                )

    except Exception as e:
        logging.exception(f"Error during cursor-based pagination: {e}")
        if num_objs - prev_num_objs > 0:
            raise

        if get_nextUrl(pag_list, nextUrl_key) is not None and not retrying:
            logging.info(
                "Assuming that this error is due to cursor "
                "expiration because no objects were retrieved. "
                "Retrying from the first page."
            )
            yield from _paginate_until_error(
                git_objs, None, prev_num_objs, cursor_url_callback, retrying=True
            )
            return

        # for no cursor url or if we reach this point after a retry, raise the error
        raise


def _get_batch_rate_limited(
    # We pass in a callable because we want git_objs to produce a fresh
    # PaginatedList each time it's called to avoid using the same object for cursor-based pagination
    # from a partial offset-based pagination call.
    git_objs: Callable[[], PaginatedList],
    page_num: int,
    cursor_url: str | None,
    prev_num_objs: int,
    cursor_url_callback: Callable[[str | None, int], None],
    github_client: Github,
    attempt_num: int = 0,
) -> Generator[PullRequest | Issue, None, None]:
    if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:
        raise RuntimeError(
            "Re-tried fetching batch too many times. Something is going wrong with fetching objects from Github"
        )
    try:
        if cursor_url:
            # when this is set, we are resuming from an earlier
            # cursor-based pagination call.
            yield from _paginate_until_error(
                git_objs, cursor_url, prev_num_objs, cursor_url_callback
            )
            return
        objs = list(git_objs().get_page(page_num))
        # fetch all data here to disable lazy loading later
        # this is needed to capture the rate limit exception here (if one occurs)
        for obj in objs:
            if hasattr(obj, "raw_data"):
                getattr(obj, "raw_data")
        yield from objs
    except RateLimitExceededException:
        sleep_after_rate_limit_exception(github_client)
        yield from _get_batch_rate_limited(
            git_objs,
            page_num,
            cursor_url,
            prev_num_objs,
            cursor_url_callback,
            github_client,
            attempt_num + 1,
        )
    except GithubException as e:
        if not (
            e.status == 422
            and (
                "cursor" in (e.message or "")
                or "cursor" in (e.data or {}).get("message", "")
            )
        ):
            raise
        # Fallback to a cursor-based pagination strategy
        # This can happen for "large datasets," but there's no documentation
        # On the error on the web as far as we can tell.
        # Error message:
        # "Pagination with the page parameter is not supported for large datasets,
        # please use cursor based pagination (after/before)"
        yield from _paginate_until_error(
            git_objs, cursor_url, prev_num_objs, cursor_url_callback
        )


def _get_userinfo(user: NamedUser) -> dict[str, str]:
    def _safe_get(attr_name: str) -> str | None:
        try:
            return cast(str | None, getattr(user, attr_name))
        except GithubException:
            logging.debug(f"Error getting {attr_name} for user")
            return None

    return {
        k: v
        for k, v in {
            "login": _safe_get("login"),
            "name": _safe_get("name"),
            "email": _safe_get("email"),
        }.items()
        if v is not None
    }


def _convert_pr_to_document(
    pull_request: PullRequest, repo_external_access: ExternalAccess | None
) -> Document:
    repo_name = pull_request.base.repo.full_name if pull_request.base else ""
    doc_metadata = DocMetadata(repo=repo_name)
    return Document(
        id=pull_request.html_url,
        sections=[
            TextSection(link=pull_request.html_url, text=pull_request.body or "")
        ],
        external_access=repo_external_access,
        source=DocumentSource.GITHUB,
        semantic_identifier=f"{pull_request.number}: {pull_request.title}",
        # updated_at is UTC time but is timezone unaware, explicitly add UTC
        # as there is logic in indexing to prevent wrong timestamped docs
        # due to local time discrepancies with UTC
        doc_updated_at=(
            pull_request.updated_at.replace(tzinfo=timezone.utc)
            if pull_request.updated_at
            else None
        ),
        # this metadata is used in perm sync
        doc_metadata=doc_metadata.model_dump(),
        metadata={
            k: [str(vi) for vi in v] if isinstance(v, list) else str(v)
            for k, v in {
                "object_type": "PullRequest",
                "id": pull_request.number,
                "merged": pull_request.merged,
                "state": pull_request.state,
                "user": _get_userinfo(pull_request.user) if pull_request.user else None,
                "assignees": [
                    _get_userinfo(assignee) for assignee in pull_request.assignees
                ],
                "repo": (
                    pull_request.base.repo.full_name if pull_request.base else None
                ),
                "num_commits": str(pull_request.commits),
                "num_files_changed": str(pull_request.changed_files),
                "labels": [label.name for label in pull_request.labels],
                "created_at": (
                    pull_request.created_at.replace(tzinfo=timezone.utc)
                    if pull_request.created_at
                    else None
                ),
                "updated_at": (
                    pull_request.updated_at.replace(tzinfo=timezone.utc)
                    if pull_request.updated_at
                    else None
                ),
                "closed_at": (
                    pull_request.closed_at.replace(tzinfo=timezone.utc)
                    if pull_request.closed_at
                    else None
                ),
                "merged_at": (
                    pull_request.merged_at.replace(tzinfo=timezone.utc)
                    if pull_request.merged_at
                    else None
                ),
                "merged_by": (
                    _get_userinfo(pull_request.merged_by)
                    if pull_request.merged_by
                    else None
                ),
            }.items()
            if v is not None
        },
    )


def _fetch_issue_comments(issue: Issue) -> str:
    comments = issue.get_comments()
    return "\nComment: ".join(comment.body for comment in comments)


def _convert_issue_to_document(
    issue: Issue, repo_external_access: ExternalAccess | None
) -> Document:
    repo_name = issue.repository.full_name if issue.repository else ""
    doc_metadata = DocMetadata(repo=repo_name)
    return Document(
        id=issue.html_url,
        sections=[TextSection(link=issue.html_url, text=issue.body or "")],
        source=DocumentSource.GITHUB,
        external_access=repo_external_access,
        semantic_identifier=f"{issue.number}: {issue.title}",
        # updated_at is UTC time but is timezone unaware
        doc_updated_at=issue.updated_at.replace(tzinfo=timezone.utc),
        # this metadata is used in perm sync
        doc_metadata=doc_metadata.model_dump(),
        metadata={
            k: [str(vi) for vi in v] if isinstance(v, list) else str(v)
            for k, v in {
                "object_type": "Issue",
                "id": issue.number,
                "state": issue.state,
                "user": _get_userinfo(issue.user) if issue.user else None,
                "assignees": [_get_userinfo(assignee) for assignee in issue.assignees],
                "repo": issue.repository.full_name if issue.repository else None,
                "labels": [label.name for label in issue.labels],
                "created_at": (
                    issue.created_at.replace(tzinfo=timezone.utc)
                    if issue.created_at
                    else None
                ),
                "updated_at": (
                    issue.updated_at.replace(tzinfo=timezone.utc)
                    if issue.updated_at
                    else None
                ),
                "closed_at": (
                    issue.closed_at.replace(tzinfo=timezone.utc)
                    if issue.closed_at
                    else None
                ),
                "closed_by": (
                    _get_userinfo(issue.closed_by) if issue.closed_by else None
                ),
            }.items()
            if v is not None
        },
    )


class GithubConnectorStage(Enum):
    START = "start"
    PRS = "prs"
    ISSUES = "issues"


class GithubConnectorCheckpoint(ConnectorCheckpoint):
    stage: GithubConnectorStage
    curr_page: int

    cached_repo_ids: list[int] | None = None
    cached_repo: SerializedRepository | None = None

    # Used for the fallback cursor-based pagination strategy
    num_retrieved: int
    cursor_url: str | None = None

    def reset(self) -> None:
        """
        Resets curr_page, num_retrieved, and cursor_url to their initial values (0, 0, None)
        """
        self.curr_page = 0
        self.num_retrieved = 0
        self.cursor_url = None


def make_cursor_url_callback(
    checkpoint: GithubConnectorCheckpoint,
) -> Callable[[str | None, int], None]:
    def cursor_url_callback(cursor_url: str | None, num_objs: int) -> None:
        # we want to maintain the old cursor url so code after retrieval
        # can determine that we are using the fallback cursor-based pagination strategy
        if cursor_url:
            checkpoint.cursor_url = cursor_url
        checkpoint.num_retrieved = num_objs

    return cursor_url_callback


class GithubConnector(CheckpointedConnectorWithPermSync[GithubConnectorCheckpoint]):
    def __init__(
        self,
        repo_owner: str,
        repositories: str | None = None,
        state_filter: str = "all",
        include_prs: bool = True,
        include_issues: bool = False,
    ) -> None:
        self.repo_owner = repo_owner
        self.repositories = repositories
        self.state_filter = state_filter
        self.include_prs = include_prs
        self.include_issues = include_issues
        self.github_client: Github | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        # defaults to 30 items per page, can be set to as high as 100
        self.github_client = (
            Github(
                credentials["github_access_token"],
                base_url=GITHUB_CONNECTOR_BASE_URL,
                per_page=ITEMS_PER_PAGE,
            )
            if GITHUB_CONNECTOR_BASE_URL
            else Github(credentials["github_access_token"], per_page=ITEMS_PER_PAGE)
        )
        return None

    def get_github_repo(
        self, github_client: Github, attempt_num: int = 0
    ) -> Repository.Repository:
        if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:
            raise RuntimeError(
                "Re-tried fetching repo too many times. Something is going wrong with fetching objects from Github"
            )

        try:
            return github_client.get_repo(f"{self.repo_owner}/{self.repositories}")
        except RateLimitExceededException:
            sleep_after_rate_limit_exception(github_client)
            return self.get_github_repo(github_client, attempt_num + 1)

    def get_github_repos(
        self, github_client: Github, attempt_num: int = 0
    ) -> list[Repository.Repository]:
        """Get specific repositories based on comma-separated repo_name string."""
        if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:
            raise RuntimeError(
                "Re-tried fetching repos too many times. Something is going wrong with fetching objects from Github"
            )

        try:
            repos = []
            # Split repo_name by comma and strip whitespace
            repo_names = [
                name.strip() for name in (cast(str, self.repositories)).split(",")
            ]

            for repo_name in repo_names:
                if repo_name:  # Skip empty strings
                    try:
                        repo = github_client.get_repo(f"{self.repo_owner}/{repo_name}")
                        repos.append(repo)
                    except GithubException as e:
                        logging.warning(
                            f"Could not fetch repo {self.repo_owner}/{repo_name}: {e}"
                        )

            return repos
        except RateLimitExceededException:
            sleep_after_rate_limit_exception(github_client)
            return self.get_github_repos(github_client, attempt_num + 1)

    def get_all_repos(
        self, github_client: Github, attempt_num: int = 0
    ) -> list[Repository.Repository]:
        if attempt_num > _MAX_NUM_RATE_LIMIT_RETRIES:
            raise RuntimeError(
                "Re-tried fetching repos too many times. Something is going wrong with fetching objects from Github"
            )

        try:
            # Try to get organization first
            try:
                org = github_client.get_organization(self.repo_owner)
                return list(org.get_repos())

            except GithubException:
                # If not an org, try as a user
                user = github_client.get_user(self.repo_owner)
                return list(user.get_repos())
        except RateLimitExceededException:
            sleep_after_rate_limit_exception(github_client)
            return self.get_all_repos(github_client, attempt_num + 1)

    def _pull_requests_func(
        self, repo: Repository.Repository
    ) -> Callable[[], PaginatedList[PullRequest]]:
        return lambda: repo.get_pulls(
            state=self.state_filter, sort="updated", direction="desc"
        )

    def _issues_func(
        self, repo: Repository.Repository
    ) -> Callable[[], PaginatedList[Issue]]:
        return lambda: repo.get_issues(
            state=self.state_filter, sort="updated", direction="desc"
        )

    def _fetch_from_github(
        self,
        checkpoint: GithubConnectorCheckpoint,
        start: datetime | None = None,
        end: datetime | None = None,
        include_permissions: bool = False,
    ) -> Generator[Document | ConnectorFailure, None, GithubConnectorCheckpoint]:
        if self.github_client is None:
            raise ConnectorMissingCredentialError("GitHub")

        checkpoint = copy.deepcopy(checkpoint)

        # First run of the connector, fetch all repos and store in checkpoint
        if checkpoint.cached_repo_ids is None:
            repos = []
            if self.repositories:
                if "," in self.repositories:
                    # Multiple repositories specified
                    repos = self.get_github_repos(self.github_client)
                else:
                    # Single repository (backward compatibility)
                    repos = [self.get_github_repo(self.github_client)]
            else:
                # All repositories
                repos = self.get_all_repos(self.github_client)
            if not repos:
                checkpoint.has_more = False
                return checkpoint

            curr_repo = repos.pop()
            checkpoint.cached_repo_ids = [repo.id for repo in repos]
            checkpoint.cached_repo = SerializedRepository(
                id=curr_repo.id,
                headers=curr_repo.raw_headers,
                raw_data=curr_repo.raw_data,
            )
            checkpoint.stage = GithubConnectorStage.PRS
            checkpoint.curr_page = 0
            # save checkpoint with repo ids retrieved
            return checkpoint

        if checkpoint.cached_repo is None:
            raise ValueError("No repo saved in checkpoint")

        # Deserialize the repository from the checkpoint
        repo = deserialize_repository(checkpoint.cached_repo, self.github_client)

        cursor_url_callback = make_cursor_url_callback(checkpoint)
        repo_external_access: ExternalAccess | None = None
        if include_permissions:
            repo_external_access = get_external_access_permission(
                repo, self.github_client
            )
        if self.include_prs and checkpoint.stage == GithubConnectorStage.PRS:
            logging.info(f"Fetching PRs for repo: {repo.name}")

            pr_batch = _get_batch_rate_limited(
                self._pull_requests_func(repo),
                checkpoint.curr_page,
                checkpoint.cursor_url,
                checkpoint.num_retrieved,
                cursor_url_callback,
                self.github_client,
            )
            checkpoint.curr_page += 1  # NOTE: not used for cursor-based fallback
            done_with_prs = False
            num_prs = 0
            pr = None
            for pr in pr_batch:
                num_prs += 1

                # we iterate backwards in time, so at this point we stop processing prs
                if (
                    start is not None
                    and pr.updated_at
                    and pr.updated_at.replace(tzinfo=timezone.utc) < start
                ):
                    done_with_prs = True
                    break
                # Skip PRs updated after the end date
                if (
                    end is not None
                    and pr.updated_at
                    and pr.updated_at.replace(tzinfo=timezone.utc) > end
                ):
                    continue
                try:
                    yield _convert_pr_to_document(
                        cast(PullRequest, pr), repo_external_access
                    )
                except Exception as e:
                    error_msg = f"Error converting PR to document: {e}"
                    logging.exception(error_msg)
                    yield ConnectorFailure(
                        failed_document=DocumentFailure(
                            document_id=str(pr.id), document_link=pr.html_url
                        ),
                        failure_message=error_msg,
                        exception=e,
                    )
                    continue

            # If we reach this point with a cursor url in the checkpoint, we were using
            # the fallback cursor-based pagination strategy. That strategy tries to get all
            # PRs, so having cursor_url set means we are done with prs. However, we need to
            # return AFTER the checkpoint reset to avoid infinite loops.

            # if we found any PRs on the page and there are more PRs to get, return the checkpoint.
            # In offset mode, while indexing without time constraints, the pr batch
            # will be empty when we're done.
            used_cursor = checkpoint.cursor_url is not None
            logging.info(f"Fetched {num_prs} PRs for repo: {repo.name}")
            if num_prs > 0 and not done_with_prs and not used_cursor:
                return checkpoint

            # if we went past the start date during the loop or there are no more
            # prs to get, we move on to issues
            checkpoint.stage = GithubConnectorStage.ISSUES
            checkpoint.reset()

            if used_cursor:
                # save the checkpoint after changing stage; next run will continue from issues
                return checkpoint

        checkpoint.stage = GithubConnectorStage.ISSUES

        if self.include_issues and checkpoint.stage == GithubConnectorStage.ISSUES:
            logging.info(f"Fetching issues for repo: {repo.name}")

            issue_batch = list(
                _get_batch_rate_limited(
                    self._issues_func(repo),
                    checkpoint.curr_page,
                    checkpoint.cursor_url,
                    checkpoint.num_retrieved,
                    cursor_url_callback,
                    self.github_client,
                )
            )
            logging.info(f"Fetched {len(issue_batch)} issues for repo: {repo.name}")
            checkpoint.curr_page += 1
            done_with_issues = False
            num_issues = 0
            for issue in issue_batch:
                num_issues += 1
                issue = cast(Issue, issue)
                # we iterate backwards in time, so at this point we stop processing issues
                if (
                    start is not None
                    and issue.updated_at.replace(tzinfo=timezone.utc) < start
                ):
                    done_with_issues = True
                    break
                # Skip issues updated after the end date
                if (
                    end is not None
                    and issue.updated_at.replace(tzinfo=timezone.utc) > end
                ):
                    continue

                if issue.pull_request is not None:
                    # PRs are handled separately
                    continue

                try:
                    yield _convert_issue_to_document(issue, repo_external_access)
                except Exception as e:
                    error_msg = f"Error converting issue to document: {e}"
                    logging.exception(error_msg)
                    yield ConnectorFailure(
                        failed_document=DocumentFailure(
                            document_id=str(issue.id),
                            document_link=issue.html_url,
                        ),
                        failure_message=error_msg,
                        exception=e,
                    )
                    continue

            logging.info(f"Fetched {num_issues} issues for repo: {repo.name}")
            # if we found any issues on the page, and we're not done, return the checkpoint.
            # don't return if we're using cursor-based pagination to avoid infinite loops
            if num_issues > 0 and not done_with_issues and not checkpoint.cursor_url:
                return checkpoint

            # if we went past the start date during the loop or there are no more
            # issues to get, we move on to the next repo
            checkpoint.stage = GithubConnectorStage.PRS
            checkpoint.reset()

        checkpoint.has_more = len(checkpoint.cached_repo_ids) > 0
        if checkpoint.cached_repo_ids:
            next_id = checkpoint.cached_repo_ids.pop()
            next_repo = self.github_client.get_repo(next_id)
            checkpoint.cached_repo = SerializedRepository(
                id=next_id,
                headers=next_repo.raw_headers,
                raw_data=next_repo.raw_data,
            )
            checkpoint.stage = GithubConnectorStage.PRS
            checkpoint.reset()

            if checkpoint.cached_repo_ids:
                logging.info(
                    f"{len(checkpoint.cached_repo_ids)} repos remaining (IDs: {checkpoint.cached_repo_ids})"
                )
            else:
                logging.info("No more repos remaining")

        return checkpoint

    def _load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: GithubConnectorCheckpoint,
        include_permissions: bool = False,
    ) -> CheckpointOutput[GithubConnectorCheckpoint]:
        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
        # add a day for timezone safety
        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc) + ONE_DAY

        # Move start time back by 3 hours, since some Issues/PRs are getting dropped
        # Could be due to delayed processing on GitHub side
        # The non-updated issues since last poll will be shortcut-ed and not embedded
        adjusted_start_datetime = start_datetime - timedelta(hours=3)

        epoch = datetime.fromtimestamp(0, tz=timezone.utc)
        if adjusted_start_datetime < epoch:
            adjusted_start_datetime = epoch

        return self._fetch_from_github(
            checkpoint,
            start=adjusted_start_datetime,
            end=end_datetime,
            include_permissions=include_permissions,
        )

    @override
    def load_from_checkpoint(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: GithubConnectorCheckpoint,
    ) -> CheckpointOutput[GithubConnectorCheckpoint]:
        return self._load_from_checkpoint(
            start, end, checkpoint, include_permissions=False
        )

    @override
    def load_from_checkpoint_with_perm_sync(
        self,
        start: SecondsSinceUnixEpoch,
        end: SecondsSinceUnixEpoch,
        checkpoint: GithubConnectorCheckpoint,
    ) -> CheckpointOutput[GithubConnectorCheckpoint]:
        return self._load_from_checkpoint(
            start, end, checkpoint, include_permissions=True
        )

    def validate_connector_settings(self) -> None:
        if self.github_client is None:
            raise ConnectorMissingCredentialError("GitHub credentials not loaded.")

        if not self.repo_owner:
            raise ConnectorValidationError(
                "Invalid connector settings: 'repo_owner' must be provided."
            )

        try:
            if self.repositories:
                if "," in self.repositories:
                    # Multiple repositories specified
                    repo_names = [name.strip() for name in self.repositories.split(",")]
                    if not repo_names:
                        raise ConnectorValidationError(
                            "Invalid connector settings: No valid repository names provided."
                        )

                    # Validate at least one repository exists and is accessible
                    valid_repos = False
                    validation_errors = []

                    for repo_name in repo_names:
                        if not repo_name:
                            continue

                        try:
                            test_repo = self.github_client.get_repo(
                                f"{self.repo_owner}/{repo_name}"
                            )
                            logging.info(
                                f"Successfully accessed repository: {self.repo_owner}/{repo_name}"
                            )
                            test_repo.get_contents("")
                            valid_repos = True
                            # If at least one repo is valid, we can proceed
                            break
                        except GithubException as e:
                            validation_errors.append(
                                f"Repository '{repo_name}': {e.data.get('message', str(e))}"
                            )

                    if not valid_repos:
                        error_msg = (
                            "None of the specified repositories could be accessed: "
                        )
                        error_msg += ", ".join(validation_errors)
                        raise ConnectorValidationError(error_msg)
                else:
                    # Single repository (backward compatibility)
                    test_repo = self.github_client.get_repo(
                        f"{self.repo_owner}/{self.repositories}"
                    )
                    test_repo.get_contents("")
            else:
                # Try to get organization first
                try:
                    org = self.github_client.get_organization(self.repo_owner)
                    total_count = org.get_repos().totalCount
                    if total_count == 0:
                        raise ConnectorValidationError(
                            f"Found no repos for organization: {self.repo_owner}. "
                            "Does the credential have the right scopes?"
                        )
                except GithubException as e:
                    # Check for missing SSO
                    MISSING_SSO_ERROR_MESSAGE = "You must grant your Personal Access token access to this organization".lower()
                    if MISSING_SSO_ERROR_MESSAGE in str(e).lower():
                        SSO_GUIDE_LINK = (
                            "https://docs.github.com/en/enterprise-cloud@latest/authentication/"
                            "authenticating-with-saml-single-sign-on/"
                            "authorizing-a-personal-access-token-for-use-with-saml-single-sign-on"
                        )
                        raise ConnectorValidationError(
                            f"Your GitHub token is missing authorization to access the "
                            f"`{self.repo_owner}` organization. Please follow the guide to "
                            f"authorize your token: {SSO_GUIDE_LINK}"
                        )
                    # If not an org, try as a user
                    user = self.github_client.get_user(self.repo_owner)

                    # Check if we can access any repos
                    total_count = user.get_repos().totalCount
                    if total_count == 0:
                        raise ConnectorValidationError(
                            f"Found no repos for user: {self.repo_owner}. "
                            "Does the credential have the right scopes?"
                        )

        except RateLimitExceededException:
            raise UnexpectedValidationError(
                "Validation failed due to GitHub rate-limits being exceeded. Please try again later."
            )

        except GithubException as e:
            if e.status == 401:
                raise CredentialExpiredError(
                    "GitHub credential appears to be invalid or expired (HTTP 401)."
                )
            elif e.status == 403:
                raise InsufficientPermissionsError(
                    "Your GitHub token does not have sufficient permissions for this repository (HTTP 403)."
                )
            elif e.status == 404:
                if self.repositories:
                    if "," in self.repositories:
                        raise ConnectorValidationError(
                            f"None of the specified GitHub repositories could be found for owner: {self.repo_owner}"
                        )
                    else:
                        raise ConnectorValidationError(
                            f"GitHub repository not found with name: {self.repo_owner}/{self.repositories}"
                        )
                else:
                    raise ConnectorValidationError(
                        f"GitHub user or organization not found: {self.repo_owner}"
                    )
            else:
                raise ConnectorValidationError(
                    f"Unexpected GitHub error (status={e.status}): {e.data}"
                )

        except Exception as exc:
            raise Exception(
                f"Unexpected error during GitHub settings validation: {exc}"
            )

    def validate_checkpoint_json(
        self, checkpoint_json: str
    ) -> GithubConnectorCheckpoint:
        return GithubConnectorCheckpoint.model_validate_json(checkpoint_json)

    def build_dummy_checkpoint(self) -> GithubConnectorCheckpoint:
        return GithubConnectorCheckpoint(
            stage=GithubConnectorStage.PRS, curr_page=0, has_more=True, num_retrieved=0
        )


if __name__ == "__main__":
    import os

    # Initialize the connector
    connector = GithubConnector(
        repo_owner=os.environ["REPO_OWNER"],
        repositories=os.environ.get("REPOSITORIES"),
    )
    connector.load_credentials(
        {"github_access_token": os.environ["ACCESS_TOKEN_GITHUB"]}
    )

    if connector.github_client:
        get_external_access_permission(
            connector.get_github_repos(connector.github_client).pop(),
            connector.github_client,
        )

    # Create a time range from epoch to now
    end_time = datetime.now(timezone.utc)
    start_time = datetime.fromtimestamp(0, tz=timezone.utc)
    time_range = (start_time, end_time)

    # Initialize the runner with a batch size of 10
    runner: ConnectorRunner[GithubConnectorCheckpoint] = ConnectorRunner(
        connector, batch_size=10, include_permissions=False, time_range=time_range
    )

    # Get initial checkpoint
    checkpoint = connector.build_dummy_checkpoint()

    # Run the connector
    while checkpoint.has_more:
        for doc_batch, failure, next_checkpoint in runner.run(checkpoint):
            if doc_batch:
                print(f"Retrieved batch of {len(doc_batch)} documents")
                for doc in doc_batch:
                    print(f"Document: {doc.semantic_identifier}")
            if failure:
                print(f"Failure: {failure.failure_message}")
            if next_checkpoint:
                checkpoint = next_checkpoint
@@ -1,17 +0,0 @@
from typing import Any

from github import Repository
from github.Requester import Requester
from pydantic import BaseModel


class SerializedRepository(BaseModel):
    # id is part of the raw_data as well, just pulled out for convenience
    id: int
    headers: dict[str, str | int]
    raw_data: dict[str, Any]

    def to_Repository(self, requester: Requester) -> Repository.Repository:
        return Repository.Repository(
            requester, self.headers, self.raw_data, completed=True
        )
@@ -1,24 +0,0 @@
import time
import logging
from datetime import datetime
from datetime import timedelta
from datetime import timezone

from github import Github


def sleep_after_rate_limit_exception(github_client: Github) -> None:
    """
    Sleep until the GitHub rate limit resets.

    Args:
        github_client: The GitHub client that hit the rate limit
    """
    sleep_time = github_client.get_rate_limit().core.reset.replace(
        tzinfo=timezone.utc
    ) - datetime.now(tz=timezone.utc)
    sleep_time += timedelta(minutes=1)  # add an extra minute just to be safe
    logging.info(
        "Ran into Github rate-limit. Sleeping %s seconds.", sleep_time.seconds
    )
    time.sleep(sleep_time.total_seconds())
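This helper is intended to wrap PyGithub calls that may raise RateLimitExceededException, which is how _get_batch_rate_limited in the connector above uses it. A rough sketch of the calling pattern, assuming sleep_after_rate_limit_exception from this module is in scope; fetch_repo_with_retry is a hypothetical name, and the connector itself caps attempts at _MAX_NUM_RATE_LIMIT_RETRIES rather than retrying once:

from github import Github, RateLimitExceededException


def fetch_repo_with_retry(github_client: Github, repo_full_name: str):
    try:
        return github_client.get_repo(repo_full_name)
    except RateLimitExceededException:
        # block until the rate-limit window resets, then try once more
        sleep_after_rate_limit_exception(github_client)
        return github_client.get_repo(repo_full_name)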
@@ -1,46 +0,0 @@
import logging
from collections.abc import Callable
from typing import cast

from github import Github
from github.Repository import Repository

from common.data_source.models import ExternalAccess

from .models import SerializedRepository


def get_external_access_permission(
    repo: Repository, github_client: Github
) -> ExternalAccess:
    """
    Get the external access permission for a repository.
    This functionality requires Enterprise Edition.
    """
    # RAGFlow doesn't implement the Onyx EE external-permissions system.
    # Default to private/unknown permissions.
    return ExternalAccess.empty()


def deserialize_repository(
    cached_repo: SerializedRepository, github_client: Github
) -> Repository:
    """
    Deserialize a SerializedRepository back into a Repository object.
    """
    # Try to access the requester - different PyGithub versions may use different attribute names
    try:
        # Try to get the requester using getattr to avoid linter errors
        requester = getattr(github_client, "_requester", None)
        if requester is None:
            requester = getattr(github_client, "_Github__requester", None)
        if requester is None:
            # If we can't find the requester attribute, we need to fall back to recreating the repo
            raise AttributeError("Could not find requester attribute")

        return cached_repo.to_Repository(requester)
    except Exception as e:
        # If all else fails, re-fetch the repo directly
        logging.warning("Failed to deserialize repository: %s. Attempting to re-fetch.", e)
        repo_id = cached_repo.id
        return github_client.get_repo(repo_id)
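Together with models.py above, the intended round-trip is: snapshot the repository currently being indexed into a SerializedRepository on the checkpoint, then rebuild it with deserialize_repository on the next run. A hedged sketch of that flow, assuming SerializedRepository and deserialize_repository from the modules above are importable and using placeholder token/repo values:

from github import Github

github_client = Github("<token>")
repo = github_client.get_repo("<owner>/<repo>")

cached = SerializedRepository(
    id=repo.id,
    headers=repo.raw_headers,
    raw_data=repo.raw_data,
)
checkpoint_blob = cached.model_dump_json()  # what gets persisted in the checkpoint

restored = deserialize_repository(
    SerializedRepository.model_validate_json(checkpoint_blob), github_client
)
print(restored.full_name)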
@@ -237,13 +237,16 @@ class BaseConnector(abc.ABC, Generic[CT]):

    def validate_perm_sync(self) -> None:
        """
        Permission-sync validation hook.

        RAGFlow doesn't ship the Onyx EE permission-sync validation package.
        Connectors that support permission sync should override
        `validate_connector_settings()` as needed.
        Don't override this; add a function to perm_sync_valid.py in the ee package
        to do permission sync validation
        """
        return None
        """
        validate_connector_settings_fn = fetch_ee_implementation_or_noop(
            "onyx.connectors.perm_sync_valid",
            "validate_perm_sync",
            noop_return_value=None,
        )
        validate_connector_settings_fn(self)"""

    def set_allow_images(self, value: bool) -> None:
        """Implement if the underlying connector wants to skip/allow image downloading
@@ -149,7 +149,6 @@ dependencies = [
    # "cryptography==46.0.3",
    # "jinja2>=3.1.0",
    "pyairtable>=3.3.0",
    "pygithub>=2.8.1",
    "asana>=5.2.2",
    "python-gitlab>=7.0.0",
]
uv.lock (generated, 55 changed lines)
@@ -5509,22 +5509,6 @@ dependencies = [
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/8e/aedef81641c8dca6fd0fb7294de5bed9c45f3397d67fddf755c1042c2642/PyExecJS-1.5.1.tar.gz", hash = "sha256:34cc1d070976918183ff7bdc0ad71f8157a891c92708c00c5fbbff7a769f505c", size = 13344, upload-time = "2018-01-18T04:33:55.126Z" }
[[package]]
name = "pygithub"
version = "2.8.1"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "pyjwt", extra = ["crypto"] },
{ name = "pynacl" },
{ name = "requests" },
{ name = "typing-extensions" },
{ name = "urllib3" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/74/e560bdeffea72ecb26cff27f0fad548bbff5ecc51d6a155311ea7f9e4c4c/pygithub-2.8.1.tar.gz", hash = "sha256:341b7c78521cb07324ff670afd1baa2bf5c286f8d9fd302c1798ba594a5400c9", size = 2246994, upload-time = "2025-09-02T17:41:54.674Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/07/ba/7049ce39f653f6140aac4beb53a5aaf08b4407b6a3019aae394c1c5244ff/pygithub-2.8.1-py3-none-any.whl", hash = "sha256:23a0a5bca93baef082e03411bf0ce27204c32be8bfa7abc92fe4a3e132936df0", size = 432709, upload-time = "2025-09-02T17:41:52.947Z" },
]
[[package]]
name = "pygments"
version = "2.19.2"
@@ -5557,43 +5541,6 @@ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/7c/4c/ad33b92b9864cbde84f259d5df035a6447f91891f5be77788e2a3892bce3/pymysql-1.1.2-py3-none-any.whl", hash = "sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9", size = 45300, upload-time = "2025-08-24T12:55:53.394Z" },
]
[[package]]
name = "pynacl"
version = "1.6.1"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "cffi", marker = "platform_python_implementation != 'PyPy'" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b2/46/aeca065d227e2265125aea590c9c47fbf5786128c9400ee0eb7c88931f06/pynacl-1.6.1.tar.gz", hash = "sha256:8d361dac0309f2b6ad33b349a56cd163c98430d409fa503b10b70b3ad66eaa1d", size = 3506616, upload-time = "2025-11-10T16:02:13.195Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/75/d6/4b2dca33ed512de8f54e5c6074aa06eaeb225bfbcd9b16f33a414389d6bd/pynacl-1.6.1-cp314-cp314t-macosx_10_10_universal2.whl", hash = "sha256:7d7c09749450c385301a3c20dca967a525152ae4608c0a096fe8464bfc3df93d", size = 389109, upload-time = "2025-11-10T16:01:28.79Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3c/30/e8dbb8ff4fa2559bbbb2187ba0d0d7faf728d17cb8396ecf4a898b22d3da/pynacl-1.6.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc734c1696ffd49b40f7c1779c89ba908157c57345cf626be2e0719488a076d3", size = 808254, upload-time = "2025-11-10T16:01:37.839Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/f9/f5449c652f31da00249638dbab065ad4969c635119094b79b17c3a4da2ab/pynacl-1.6.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3cd787ec1f5c155dc8ecf39b1333cfef41415dc96d392f1ce288b4fe970df489", size = 1407365, upload-time = "2025-11-10T16:01:40.454Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/2f/9aa5605f473b712065c0a193ebf4ad4725d7a245533f0cd7e5dcdbc78f35/pynacl-1.6.1-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b35d93ab2df03ecb3aa506be0d3c73609a51449ae0855c2e89c7ed44abde40b", size = 843842, upload-time = "2025-11-10T16:01:30.524Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/32/8d/748f0f6956e207453da8f5f21a70885fbbb2e060d5c9d78e0a4a06781451/pynacl-1.6.1-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dece79aecbb8f4640a1adbb81e4aa3bfb0e98e99834884a80eb3f33c7c30e708", size = 1445559, upload-time = "2025-11-10T16:01:33.663Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/d0/2387f0dcb0e9816f38373999e48db4728ed724d31accdd4e737473319d35/pynacl-1.6.1-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:c2228054f04bf32d558fb89bb99f163a8197d5a9bf4efa13069a7fa8d4b93fc3", size = 825791, upload-time = "2025-11-10T16:01:34.823Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/3d/ef6fb7eb072aaf15f280bc66f26ab97e7fc9efa50fb1927683013ef47473/pynacl-1.6.1-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:2b12f1b97346f177affcdfdc78875ff42637cb40dcf79484a97dae3448083a78", size = 1410843, upload-time = "2025-11-10T16:01:36.401Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e3/fb/23824a017526850ee7d8a1cc4cd1e3e5082800522c10832edbbca8619537/pynacl-1.6.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e735c3a1bdfde3834503baf1a6d74d4a143920281cb724ba29fb84c9f49b9c48", size = 801140, upload-time = "2025-11-10T16:01:42.013Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5d/d1/ebc6b182cb98603a35635b727d62f094bc201bf610f97a3bb6357fe688d2/pynacl-1.6.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3384a454adf5d716a9fadcb5eb2e3e72cd49302d1374a60edc531c9957a9b014", size = 1371966, upload-time = "2025-11-10T16:01:43.297Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/f4/c9d7b6f02924b1f31db546c7bd2a83a2421c6b4a8e6a2e53425c9f2802e0/pynacl-1.6.1-cp314-cp314t-win32.whl", hash = "sha256:d8615ee34d01c8e0ab3f302dcdd7b32e2bcf698ba5f4809e7cc407c8cdea7717", size = 230482, upload-time = "2025-11-10T16:01:47.688Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c4/2c/942477957fba22da7bf99131850e5ebdff66623418ab48964e78a7a8293e/pynacl-1.6.1-cp314-cp314t-win_amd64.whl", hash = "sha256:5f5b35c1a266f8a9ad22525049280a600b19edd1f785bccd01ae838437dcf935", size = 243232, upload-time = "2025-11-10T16:01:45.208Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/7a/0c/bdbc0d04a53b96a765ab03aa2cf9a76ad8653d70bf1665459b9a0dedaa1c/pynacl-1.6.1-cp314-cp314t-win_arm64.whl", hash = "sha256:d984c91fe3494793b2a1fb1e91429539c6c28e9ec8209d26d25041ec599ccf63", size = 187907, upload-time = "2025-11-10T16:01:46.328Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/49/41/3cfb3b4f3519f6ff62bf71bf1722547644bcfb1b05b8fdbdc300249ba113/pynacl-1.6.1-cp38-abi3-macosx_10_10_universal2.whl", hash = "sha256:a6f9fd6d6639b1e81115c7f8ff16b8dedba1e8098d2756275d63d208b0e32021", size = 387591, upload-time = "2025-11-10T16:01:49.1Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/21/b8a6563637799f617a3960f659513eccb3fcc655d5fc2be6e9dc6416826f/pynacl-1.6.1-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e49a3f3d0da9f79c1bec2aa013261ab9fa651c7da045d376bd306cf7c1792993", size = 798866, upload-time = "2025-11-10T16:01:55.688Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e8/6c/dc38033bc3ea461e05ae8f15a81e0e67ab9a01861d352ae971c99de23e7c/pynacl-1.6.1-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7713f8977b5d25f54a811ec9efa2738ac592e846dd6e8a4d3f7578346a841078", size = 1398001, upload-time = "2025-11-10T16:01:57.101Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/9f/05/3ec0796a9917100a62c5073b20c4bce7bf0fea49e99b7906d1699cc7b61b/pynacl-1.6.1-cp38-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5a3becafc1ee2e5ea7f9abc642f56b82dcf5be69b961e782a96ea52b55d8a9fc", size = 834024, upload-time = "2025-11-10T16:01:50.228Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f0/b7/ae9982be0f344f58d9c64a1c25d1f0125c79201634efe3c87305ac7cb3e3/pynacl-1.6.1-cp38-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4ce50d19f1566c391fedc8dc2f2f5be265ae214112ebe55315e41d1f36a7f0a9", size = 1436766, upload-time = "2025-11-10T16:01:51.886Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b4/51/b2ccbf89cf3025a02e044dd68a365cad593ebf70f532299f2c047d2b7714/pynacl-1.6.1-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:543f869140f67d42b9b8d47f922552d7a967e6c116aad028c9bfc5f3f3b3a7b7", size = 817275, upload-time = "2025-11-10T16:01:53.351Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a8/6c/dd9ee8214edf63ac563b08a9b30f98d116942b621d39a751ac3256694536/pynacl-1.6.1-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a2bb472458c7ca959aeeff8401b8efef329b0fc44a89d3775cffe8fad3398ad8", size = 1401891, upload-time = "2025-11-10T16:01:54.587Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0f/c1/97d3e1c83772d78ee1db3053fd674bc6c524afbace2bfe8d419fd55d7ed1/pynacl-1.6.1-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:3206fa98737fdc66d59b8782cecc3d37d30aeec4593d1c8c145825a345bba0f0", size = 772291, upload-time = "2025-11-10T16:01:58.111Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/4d/ca/691ff2fe12f3bb3e43e8e8df4b806f6384593d427f635104d337b8e00291/pynacl-1.6.1-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:53543b4f3d8acb344f75fd4d49f75e6572fce139f4bfb4815a9282296ff9f4c0", size = 1370839, upload-time = "2025-11-10T16:01:59.252Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/30/27/06fe5389d30391fce006442246062cc35773c84fbcad0209fbbf5e173734/pynacl-1.6.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:319de653ef84c4f04e045eb250e6101d23132372b0a61a7acf91bac0fda8e58c", size = 791371, upload-time = "2025-11-10T16:02:01.075Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/7a/e2bde8c9d39074a5aa046c7d7953401608d1f16f71e237f4bef3fb9d7e49/pynacl-1.6.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:262a8de6bba4aee8a66f5edf62c214b06647461c9b6b641f8cd0cb1e3b3196fe", size = 1363031, upload-time = "2025-11-10T16:02:02.656Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/dd/b6/63fd77264dae1087770a1bb414bc604470f58fbc21d83822fc9c76248076/pynacl-1.6.1-cp38-abi3-win32.whl", hash = "sha256:9fd1a4eb03caf8a2fe27b515a998d26923adb9ddb68db78e35ca2875a3830dde", size = 226585, upload-time = "2025-11-10T16:02:07.116Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/12/c8/b419180f3fdb72ab4d45e1d88580761c267c7ca6eda9a20dcbcba254efe6/pynacl-1.6.1-cp38-abi3-win_amd64.whl", hash = "sha256:a569a4069a7855f963940040f35e87d8bc084cb2d6347428d5ad20550a0a1a21", size = 238923, upload-time = "2025-11-10T16:02:04.401Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/35/76/c34426d532e4dce7ff36e4d92cb20f4cbbd94b619964b93d24e8f5b5510f/pynacl-1.6.1-cp38-abi3-win_arm64.whl", hash = "sha256:5953e8b8cfadb10889a6e7bd0f53041a745d1b3d30111386a1bb37af171e6daf", size = 183970, upload-time = "2025-11-10T16:02:05.786Z" },
]
[[package]]
name = "pynndescent"
version = "0.5.13"
@@ -6237,7 +6184,6 @@ dependencies = [
{ name = "pyairtable" },
{ name = "pyclipper" },
{ name = "pycryptodomex" },
{ name = "pygithub" },
{ name = "pyobvector" },
{ name = "pyodbc" },
{ name = "pypandoc" },
@@ -6369,7 +6315,6 @@ requires-dist = [
{ name = "pyairtable", specifier = ">=3.3.0" },
{ name = "pyclipper", specifier = ">=1.4.0,<2.0.0" },
{ name = "pycryptodomex", specifier = "==3.20.0" },
{ name = "pygithub", specifier = ">=2.8.1" },
{ name = "pyobvector", specifier = "==0.2.18" },
{ name = "pyodbc", specifier = ">=5.2.0,<6.0.0" },
{ name = "pypandoc", specifier = ">=1.16" },