Fix errors (#11804)

### What problem does this PR solve? 1. typos 2. grammar errors. ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-30 07:06:39 +08:00 · 2025-12-08 12:21:18 +08:00
parent 5a2011e687
commit 43f51baa96
25 changed files with 53 additions and 54 deletions
--- a/common/data_source/confluence_connector.py
+++ b/common/data_source/confluence_connector.py
@ -126,7 +126,7 @@ class OnyxConfluence:
    def _renew_credentials(self) -> tuple[dict[str, Any], bool]:
        """credential_json - the current json credentials
        Returns a tuple
-        1. The up to date credentials
+        1. The up-to-date credentials
        2. True if the credentials were updated

        This method is intended to be used within a distributed lock.
@ -179,8 +179,8 @@ class OnyxConfluence:
            credential_json["confluence_refresh_token"],
        )

-        # store the new credentials to redis and to the db thru the provider
-        # redis: we use a 5 min TTL because we are given a 10 minute grace period
+        # store the new credentials to redis and to the db through the provider
+        # redis: we use a 5 min TTL because we are given a 10-minute grace period
        # when keys are rotated. it's easier to expire the cached credentials
        # reasonably frequently rather than trying to handle strong synchronization
        # between the db and redis everywhere the credentials might be updated
@ -690,7 +690,7 @@ class OnyxConfluence:
    ) -> Iterator[dict[str, Any]]:
        """
        This function will paginate through the top level query first, then
-        paginate through all of the expansions.
+        paginate through all the expansions.
        """

        def _traverse_and_update(data: dict | list) -> None:
@ -863,7 +863,7 @@ def get_user_email_from_username__server(
            # For now, we'll just return None and log a warning. This means
            # we will keep retrying to get the email every group sync.
            email = None
-            # We may want to just return a string that indicates failure so we dont
+            # We may want to just return a string that indicates failure so we don't
            # keep retrying
            # email = f"FAILED TO GET CONFLUENCE EMAIL FOR {user_name}"
        _USER_EMAIL_CACHE[user_name] = email
@ -912,7 +912,7 @@ def extract_text_from_confluence_html(
    confluence_object: dict[str, Any],
    fetched_titles: set[str],
 ) -> str:
-    """Parse a Confluence html page and replace the 'user Id' by the real
+    """Parse a Confluence HTML page and replace the 'user id' with the real
        User Display Name

    Args:
--- a/common/data_source/google_drive/doc_conversion.py
+++ b/common/data_source/google_drive/doc_conversion.py
@ -76,7 +76,7 @@ ALL_ACCEPTED_FILE_EXTENSIONS = ACCEPTED_PLAIN_TEXT_FILE_EXTENSIONS + ACCEPTED_DO

 MAX_RETRIEVER_EMAILS = 20
 CHUNK_SIZE_BUFFER = 64  # extra bytes past the limit to read
-# This is not a standard valid unicode char, it is used by the docs advanced API to
+# This is not a standard valid Unicode char; it is used by the docs advanced API to
 # represent smart chips (elements like dates and doc links).
 SMART_CHIP_CHAR = "\ue907"
 WEB_VIEW_LINK_KEY = "webViewLink"
--- a/common/data_source/google_drive/file_retrieval.py
+++ b/common/data_source/google_drive/file_retrieval.py
@ -141,7 +141,7 @@ def crawl_folders_for_files(
            # Only mark a folder as done if it was fully traversed without errors
            # This usually indicates that the owner of the folder was impersonated.
            # In cases where this never happens, most likely the folder owner is
-            # not part of the google workspace in question (or for oauth, the authenticated
+            # not part of the Google Workspace in question (or for OAuth, the authenticated
            # user doesn't own the folder)
            if found_files:
                update_traversed_ids_func(parent_id)
@ -232,7 +232,7 @@ def get_files_in_shared_drive(
        **kwargs,
    ):
        # If we found any files, mark this drive as traversed. When a user has access to a drive,
-        # they have access to all the files in the drive. Also not a huge deal if we re-traverse
+        # they have access to all the files in the drive. Also, not a huge deal if we re-traverse
        # empty drives.
        # NOTE: ^^ the above is not actually true due to folder restrictions:
        # https://support.google.com/a/users/answer/12380484?hl=en
--- a/common/data_source/google_drive/model.py
+++ b/common/data_source/google_drive/model.py
@ -22,7 +22,7 @@ class GDriveMimeType(str, Enum):
    MARKDOWN = "text/markdown"


-# These correspond to The major stages of retrieval for google drive.
+# These correspond to the major stages of retrieval for Google Drive.
 # The stages for the oauth flow are:
 # get_all_files_for_oauth(),
 # get_all_drive_ids(),
@ -117,7 +117,7 @@ class GoogleDriveCheckpoint(ConnectorCheckpoint):

 class RetrievedDriveFile(BaseModel):
    """
-    Describes a file that has been retrieved from google drive.
+    Describes a file that has been retrieved from Google Drive.
    user_email is the email of the user that the file was retrieved
    by impersonating. If an error worthy of being reported is encountered,
    error should be set and later propagated as a ConnectorFailure.
--- a/common/data_source/google_util/resource.py
+++ b/common/data_source/google_util/resource.py
@ -29,8 +29,8 @@ class GmailService(Resource):

 class RefreshableDriveObject:
    """
-    Running Google drive service retrieval functions
-    involves accessing methods of the service object (ie. files().list())
+    Running Google Drive service retrieval functions
+    involves accessing methods of the service object (i.e. files().list())
    which can raise a RefreshError if the access token is expired.
    This class is a wrapper that propagates the ability to refresh the access token
    and retry the final retrieval function until execute() is called.
--- a/common/data_source/html_utils.py
+++ b/common/data_source/html_utils.py
@ -120,7 +120,7 @@ def format_document_soup(
            # table is standard HTML element
            if e.name == "table":
                in_table = True
-            # tr is for rows
+            # tr marks a table row
            elif e.name == "tr" and in_table:
                text += "\n"
            # td for data cell, th for header
--- a/common/data_source/interfaces.py
+++ b/common/data_source/interfaces.py
@ -395,8 +395,7 @@ class AttachmentProcessingResult(BaseModel):


 class IndexingHeartbeatInterface(ABC):
-    """Defines a callback interface to be passed to
-    to run_indexing_entrypoint."""
+    """Defines a callback interface to be passed to run_indexing_entrypoint."""

    @abstractmethod
    def should_stop(self) -> bool:
--- a/common/data_source/jira/connector.py
+++ b/common/data_source/jira/connector.py
@ -80,7 +80,7 @@ _TZ_OFFSET_PATTERN = re.compile(r"([+-])(\d{2})(:?)(\d{2})$")


 class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync):
-    """Retrieve Jira issues and emit them as markdown documents."""
+    """Retrieve Jira issues and emit them as Markdown documents."""

    def __init__(
        self,
--- a/common/data_source/models.py
+++ b/common/data_source/models.py
@ -54,8 +54,8 @@ class ExternalAccess:
        A helper function that returns an *empty* set of external user-emails and group-ids, and sets `is_public` to `False`.
        This effectively makes the document in question "private" or inaccessible to anyone else.

-        This is especially helpful to use when you are performing permission-syncing, and some document's permissions aren't able
-        to be determined (for whatever reason). Setting its `ExternalAccess` to "private" is a feasible fallback.
+        This is especially helpful to use when you are performing permission-syncing, and some document's permissions can't
+        be determined (for whatever reason). Setting its `ExternalAccess` to "private" is a feasible fallback.
        """

        return cls(