From 43f51baa96a86e23b3c955563e40cc27ea52ba1a Mon Sep 17 00:00:00 2001 From: Jin Hai Date: Mon, 8 Dec 2025 12:21:18 +0800 Subject: [PATCH] Fix errors (#11804) ### What problem does this PR solve? 1. typos 2. grammar errors. ### Type of change - [x] Refactoring Signed-off-by: Jin Hai --- agent/canvas.py | 2 +- agent/tools/yahoofinance.py | 28 +++++++++---------- api/apps/__init__.py | 2 +- api/apps/sdk/doc.py | 4 +-- common/data_source/confluence_connector.py | 12 ++++---- .../google_drive/doc_conversion.py | 2 +- .../google_drive/file_retrieval.py | 4 +-- common/data_source/google_drive/model.py | 4 +-- common/data_source/google_util/resource.py | 4 +-- common/data_source/html_utils.py | 2 +- common/data_source/interfaces.py | 3 +- common/data_source/jira/connector.py | 2 +- common/data_source/models.py | 4 +-- common/string_utils.py | 2 +- deepdoc/README.md | 4 +-- deepdoc/vision/table_structure_recognizer.py | 2 +- docs/references/http_api_reference.md | 4 +-- docs/references/python_api_reference.md | 2 +- plugin/README.md | 2 +- .../llm_tools/bad_calculator.py | 2 +- rag/app/book.py | 2 +- rag/app/qa.py | 2 +- rag/app/tag.py | 2 +- web/src/components/json-edit/interface.ts | 2 +- web/src/services/admin.service.d.ts | 8 +++--- 25 files changed, 53 insertions(+), 54 deletions(-) diff --git a/agent/canvas.py b/agent/canvas.py index cc40fd174..1f789d952 100644 --- a/agent/canvas.py +++ b/agent/canvas.py @@ -478,7 +478,7 @@ class Canvas(Graph): }) await _run_batch(idx, to) to = len(self.path) - # post processing of components invocation + # post-processing of components invocation for i in range(idx, to): cpn = self.get_component(self.path[i]) cpn_obj = self.get_component_obj(self.path[i]) diff --git a/agent/tools/yahoofinance.py b/agent/tools/yahoofinance.py index 324dfb643..06a4a9dad 100644 --- a/agent/tools/yahoofinance.py +++ b/agent/tools/yahoofinance.py @@ -75,7 +75,7 @@ class YahooFinance(ToolBase, ABC): @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 
60))) def _invoke(self, **kwargs): if self.check_if_canceled("YahooFinance processing"): - return + return None if not kwargs.get("stock_code"): self.set_output("report", "") @@ -84,33 +84,33 @@ class YahooFinance(ToolBase, ABC): last_e = "" for _ in range(self._param.max_retries+1): if self.check_if_canceled("YahooFinance processing"): - return + return None - yohoo_res = [] + yahoo_res = [] try: msft = yf.Ticker(kwargs["stock_code"]) if self.check_if_canceled("YahooFinance processing"): - return + return None if self._param.info: - yohoo_res.append("# Information:\n" + pd.Series(msft.info).to_markdown() + "\n") + yahoo_res.append("# Information:\n" + pd.Series(msft.info).to_markdown() + "\n") if self._param.history: - yohoo_res.append("# History:\n" + msft.history().to_markdown() + "\n") + yahoo_res.append("# History:\n" + msft.history().to_markdown() + "\n") if self._param.financials: - yohoo_res.append("# Calendar:\n" + pd.DataFrame(msft.calendar).to_markdown() + "\n") + yahoo_res.append("# Calendar:\n" + pd.DataFrame(msft.calendar).to_markdown() + "\n") if self._param.balance_sheet: - yohoo_res.append("# Balance sheet:\n" + msft.balance_sheet.to_markdown() + "\n") - yohoo_res.append("# Quarterly balance sheet:\n" + msft.quarterly_balance_sheet.to_markdown() + "\n") + yahoo_res.append("# Balance sheet:\n" + msft.balance_sheet.to_markdown() + "\n") + yahoo_res.append("# Quarterly balance sheet:\n" + msft.quarterly_balance_sheet.to_markdown() + "\n") if self._param.cash_flow_statement: - yohoo_res.append("# Cash flow statement:\n" + msft.cashflow.to_markdown() + "\n") - yohoo_res.append("# Quarterly cash flow statement:\n" + msft.quarterly_cashflow.to_markdown() + "\n") + yahoo_res.append("# Cash flow statement:\n" + msft.cashflow.to_markdown() + "\n") + yahoo_res.append("# Quarterly cash flow statement:\n" + msft.quarterly_cashflow.to_markdown() + "\n") if self._param.news: - yohoo_res.append("# News:\n" + pd.DataFrame(msft.news).to_markdown() + "\n") - 
self.set_output("report", "\n\n".join(yohoo_res)) + yahoo_res.append("# News:\n" + pd.DataFrame(msft.news).to_markdown() + "\n") + self.set_output("report", "\n\n".join(yahoo_res)) return self.output("report") except Exception as e: if self.check_if_canceled("YahooFinance processing"): - return + return None last_e = e logging.exception(f"YahooFinance error: {e}") diff --git a/api/apps/__init__.py b/api/apps/__init__.py index 4d9c7c501..9ef2f97d9 100644 --- a/api/apps/__init__.py +++ b/api/apps/__init__.py @@ -180,7 +180,7 @@ def login_user(user, remember=False, duration=None, force=False, fresh=True): user's `is_active` property is ``False``, they will not be logged in unless `force` is ``True``. - This will return ``True`` if the log in attempt succeeds, and ``False`` if + This will return ``True`` if the login attempt succeeds, and ``False`` if it fails (i.e. because the user is inactive). :param user: The user object to log in. diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index d7b918d95..0019e2a42 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -552,7 +552,7 @@ def list_docs(dataset_id, tenant_id): create_time_from = int(q.get("create_time_from", 0)) create_time_to = int(q.get("create_time_to", 0)) - # map run status (accept text or numeric) - align with API parameter + # map run status (text or numeric) - align with API parameter run_status_text_to_numeric = {"UNSTART": "0", "RUNNING": "1", "CANCEL": "2", "DONE": "3", "FAIL": "4"} run_status_converted = [run_status_text_to_numeric.get(v, v) for v in run_status] @@ -890,7 +890,7 @@ def list_chunks(tenant_id, dataset_id, document_id): type: string required: false default: "" - description: Chunk Id. + description: Chunk id. 
- in: header name: Authorization type: string diff --git a/common/data_source/confluence_connector.py b/common/data_source/confluence_connector.py index a7935ff6d..a057d0694 100644 --- a/common/data_source/confluence_connector.py +++ b/common/data_source/confluence_connector.py @@ -126,7 +126,7 @@ class OnyxConfluence: def _renew_credentials(self) -> tuple[dict[str, Any], bool]: """credential_json - the current json credentials Returns a tuple - 1. The up to date credentials + 1. The up-to-date credentials 2. True if the credentials were updated This method is intended to be used within a distributed lock. @@ -179,8 +179,8 @@ class OnyxConfluence: credential_json["confluence_refresh_token"], ) - # store the new credentials to redis and to the db thru the provider - # redis: we use a 5 min TTL because we are given a 10 minute grace period + # store the new credentials to redis and to the db through the provider + # redis: we use a 5 min TTL because we are given a 10-minute grace period # when keys are rotated. it's easier to expire the cached credentials # reasonably frequently rather than trying to handle strong synchronization # between the db and redis everywhere the credentials might be updated @@ -690,7 +690,7 @@ class OnyxConfluence: ) -> Iterator[dict[str, Any]]: """ This function will paginate through the top level query first, then - paginate through all of the expansions. + paginate through all the expansions. """ def _traverse_and_update(data: dict | list) -> None: @@ -863,7 +863,7 @@ def get_user_email_from_username__server( # For now, we'll just return None and log a warning. This means # we will keep retrying to get the email every group sync.
email = None - # We may want to just return a string that indicates failure so we dont + # We may want to just return a string that indicates failure so we don't # keep retrying # email = f"FAILED TO GET CONFLUENCE EMAIL FOR {user_name}" _USER_EMAIL_CACHE[user_name] = email @@ -912,7 +912,7 @@ def extract_text_from_confluence_html( confluence_object: dict[str, Any], fetched_titles: set[str], ) -> str: - """Parse a Confluence html page and replace the 'user Id' by the real + """Parse a Confluence html page and replace the 'user id' by the real User Display Name Args: diff --git a/common/data_source/google_drive/doc_conversion.py b/common/data_source/google_drive/doc_conversion.py index d697c1b2b..5ab68f9bf 100644 --- a/common/data_source/google_drive/doc_conversion.py +++ b/common/data_source/google_drive/doc_conversion.py @@ -76,7 +76,7 @@ ALL_ACCEPTED_FILE_EXTENSIONS = ACCEPTED_PLAIN_TEXT_FILE_EXTENSIONS + ACCEPTED_DO MAX_RETRIEVER_EMAILS = 20 CHUNK_SIZE_BUFFER = 64 # extra bytes past the limit to read -# This is not a standard valid unicode char, it is used by the docs advanced API to +# This is not a standard valid Unicode char, it is used by the docs advanced API to # represent smart chips (elements like dates and doc links). SMART_CHIP_CHAR = "\ue907" WEB_VIEW_LINK_KEY = "webViewLink" diff --git a/common/data_source/google_drive/file_retrieval.py b/common/data_source/google_drive/file_retrieval.py index ee6ea6b62..d2a8e0611 100644 --- a/common/data_source/google_drive/file_retrieval.py +++ b/common/data_source/google_drive/file_retrieval.py @@ -141,7 +141,7 @@ def crawl_folders_for_files( # Only mark a folder as done if it was fully traversed without errors # This usually indicates that the owner of the folder was impersonated. 
# In cases where this never happens, most likely the folder owner is - # not part of the google workspace in question (or for oauth, the authenticated + # not part of the Google Workspace in question (or for oauth, the authenticated # user doesn't own the folder) if found_files: update_traversed_ids_func(parent_id) @@ -232,7 +232,7 @@ def get_files_in_shared_drive( **kwargs, ): # If we found any files, mark this drive as traversed. When a user has access to a drive, - # they have access to all the files in the drive. Also not a huge deal if we re-traverse + # they have access to all the files in the drive. Also, not a huge deal if we re-traverse # empty drives. # NOTE: ^^ the above is not actually true due to folder restrictions: # https://support.google.com/a/users/answer/12380484?hl=en diff --git a/common/data_source/google_drive/model.py b/common/data_source/google_drive/model.py index d0e89c24e..d66cc21a5 100644 --- a/common/data_source/google_drive/model.py +++ b/common/data_source/google_drive/model.py @@ -22,7 +22,7 @@ class GDriveMimeType(str, Enum): MARKDOWN = "text/markdown" -# These correspond to The major stages of retrieval for google drive. +# These correspond to The major stages of retrieval for Google Drive. # The stages for the oauth flow are: # get_all_files_for_oauth(), # get_all_drive_ids(), @@ -117,7 +117,7 @@ class GoogleDriveCheckpoint(ConnectorCheckpoint): class RetrievedDriveFile(BaseModel): """ - Describes a file that has been retrieved from google drive. + Describes a file that has been retrieved from Google Drive. user_email is the email of the user that the file was retrieved by impersonating. If an error worthy of being reported is encountered, error should be set and later propagated as a ConnectorFailure. 
diff --git a/common/data_source/google_util/resource.py b/common/data_source/google_util/resource.py index fa598c1c0..eb060e468 100644 --- a/common/data_source/google_util/resource.py +++ b/common/data_source/google_util/resource.py @@ -29,8 +29,8 @@ class GmailService(Resource): class RefreshableDriveObject: """ - Running Google drive service retrieval functions - involves accessing methods of the service object (ie. files().list()) + Running Google Drive service retrieval functions + involves accessing methods of the service object (i.e. files().list()) which can raise a RefreshError if the access token is expired. This class is a wrapper that propagates the ability to refresh the access token and retry the final retrieval function until execute() is called. diff --git a/common/data_source/html_utils.py b/common/data_source/html_utils.py index 5f548c631..5eff62463 100644 --- a/common/data_source/html_utils.py +++ b/common/data_source/html_utils.py @@ -120,7 +120,7 @@ def format_document_soup( # table is standard HTML element if e.name == "table": in_table = True - # tr is for rows + # tr is for table rows elif e.name == "tr" and in_table: text += "\n" # td for data cell, th for header diff --git a/common/data_source/interfaces.py b/common/data_source/interfaces.py index 47ad94c9b..c5c665aa2 100644 --- a/common/data_source/interfaces.py +++ b/common/data_source/interfaces.py @@ -395,8 +395,7 @@ class AttachmentProcessingResult(BaseModel): class IndexingHeartbeatInterface(ABC): - """Defines a callback interface to be passed to - to run_indexing_entrypoint.""" + """Defines a callback interface to be passed to run_indexing_entrypoint.""" @abstractmethod def should_stop(self) -> bool: diff --git a/common/data_source/jira/connector.py b/common/data_source/jira/connector.py index 4635d72f3..06a0a9069 100644 --- a/common/data_source/jira/connector.py +++ b/common/data_source/jira/connector.py @@ -80,7 +80,7 @@ _TZ_OFFSET_PATTERN = re.compile(r"([+-])(\d{2})(:?)(\d{2})$") class
JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync): - """Retrieve Jira issues and emit them as markdown documents.""" + """Retrieve Jira issues and emit them as Markdown documents.""" def __init__( self, diff --git a/common/data_source/models.py b/common/data_source/models.py index e956194b8..2e18b958c 100644 --- a/common/data_source/models.py +++ b/common/data_source/models.py @@ -54,8 +54,8 @@ class ExternalAccess: A helper function that returns an *empty* set of external user-emails and group-ids, and sets `is_public` to `False`. This effectively makes the document in question "private" or inaccessible to anyone else. - This is especially helpful to use when you are performing permission-syncing, and some document's permissions aren't able - to be determined (for whatever reason). Setting its `ExternalAccess` to "private" is a feasible fallback. + This is especially helpful to use when you are performing permission-syncing, and some document's permissions can't + be determined (for whatever reason). Setting its `ExternalAccess` to "private" is a feasible fallback. """ return cls( diff --git a/common/string_utils.py b/common/string_utils.py index 9d4dc8d4d..5af008933 100644 --- a/common/string_utils.py +++ b/common/string_utils.py @@ -61,7 +61,7 @@ def clean_markdown_block(text): str: Cleaned text with Markdown code block syntax removed, and stripped of surrounding whitespace """ - # Remove opening ```markdown tag with optional whitespace and newlines + # Remove the opening ```markdown fence with optional whitespace and newlines # Matches: optional whitespace + ```markdown + optional whitespace + optional newline text = re.sub(r'^\s*```markdown\s*\n?', '', text) diff --git a/deepdoc/README.md b/deepdoc/README.md index 14c7947bf..9a5e44089 100644 --- a/deepdoc/README.md +++ b/deepdoc/README.md @@ -51,7 +51,7 @@ We use vision information to resolve problems as human being.
```bash python deepdoc/vision/t_ocr.py --inputs=path_to_images_or_pdfs --output_dir=path_to_store_result ``` - The inputs could be directory to images or PDF, or a image or PDF. + The inputs could be directory to images or PDF, or an image or PDF. You can look into the folder 'path_to_store_result' where has images which demonstrate the positions of results, txt files which contain the OCR text.
@@ -78,7 +78,7 @@ We use vision information to resolve problems as human being. ```bash python deepdoc/vision/t_recognizer.py --inputs=path_to_images_or_pdfs --threshold=0.2 --mode=layout --output_dir=path_to_store_result ``` - The inputs could be directory to images or PDF, or a image or PDF. + The inputs could be directory to images or PDF, or an image or PDF. You can look into the folder 'path_to_store_result' where has images which demonstrate the detection results as following:
diff --git a/deepdoc/vision/table_structure_recognizer.py b/deepdoc/vision/table_structure_recognizer.py index cf1c79db1..0cd762576 100644 --- a/deepdoc/vision/table_structure_recognizer.py +++ b/deepdoc/vision/table_structure_recognizer.py @@ -155,7 +155,7 @@ class TableStructureRecognizer(Recognizer): while i < len(boxes): if TableStructureRecognizer.is_caption(boxes[i]): if is_english: - cap + " " + cap += " " cap += boxes[i]["text"] boxes.pop(i) i -= 1 diff --git a/docs/references/http_api_reference.md b/docs/references/http_api_reference.md index d6a62a241..12d9e5cd8 100644 --- a/docs/references/http_api_reference.md +++ b/docs/references/http_api_reference.md @@ -4013,7 +4013,7 @@ Failure: **DELETE** `/api/v1/agents/{agent_id}/sessions` -Deletes sessions of a agent by ID. +Deletes sessions of an agent by ID. #### Request @@ -4072,7 +4072,7 @@ Failure: Generates five to ten alternative question strings from the user's original query to retrieve more relevant search results. -This operation requires a `Bearer Login Token`, which typically expires with in 24 hours. You can find the it in the Request Headers in your browser easily as shown below: +This operation requires a `Bearer Login Token`, which typically expires within 24 hours. You can find it in the Request Headers in your browser easily as shown below: ![Image](https://raw.githubusercontent.com/infiniflow/ragflow-docs/main/images/login_token.jpg) diff --git a/docs/references/python_api_reference.md b/docs/references/python_api_reference.md index 6b8e15485..26717cac9 100644 --- a/docs/references/python_api_reference.md +++ b/docs/references/python_api_reference.md @@ -1740,7 +1740,7 @@ for session in sessions: Agent.delete_sessions(ids: list[str] = None) ``` -Deletes sessions of a agent by ID. +Deletes sessions of an agent by ID.
#### Parameters diff --git a/plugin/README.md b/plugin/README.md index 1de01e2c4..cd11e91db 100644 --- a/plugin/README.md +++ b/plugin/README.md @@ -92,6 +92,6 @@ def get_metadata(cls) -> LLMToolMetadata: The `get_metadata` method is a `classmethod`. It will provide the description of this tool to LLM. -The fields starts with `display` can use a special notation: `$t:xxx`, which will use the i18n mechanism in the RAGFlow frontend, getting text from the `llmTools` category. The frontend will display what you put here if you don't use this notation. +The fields starting with `display` can use a special notation: `$t:xxx`, which will use the i18n mechanism in the RAGFlow frontend, getting text from the `llmTools` category. The frontend will display what you put here if you don't use this notation. Now our tool is ready. You can select it in the `Generate` component and try it out. diff --git a/plugin/embedded_plugins/llm_tools/bad_calculator.py b/plugin/embedded_plugins/llm_tools/bad_calculator.py index 537875f0b..04c3b815a 100644 --- a/plugin/embedded_plugins/llm_tools/bad_calculator.py +++ b/plugin/embedded_plugins/llm_tools/bad_calculator.py @@ -5,7 +5,7 @@ from plugin.llm_tool_plugin import LLMToolMetadata, LLMToolPlugin class BadCalculatorPlugin(LLMToolPlugin): """ A sample LLM tool plugin, will add two numbers with 100. - It only present for demo purpose. Do not use it in production. + It is only present for demo purposes. Do not use it in production. """ _version_ = "1.0.0" diff --git a/rag/app/book.py b/rag/app/book.py index 4b59fdc86..fe079783c 100644 --- a/rag/app/book.py +++ b/rag/app/book.py @@ -70,7 +70,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, """ Supported file formats are docx, pdf, txt. Since a book is long and not all the parts are useful, if it's a PDF, - please setup the page ranges for every book in order eliminate negative effects and save elapsed computing time.
+ please set up the page ranges for every book in order to eliminate negative effects and save elapsed computing time. """ parser_config = kwargs.get( "parser_config", { diff --git a/rag/app/qa.py b/rag/app/qa.py index 72a7eefae..ecf60ec4f 100644 --- a/rag/app/qa.py +++ b/rag/app/qa.py @@ -313,7 +313,7 @@ def mdQuestionLevel(s): def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", callback=None, **kwargs): """ Excel and csv(txt) format files are supported. - If the file is in excel format, there should be 2 column question and answer without header. + If the file is in Excel format, there should be 2 columns (question and answer) without header. And question column is ahead of answer column. And it's O.K if it has multiple sheets as long as the columns are rightly composed. diff --git a/rag/app/tag.py b/rag/app/tag.py index 5bd40f66f..fda91f1a3 100644 --- a/rag/app/tag.py +++ b/rag/app/tag.py @@ -37,7 +37,7 @@ def beAdoc(d, q, a, eng, row_num=-1): def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs): """ Excel and csv(txt) format files are supported. - If the file is in excel format, there should be 2 column content and tags without header. + If the file is in Excel format, there should be 2 columns (content and tags) without header. And content column is ahead of tags column. And it's O.K if it has multiple sheets as long as the columns are rightly composed. diff --git a/web/src/components/json-edit/interface.ts b/web/src/components/json-edit/interface.ts index 9807411c9..897a11031 100644 --- a/web/src/components/json-edit/interface.ts +++ b/web/src/components/json-edit/interface.ts @@ -244,7 +244,7 @@ export interface JsonEditorOptions { timestampFormat?: string; /** - * If true, unicode characters are escaped. false by default. + * If true, Unicode characters are escaped. false by default.
*/ escapeUnicode?: boolean; diff --git a/web/src/services/admin.service.d.ts b/web/src/services/admin.service.d.ts index 596528ad7..ce5fb80ac 100644 --- a/web/src/services/admin.service.d.ts +++ b/web/src/services/admin.service.d.ts @@ -66,7 +66,7 @@ declare module AdminService { title: string; }; - export type TaskExectorHeartbeatItem = { + export type TaskExecutorHeartbeatItem = { name: string; boot_at: string; now: string; @@ -79,7 +79,7 @@ declare module AdminService { pid: number; }; - export type TaskExecutorInfo = Record; + export type TaskExecutorInfo = Record; export type ListServicesItem = { extra: Record; @@ -134,7 +134,7 @@ declare module AdminService { export type RoleDetail = { id: string; name: string; - descrtiption: string; + description: string; create_date: string; update_date: string; }; @@ -162,7 +162,7 @@ declare module AdminService { id: number; email: string; create_date: string; - createt_time: number; + create_time: number; update_date: string; update_time: number; };