Feat: add Jira connector (#11285)

### What problem does this PR solve?

Add Jira connector.

<img width="978" height="925" alt="image" src="https://github.com/user-attachments/assets/78bb5c77-2710-4569-a76e-9087ca23b227" />

---

<img width="1903" height="489" alt="image" src="https://github.com/user-attachments/assets/193bc5c5-f751-4bd5-883a-2173282c2b96" />

---

<img width="1035" height="925" alt="image" src="https://github.com/user-attachments/assets/1a0aec19-30eb-4ada-9283-61d1c915f59d" />

---

<img width="1905" height="601" alt="image" src="https://github.com/user-attachments/assets/3dde1062-3f27-4717-8e09-fd5fd5e64171" />

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-29 14:46:35 +08:00 · 2025-11-17 09:38:04 +08:00
parent 61cf430dbb
commit 13e212c856
15 changed files with 1521 additions and 179 deletions
--- a/common/data_source/utils.py
+++ b/common/data_source/utils.py
@@ -48,17 +48,35 @@ from common.data_source.exceptions import RateLimitTriedTooManyTimesError
 from common.data_source.interfaces import CT, CheckpointedConnector, CheckpointOutputWrapper, ConfluenceUser, LoadFunction, OnyxExtensionType, SecondsSinceUnixEpoch, TokenResponse
 from common.data_source.models import BasicExpertInfo, Document

+_TZ_SUFFIX_PATTERN = re.compile(r"([+-])([\d:]+)$")
+

 def datetime_from_string(datetime_string: str) -> datetime:
    datetime_string = datetime_string.strip()

+    match_jira_format = _TZ_SUFFIX_PATTERN.search(datetime_string)
+    if match_jira_format:
+        sign, tz_field = match_jira_format.groups()
+        digits = tz_field.replace(":", "")
+
+        if digits.isdigit() and 1 <= len(digits) <= 4:
+            if len(digits) >= 3:
+                hours = digits[:-2].rjust(2, "0")
+                minutes = digits[-2:]
+            else:
+                hours = digits.rjust(2, "0")
+                minutes = "00"
+
+            normalized = f"{sign}{hours}:{minutes}"
+            datetime_string = f"{datetime_string[: match_jira_format.start()]}{normalized}"
+
    # Handle the case where the datetime string ends with 'Z' (Zulu time)
-    if datetime_string.endswith('Z'):
-        datetime_string = datetime_string[:-1] + '+00:00'
+    if datetime_string.endswith("Z"):
+        datetime_string = datetime_string[:-1] + "+00:00"

    # Handle timezone format "+0000" -> "+00:00"
-    if datetime_string.endswith('+0000'):
-        datetime_string = datetime_string[:-5] + '+00:00'
+    if datetime_string.endswith("+0000"):
+        datetime_string = datetime_string[:-5] + "+00:00"

    datetime_object = datetime.fromisoformat(datetime_string)

@@ -480,7 +498,7 @@ def get_file_ext(file_name: str) -> str:


 def is_accepted_file_ext(file_ext: str, extension_type: OnyxExtensionType) -> bool:
-    image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'}
+    image_extensions = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"}
    text_extensions = {".txt", ".md", ".mdx", ".conf", ".log", ".json", ".csv", ".tsv", ".xml", ".yml", ".yaml", ".sql"}
    document_extensions = {".pdf", ".docx", ".pptx", ".xlsx", ".eml", ".epub", ".html"}

@@ -902,6 +920,18 @@ def load_all_docs_from_checkpoint_connector(
    )


+_ATLASSIAN_CLOUD_DOMAINS = (".atlassian.net", ".jira.com", ".jira-dev.com")
+
+
+def is_atlassian_cloud_url(url: str) -> bool:
+    try:
+        host = urlparse(url).hostname or ""
+    except ValueError:
+        return False
+    host = host.lower()
+    return any(host.endswith(domain) for domain in _ATLASSIAN_CLOUD_DOMAINS)
+
+
 def get_cloudId(base_url: str) -> str:
    tenant_info_url = urljoin(base_url, "/_edge/tenant_info")
    response = requests.get(tenant_info_url, timeout=10)