Feat: add Jira connector (#11285)

### What problem does this PR solve?

Add Jira connector.

<img width="978" height="925" alt="image"
src="https://github.com/user-attachments/assets/78bb5c77-2710-4569-a76e-9087ca23b227"
/>

---

<img width="1903" height="489" alt="image"
src="https://github.com/user-attachments/assets/193bc5c5-f751-4bd5-883a-2173282c2b96"
/>

---

<img width="1035" height="925" alt="image"
src="https://github.com/user-attachments/assets/1a0aec19-30eb-4ada-9283-61d1c915f59d"
/>

---

<img width="1905" height="601" alt="image"
src="https://github.com/user-attachments/assets/3dde1062-3f27-4717-8e09-fd5fd5e64171"
/>

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Yongteng Lei
2025-11-17 09:38:04 +08:00
committed by GitHub
parent 61cf430dbb
commit 13e212c856
15 changed files with 1521 additions and 179 deletions

View File

@ -48,17 +48,35 @@ from common.data_source.exceptions import RateLimitTriedTooManyTimesError
from common.data_source.interfaces import CT, CheckpointedConnector, CheckpointOutputWrapper, ConfluenceUser, LoadFunction, OnyxExtensionType, SecondsSinceUnixEpoch, TokenResponse
from common.data_source.models import BasicExpertInfo, Document
_TZ_SUFFIX_PATTERN = re.compile(r"([+-])([\d:]+)$")
def datetime_from_string(datetime_string: str) -> datetime:
datetime_string = datetime_string.strip()
match_jira_format = _TZ_SUFFIX_PATTERN.search(datetime_string)
if match_jira_format:
sign, tz_field = match_jira_format.groups()
digits = tz_field.replace(":", "")
if digits.isdigit() and 1 <= len(digits) <= 4:
if len(digits) >= 3:
hours = digits[:-2].rjust(2, "0")
minutes = digits[-2:]
else:
hours = digits.rjust(2, "0")
minutes = "00"
normalized = f"{sign}{hours}:{minutes}"
datetime_string = f"{datetime_string[: match_jira_format.start()]}{normalized}"
# Handle the case where the datetime string ends with 'Z' (Zulu time)
if datetime_string.endswith('Z'):
datetime_string = datetime_string[:-1] + '+00:00'
if datetime_string.endswith("Z"):
datetime_string = datetime_string[:-1] + "+00:00"
# Handle timezone format "+0000" -> "+00:00"
if datetime_string.endswith('+0000'):
datetime_string = datetime_string[:-5] + '+00:00'
if datetime_string.endswith("+0000"):
datetime_string = datetime_string[:-5] + "+00:00"
datetime_object = datetime.fromisoformat(datetime_string)
@ -480,7 +498,7 @@ def get_file_ext(file_name: str) -> str:
def is_accepted_file_ext(file_ext: str, extension_type: OnyxExtensionType) -> bool:
image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'}
image_extensions = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"}
text_extensions = {".txt", ".md", ".mdx", ".conf", ".log", ".json", ".csv", ".tsv", ".xml", ".yml", ".yaml", ".sql"}
document_extensions = {".pdf", ".docx", ".pptx", ".xlsx", ".eml", ".epub", ".html"}
@ -902,6 +920,18 @@ def load_all_docs_from_checkpoint_connector(
)
# Host suffixes treated as Atlassian Cloud. The leading dot means only
# sub-domains match, never the bare apex domain or a look-alike such as
# "notatlassian.net".
_ATLASSIAN_CLOUD_DOMAINS = (".atlassian.net", ".jira.com", ".jira-dev.com")


def is_atlassian_cloud_url(url: str) -> bool:
    """Return True when *url* points at a host under an Atlassian Cloud domain.

    Only the hostname component of the URL is inspected; path, query and
    port are ignored. The comparison is case-insensitive.

    Args:
        url: Absolute URL to classify. A URL without a scheme has no
            hostname component and is therefore reported as not cloud.

    Returns:
        True if the hostname ends with one of ``_ATLASSIAN_CLOUD_DOMAINS``;
        False otherwise, including when ``urlparse`` rejects the URL with
        ``ValueError``.
    """
    try:
        host = urlparse(url).hostname or ""
    except ValueError:
        return False
    # A hostname may be written in absolute form with a trailing root dot
    # ("acme.atlassian.net."); drop it so the suffix comparison still matches.
    host = host.lower().rstrip(".")
    # str.endswith accepts a tuple of suffixes, so no explicit loop is needed.
    return host.endswith(_ATLASSIAN_CLOUD_DOMAINS)
def get_cloudId(base_url: str) -> str:
tenant_info_url = urljoin(base_url, "/_edge/tenant_info")
response = requests.get(tenant_info_url, timeout=10)