Feat: Bitbucket connector (#12332)

### What problem does this PR solve?

Feat: Bitbucket connector

NOT READY TO MERGE

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-02 02:25:31 +08:00 · 2025-12-31 17:18:30 +08:00
parent 6a664fea3b
commit 7d4d687dde
26 changed files with 1294 additions and 555 deletions
--- a/common/data_source/cross_connector_utils/init.py
+++ b/common/data_source/cross_connector_utils/init.py
--- a/common/data_source/cross_connector_utils/rate_limit_wrapper.py
+++ b/common/data_source/cross_connector_utils/rate_limit_wrapper.py
@ -0,0 +1,126 @@
+import time
+import logging
+from collections.abc import Callable
+from functools import wraps
+from typing import Any
+from typing import cast
+from typing import TypeVar
+
+import requests
+
+# Generic callable type: lets a decorator declare that it returns the same
+# callable type it was given.
+F = TypeVar("F", bound=Callable[..., Any])
+
+
+class RateLimitTriedTooManyTimesError(Exception):
+    """Raised when a call is still rate limited after the allowed number of waits."""
+
+    pass
+
+
+class _RateLimitDecorator:
+    """Builds a generic wrapper/decorator for calls to external APIs that
+    prevents making more than `max_calls` requests per `period`
+
+    Implementation inspired by the `ratelimit` library:
+    https://github.com/tomasbasham/ratelimit.
+
+    NOTE: is not thread safe.
+    """
+
+    def __init__(
+        self,
+        max_calls: int,
+        period: float,  # in seconds
+        sleep_time: float = 2,  # in seconds
+        sleep_backoff: float = 2,  # applies exponential backoff
+        max_num_sleep: int = 0,
+    ):
+        self.max_calls = max_calls
+        self.period = period
+        self.sleep_time = sleep_time
+        self.sleep_backoff = sleep_backoff
+        self.max_num_sleep = max_num_sleep
+
+        self.call_history: list[float] = []
+        self.curr_calls = 0
+
+    def __call__(self, func: F) -> F:
+        @wraps(func)
+        def wrapped_func(*args: list, **kwargs: dict[str, Any]) -> Any:
+            # cleanup calls which are no longer relevant
+            self._cleanup()
+
+            # check if we've exceeded the rate limit
+            sleep_cnt = 0
+            while len(self.call_history) == self.max_calls:
+                sleep_time = self.sleep_time * (self.sleep_backoff**sleep_cnt)
+                logging.warning(
+                    f"Rate limit exceeded for function {func.__name__}. "
+                    f"Waiting {sleep_time} seconds before retrying."
+                )
+                time.sleep(sleep_time)
+                sleep_cnt += 1
+                if self.max_num_sleep != 0 and sleep_cnt >= self.max_num_sleep:
+                    raise RateLimitTriedTooManyTimesError(
+                        f"Exceeded '{self.max_num_sleep}' retries for function '{func.__name__}'"
+                    )
+
+                self._cleanup()
+
+            # add the current call to the call history
+            self.call_history.append(time.monotonic())
+            return func(*args, **kwargs)
+
+        return cast(F, wrapped_func)
+
+    def _cleanup(self) -> None:
+        curr_time = time.monotonic()
+        time_to_expire_before = curr_time - self.period
+        self.call_history = [
+            call_time
+            for call_time in self.call_history
+            if call_time > time_to_expire_before
+        ]
+
+
+# Public alias for the decorator class; instantiate it with the limits and
+# apply the instance as a decorator.
+rate_limit_builder = _RateLimitDecorator
+
+
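+"""To let the external service itself signal when the rate limit has been hit
+(an HTTP 429 response, optionally carrying a Retry-After header), use the
+request wrapper below instead of the client-side limiter above."""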
+
+# Any callable that returns a `requests.Response` (e.g. `requests.get`).
+R = TypeVar("R", bound=Callable[..., requests.Response])
+
+
+def wrap_request_to_handle_ratelimiting(
+    request_fn: R, default_wait_time_sec: int = 30, max_waits: int = 30
+) -> R:
+    def wrapped_request(*args: list, **kwargs: dict[str, Any]) -> requests.Response:
+        for _ in range(max_waits):
+            response = request_fn(*args, **kwargs)
+            if response.status_code == 429:
+                try:
+                    wait_time = int(
+                        response.headers.get("Retry-After", default_wait_time_sec)
+                    )
+                except ValueError:
+                    wait_time = default_wait_time_sec
+
+                time.sleep(wait_time)
+                continue
+
+            return response
+
+        raise RateLimitTriedTooManyTimesError(f"Exceeded '{max_waits}' retries")
+
+    return cast(R, wrapped_request)
+
+
+_rate_limited_get = wrap_request_to_handle_ratelimiting(requests.get)
+_rate_limited_post = wrap_request_to_handle_ratelimiting(requests.post)
+
+
+class _RateLimitedRequest:
+    get = _rate_limited_get
+    post = _rate_limited_post
+
+
+rl_requests = _RateLimitedRequest
--- a/common/data_source/cross_connector_utils/retry_wrapper.py
+++ b/common/data_source/cross_connector_utils/retry_wrapper.py
@ -0,0 +1,88 @@
+from collections.abc import Callable
+import logging
+from logging import Logger
+from typing import Any
+from typing import cast
+from typing import TypeVar
+import requests
+from retry import retry
+
+from common.data_source.config import REQUEST_TIMEOUT_SECONDS
+
+
+# Generic callable type: lets a decorator declare that it returns the same
+# callable type it was given.
+F = TypeVar("F", bound=Callable[..., Any])
+# Module-level logger; also handed to `retry` so retry attempts are logged.
+logger = logging.getLogger(__name__)
+
+def retry_builder(
+    tries: int = 20,
+    delay: float = 0.1,
+    max_delay: float | None = 60,
+    backoff: float = 2,
+    jitter: tuple[float, float] | float = 1,
+    exceptions: type[Exception] | tuple[type[Exception], ...] = (Exception,),
+) -> Callable[[F], F]:
+    """Builds a generic wrapper/decorator for calls to external APIs that
+    may fail due to rate limiting, flakes, or other reasons. Applies exponential
+    backoff with jitter to retry the call."""
+
+    def retry_with_default(func: F) -> F:
+        @retry(
+            tries=tries,
+            delay=delay,
+            max_delay=max_delay,
+            backoff=backoff,
+            jitter=jitter,
+            logger=cast(Logger, logger),
+            exceptions=exceptions,
+        )
+        def wrapped_func(*args: list, **kwargs: dict[str, Any]) -> Any:
+            return func(*args, **kwargs)
+
+        return cast(F, wrapped_func)
+
+    return retry_with_default
+
+
+def request_with_retries(
+    method: str,
+    url: str,
+    *,
+    data: dict[str, Any] | None = None,
+    headers: dict[str, Any] | None = None,
+    params: dict[str, Any] | None = None,
+    timeout: int = REQUEST_TIMEOUT_SECONDS,
+    stream: bool = False,
+    tries: int = 8,
+    delay: float = 1,
+    backoff: float = 2,
+) -> requests.Response:
+    @retry(tries=tries, delay=delay, backoff=backoff, logger=cast(Logger, logger))
+    def _make_request() -> requests.Response:
+        response = requests.request(
+            method=method,
+            url=url,
+            data=data,
+            headers=headers,
+            params=params,
+            timeout=timeout,
+            stream=stream,
+        )
+        try:
+            response.raise_for_status()
+        except requests.exceptions.HTTPError:
+            logging.exception(
+                "Request failed:\n%s",
+                {
+                    "method": method,
+                    "url": url,
+                    "data": data,
+                    "headers": headers,
+                    "params": params,
+                    "timeout": timeout,
+                    "stream": stream,
+                },
+            )
+            raise
+        return response
+
+    return _make_request()