Feat: Bitbucket connector (#12332)

### What problem does this PR solve?

Feat: Bitbucket connector NOT READY TO MERGE

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Magicbook1108
2025-12-31 17:18:30 +08:00
committed by GitHub
parent 6a664fea3b
commit 7d4d687dde
26 changed files with 1294 additions and 555 deletions

View File

@ -0,0 +1,126 @@
import time
import logging
from collections.abc import Callable
from functools import wraps
from typing import Any
from typing import cast
from typing import TypeVar
import requests
F = TypeVar("F", bound=Callable[..., Any])
class RateLimitTriedTooManyTimesError(Exception):
pass
class _RateLimitDecorator:
"""Builds a generic wrapper/decorator for calls to external APIs that
prevents making more than `max_calls` requests per `period`
Implementation inspired by the `ratelimit` library:
https://github.com/tomasbasham/ratelimit.
NOTE: is not thread safe.
"""
def __init__(
self,
max_calls: int,
period: float, # in seconds
sleep_time: float = 2, # in seconds
sleep_backoff: float = 2, # applies exponential backoff
max_num_sleep: int = 0,
):
self.max_calls = max_calls
self.period = period
self.sleep_time = sleep_time
self.sleep_backoff = sleep_backoff
self.max_num_sleep = max_num_sleep
self.call_history: list[float] = []
self.curr_calls = 0
def __call__(self, func: F) -> F:
@wraps(func)
def wrapped_func(*args: list, **kwargs: dict[str, Any]) -> Any:
# cleanup calls which are no longer relevant
self._cleanup()
# check if we've exceeded the rate limit
sleep_cnt = 0
while len(self.call_history) == self.max_calls:
sleep_time = self.sleep_time * (self.sleep_backoff**sleep_cnt)
logging.warning(
f"Rate limit exceeded for function {func.__name__}. "
f"Waiting {sleep_time} seconds before retrying."
)
time.sleep(sleep_time)
sleep_cnt += 1
if self.max_num_sleep != 0 and sleep_cnt >= self.max_num_sleep:
raise RateLimitTriedTooManyTimesError(
f"Exceeded '{self.max_num_sleep}' retries for function '{func.__name__}'"
)
self._cleanup()
# add the current call to the call history
self.call_history.append(time.monotonic())
return func(*args, **kwargs)
return cast(F, wrapped_func)
def _cleanup(self) -> None:
curr_time = time.monotonic()
time_to_expire_before = curr_time - self.period
self.call_history = [
call_time
for call_time in self.call_history
if call_time > time_to_expire_before
]
rate_limit_builder = _RateLimitDecorator
"""If you want to allow the external service to tell you when you've hit the rate limit,
use the following instead"""
R = TypeVar("R", bound=Callable[..., requests.Response])
def wrap_request_to_handle_ratelimiting(
request_fn: R, default_wait_time_sec: int = 30, max_waits: int = 30
) -> R:
def wrapped_request(*args: list, **kwargs: dict[str, Any]) -> requests.Response:
for _ in range(max_waits):
response = request_fn(*args, **kwargs)
if response.status_code == 429:
try:
wait_time = int(
response.headers.get("Retry-After", default_wait_time_sec)
)
except ValueError:
wait_time = default_wait_time_sec
time.sleep(wait_time)
continue
return response
raise RateLimitTriedTooManyTimesError(f"Exceeded '{max_waits}' retries")
return cast(R, wrapped_request)
_rate_limited_get = wrap_request_to_handle_ratelimiting(requests.get)
_rate_limited_post = wrap_request_to_handle_ratelimiting(requests.post)
class _RateLimitedRequest:
get = _rate_limited_get
post = _rate_limited_post
rl_requests = _RateLimitedRequest

View File

@ -0,0 +1,88 @@
from collections.abc import Callable
import logging
from logging import Logger
from typing import Any
from typing import cast
from typing import TypeVar
import requests
from retry import retry
from common.data_source.config import REQUEST_TIMEOUT_SECONDS
F = TypeVar("F", bound=Callable[..., Any])
logger = logging.getLogger(__name__)
def retry_builder(
tries: int = 20,
delay: float = 0.1,
max_delay: float | None = 60,
backoff: float = 2,
jitter: tuple[float, float] | float = 1,
exceptions: type[Exception] | tuple[type[Exception], ...] = (Exception,),
) -> Callable[[F], F]:
"""Builds a generic wrapper/decorator for calls to external APIs that
may fail due to rate limiting, flakes, or other reasons. Applies exponential
backoff with jitter to retry the call."""
def retry_with_default(func: F) -> F:
@retry(
tries=tries,
delay=delay,
max_delay=max_delay,
backoff=backoff,
jitter=jitter,
logger=cast(Logger, logger),
exceptions=exceptions,
)
def wrapped_func(*args: list, **kwargs: dict[str, Any]) -> Any:
return func(*args, **kwargs)
return cast(F, wrapped_func)
return retry_with_default
def request_with_retries(
method: str,
url: str,
*,
data: dict[str, Any] | None = None,
headers: dict[str, Any] | None = None,
params: dict[str, Any] | None = None,
timeout: int = REQUEST_TIMEOUT_SECONDS,
stream: bool = False,
tries: int = 8,
delay: float = 1,
backoff: float = 2,
) -> requests.Response:
@retry(tries=tries, delay=delay, backoff=backoff, logger=cast(Logger, logger))
def _make_request() -> requests.Response:
response = requests.request(
method=method,
url=url,
data=data,
headers=headers,
params=params,
timeout=timeout,
stream=stream,
)
try:
response.raise_for_status()
except requests.exceptions.HTTPError:
logging.exception(
"Request failed:\n%s",
{
"method": method,
"url": url,
"data": data,
"headers": headers,
"params": params,
"timeout": timeout,
"stream": stream,
},
)
raise
return response
return _make_request()