mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
### What problem does this PR solve? ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --------- Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: jinhai <haijin.chn@gmail.com> Signed-off-by: Jin Hai <haijin.chn@gmail.com> Co-authored-by: Lynn <lynn_inf@hotmail.com> Co-authored-by: chanx <1243304602@qq.com> Co-authored-by: balibabu <cike8899@users.noreply.github.com> Co-authored-by: 纷繁下的无奈 <zhileihuang@126.com> Co-authored-by: huangzl <huangzl@shinemo.com> Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com> Co-authored-by: Wilmer <33392318@qq.com> Co-authored-by: Adrian Weidig <adrianweidig@gmx.net> Co-authored-by: Zhichang Yu <yuzhichang@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Yongteng Lei <yongtengrey@outlook.com> Co-authored-by: Liu An <asiro@qq.com> Co-authored-by: buua436 <66937541+buua436@users.noreply.github.com> Co-authored-by: BadwomanCraZY <511528396@qq.com> Co-authored-by: cucusenok <31804608+cucusenok@users.noreply.github.com> Co-authored-by: Russell Valentine <russ@coldstonelabs.org> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Billy Bao <newyorkupperbay@gmail.com> Co-authored-by: Zhedong Cen <cenzhedong2@126.com> Co-authored-by: TensorNull <129579691+TensorNull@users.noreply.github.com> Co-authored-by: TensorNull <tensor.null@gmail.com> Co-authored-by: Ajay <160579663+aybanda@users.noreply.github.com> Co-authored-by: AB <aj@Ajays-MacBook-Air.local> Co-authored-by: 天海蒼灆 <huangaoqin@tecpie.com> Co-authored-by: He Wang <wanghechn@qq.com> Co-authored-by: Atsushi Hatakeyama <atu729@icloud.com> Co-authored-by: Jin Hai <haijin.chn@gmail.com> Co-authored-by: Mohamed Mathari <155896313+melmathari@users.noreply.github.com> Co-authored-by: Mohamed Mathari <nocodeventure@Mac-mini-van-Mohamed.fritz.box> Co-authored-by: Stephen Hu <stephenhu@seismic.com> Co-authored-by: Shaun Zhang 
<zhangwfjh@users.noreply.github.com> Co-authored-by: zhimeng123 <60221886+zhimeng123@users.noreply.github.com> Co-authored-by: mxc <mxc@example.com> Co-authored-by: Dominik Novotný <50611433+SgtMarmite@users.noreply.github.com> Co-authored-by: EVGENY M <168018528+rjohny55@users.noreply.github.com> Co-authored-by: mcoder6425 <mcoder64@gmail.com> Co-authored-by: TeslaZY <TeslaZY@outlook.com> Co-authored-by: lemsn <lemsn@msn.com> Co-authored-by: lemsn <lemsn@126.com> Co-authored-by: Adrian Gora <47756404+adagora@users.noreply.github.com> Co-authored-by: Womsxd <45663319+Womsxd@users.noreply.github.com> Co-authored-by: FatMii <39074672+FatMii@users.noreply.github.com>
80 lines
2.8 KiB
Python
80 lines
2.8 KiB
Python
"""
Configuration management for Firecrawl integration with RAGFlow.
"""
|
|
|
|
import os
|
|
from typing import Dict, Any
|
|
from dataclasses import dataclass
|
|
import json
|
|
|
|
|
|
@dataclass
class FirecrawlConfig:
    """Configuration class for Firecrawl integration.

    Field values are range-checked once, in ``__post_init__``, when the
    instance is constructed. Assigning to an attribute afterwards is not
    re-validated.

    Alternate constructors: ``from_env``, ``from_dict`` and ``from_json``.
    Serializers: ``to_dict`` and ``to_json``.
    """

    # Firecrawl API key; must be non-empty and start with "fc-".
    api_key: str
    # Base URL of the Firecrawl API (not validated here).
    api_url: str = "https://api.firecrawl.dev"
    # Retry limit; accepted range is 1..10 inclusive.
    max_retries: int = 3
    # Timeout in seconds; accepted range is 5..300 inclusive.
    timeout: int = 30
    # Rate limit delay in seconds; accepted range is 0.1..10.0 inclusive.
    rate_limit_delay: float = 1.0
    # Concurrency limit; must be at least 1.
    max_concurrent_requests: int = 5

    def __post_init__(self):
        """Validate configuration after initialization.

        Raises:
            ValueError: If the API key is empty or does not start with
                ``fc-``, or if any numeric setting is outside its
                accepted range.
        """
        if not self.api_key:
            raise ValueError("Firecrawl API key is required")

        if not self.api_key.startswith("fc-"):
            raise ValueError("Invalid Firecrawl API key format. Must start with 'fc-'")

        if self.max_retries < 1 or self.max_retries > 10:
            raise ValueError("Max retries must be between 1 and 10")

        if self.timeout < 5 or self.timeout > 300:
            raise ValueError("Timeout must be between 5 and 300 seconds")

        if self.rate_limit_delay < 0.1 or self.rate_limit_delay > 10.0:
            raise ValueError("Rate limit delay must be between 0.1 and 10.0 seconds")

        # Every other numeric field is range-checked above; without this
        # check a zero or negative concurrency limit would be accepted.
        if self.max_concurrent_requests < 1:
            raise ValueError("Max concurrent requests must be at least 1")

    @staticmethod
    def _read_env_number(name: str, default: str, cast: Any) -> Any:
        """Read environment variable *name* and convert it with *cast*.

        Args:
            name: Environment variable to read.
            default: String used when the variable is not set.
            cast: Converter applied to the string (``int`` or ``float``).

        Returns:
            The converted value.

        Raises:
            ValueError: If the string cannot be converted. The message
                names the variable so the bad setting is easy to locate.
        """
        raw = os.getenv(name, default)
        try:
            return cast(raw)
        except ValueError as err:
            raise ValueError(
                f"Environment variable {name} must be a valid "
                f"{cast.__name__}, got {raw!r}"
            ) from err

    @classmethod
    def from_env(cls) -> "FirecrawlConfig":
        """Create configuration from environment variables.

        Reads ``FIRECRAWL_API_KEY`` (required) and the optional
        ``FIRECRAWL_API_URL``, ``FIRECRAWL_MAX_RETRIES``,
        ``FIRECRAWL_TIMEOUT``, ``FIRECRAWL_RATE_LIMIT_DELAY`` and
        ``FIRECRAWL_MAX_CONCURRENT``. Unset optional variables fall back
        to the same defaults as the dataclass fields.

        Returns:
            A validated ``FirecrawlConfig``.

        Raises:
            ValueError: If ``FIRECRAWL_API_KEY`` is unset or empty, if a
                numeric variable cannot be parsed, or if any value fails
                the checks in ``__post_init__``.
        """
        api_key = os.getenv("FIRECRAWL_API_KEY")
        if not api_key:
            raise ValueError("FIRECRAWL_API_KEY environment variable not set")

        return cls(
            api_key=api_key,
            api_url=os.getenv("FIRECRAWL_API_URL", "https://api.firecrawl.dev"),
            max_retries=cls._read_env_number("FIRECRAWL_MAX_RETRIES", "3", int),
            timeout=cls._read_env_number("FIRECRAWL_TIMEOUT", "30", int),
            rate_limit_delay=cls._read_env_number("FIRECRAWL_RATE_LIMIT_DELAY", "1.0", float),
            max_concurrent_requests=cls._read_env_number("FIRECRAWL_MAX_CONCURRENT", "5", int),
        )

    @classmethod
    def from_dict(cls, config_dict: Dict[str, Any]) -> "FirecrawlConfig":
        """Create configuration from dictionary.

        The dictionary is expanded into keyword arguments of the
        constructor, so its keys must be field names of this class.

        Args:
            config_dict: Mapping of field name to value; ``api_key`` is
                required, all other fields are optional.

        Returns:
            A validated ``FirecrawlConfig``.

        Raises:
            TypeError: If ``api_key`` is missing or an unknown key is present.
            ValueError: If any value fails the checks in ``__post_init__``.
        """
        return cls(**config_dict)

    def to_dict(self) -> Dict[str, Any]:
        """Convert configuration to dictionary.

        Returns:
            A new dict with one entry per field. The API key is included
            as-is, so treat the result as sensitive.
        """
        return {
            "api_key": self.api_key,
            "api_url": self.api_url,
            "max_retries": self.max_retries,
            "timeout": self.timeout,
            "rate_limit_delay": self.rate_limit_delay,
            "max_concurrent_requests": self.max_concurrent_requests,
        }

    def to_json(self) -> str:
        """Convert configuration to JSON string.

        Returns:
            The ``to_dict`` mapping serialized with a two-space indent.
        """
        return json.dumps(self.to_dict(), indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> "FirecrawlConfig":
        """Create configuration from JSON string.

        Args:
            json_str: JSON text that decodes to a mapping accepted by
                ``from_dict``.

        Returns:
            A validated ``FirecrawlConfig``.

        Raises:
            json.JSONDecodeError: If ``json_str`` is not valid JSON.
            TypeError: If the decoded value is rejected by ``from_dict``.
            ValueError: If any value fails the checks in ``__post_init__``.
        """
        config_dict = json.loads(json_str)
        return cls.from_dict(config_dict)
|