Files
ragflow/intergrations/firecrawl/firecrawl_config.py
Kevin Hu 20b577a72c Fix: Merge main branch (#10377)
### What problem does this PR solve?


### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Signed-off-by: dependabot[bot] <support@github.com>
Signed-off-by: jinhai <haijin.chn@gmail.com>
Signed-off-by: Jin Hai <haijin.chn@gmail.com>
Co-authored-by: Lynn <lynn_inf@hotmail.com>
Co-authored-by: chanx <1243304602@qq.com>
Co-authored-by: balibabu <cike8899@users.noreply.github.com>
Co-authored-by: 纷繁下的无奈 <zhileihuang@126.com>
Co-authored-by: huangzl <huangzl@shinemo.com>
Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com>
Co-authored-by: Wilmer <33392318@qq.com>
Co-authored-by: Adrian Weidig <adrianweidig@gmx.net>
Co-authored-by: Zhichang Yu <yuzhichang@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Yongteng Lei <yongtengrey@outlook.com>
Co-authored-by: Liu An <asiro@qq.com>
Co-authored-by: buua436 <66937541+buua436@users.noreply.github.com>
Co-authored-by: BadwomanCraZY <511528396@qq.com>
Co-authored-by: cucusenok <31804608+cucusenok@users.noreply.github.com>
Co-authored-by: Russell Valentine <russ@coldstonelabs.org>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Billy Bao <newyorkupperbay@gmail.com>
Co-authored-by: Zhedong Cen <cenzhedong2@126.com>
Co-authored-by: TensorNull <129579691+TensorNull@users.noreply.github.com>
Co-authored-by: TensorNull <tensor.null@gmail.com>
Co-authored-by: Ajay <160579663+aybanda@users.noreply.github.com>
Co-authored-by: AB <aj@Ajays-MacBook-Air.local>
Co-authored-by: 天海蒼灆 <huangaoqin@tecpie.com>
Co-authored-by: He Wang <wanghechn@qq.com>
Co-authored-by: Atsushi Hatakeyama <atu729@icloud.com>
Co-authored-by: Jin Hai <haijin.chn@gmail.com>
Co-authored-by: Mohamed Mathari <155896313+melmathari@users.noreply.github.com>
Co-authored-by: Mohamed Mathari <nocodeventure@Mac-mini-van-Mohamed.fritz.box>
Co-authored-by: Stephen Hu <stephenhu@seismic.com>
Co-authored-by: Shaun Zhang <zhangwfjh@users.noreply.github.com>
Co-authored-by: zhimeng123 <60221886+zhimeng123@users.noreply.github.com>
Co-authored-by: mxc <mxc@example.com>
Co-authored-by: Dominik Novotný <50611433+SgtMarmite@users.noreply.github.com>
Co-authored-by: EVGENY M <168018528+rjohny55@users.noreply.github.com>
Co-authored-by: mcoder6425 <mcoder64@gmail.com>
Co-authored-by: TeslaZY <TeslaZY@outlook.com>
Co-authored-by: lemsn <lemsn@msn.com>
Co-authored-by: lemsn <lemsn@126.com>
Co-authored-by: Adrian Gora <47756404+adagora@users.noreply.github.com>
Co-authored-by: Womsxd <45663319+Womsxd@users.noreply.github.com>
Co-authored-by: FatMii <39074672+FatMii@users.noreply.github.com>
2025-09-30 13:13:15 +08:00

80 lines
2.8 KiB
Python

"""
Configuration management for Firecrawl integration with RAGFlow.
"""
import os
from typing import Dict, Any
from dataclasses import dataclass
import json
@dataclass
class FirecrawlConfig:
    """Validated settings for the Firecrawl integration.

    Instances can be built directly, from environment variables
    (:meth:`from_env`), from a plain dictionary (:meth:`from_dict`) or from a
    JSON document (:meth:`from_json`).  Every construction path runs the
    checks in :meth:`__post_init__`, so an existing instance has always
    passed validation at creation time.
    """

    # API key; must be non-empty and start with "fc-".
    api_key: str
    # Base URL of the Firecrawl API.
    api_url: str = "https://api.firecrawl.dev"
    # Retry count; accepted range is 1..10 inclusive.
    max_retries: int = 3
    # Timeout in seconds; accepted range is 5..300 inclusive.
    timeout: int = 30
    # Rate-limit delay in seconds; accepted range is 0.1..10.0 inclusive.
    rate_limit_delay: float = 1.0
    # Upper bound on simultaneous requests; must be at least 1.
    max_concurrent_requests: int = 5

    def __post_init__(self):
        """Validate field values right after dataclass initialization.

        Raises:
            ValueError: If the API key is missing or lacks the ``fc-`` prefix,
                or if any numeric setting lies outside its accepted range.
        """
        if not self.api_key:
            raise ValueError("Firecrawl API key is required")

        if not self.api_key.startswith("fc-"):
            raise ValueError("Invalid Firecrawl API key format. Must start with 'fc-'")

        if self.max_retries < 1 or self.max_retries > 10:
            raise ValueError("Max retries must be between 1 and 10")

        if self.timeout < 5 or self.timeout > 300:
            raise ValueError("Timeout must be between 5 and 300 seconds")

        if self.rate_limit_delay < 0.1 or self.rate_limit_delay > 10.0:
            raise ValueError("Rate limit delay must be between 0.1 and 10.0 seconds")

        # A concurrency limit below one cannot admit any request, so reject
        # it here like the other out-of-range settings.
        if self.max_concurrent_requests < 1:
            raise ValueError("Max concurrent requests must be at least 1")

    @staticmethod
    def _env_number(name: str, default: str, cast):
        """Read environment variable ``name`` and convert it with ``cast``.

        Args:
            name: Environment variable to read.
            default: String used when the variable is not set.
            cast: Conversion callable, ``int`` or ``float``.

        Returns:
            The converted value.

        Raises:
            ValueError: If the value cannot be converted; the message names
                the offending variable and the rejected text.
        """
        raw = os.getenv(name, default)
        try:
            return cast(raw)
        except ValueError as err:
            raise ValueError(
                f"Environment variable {name} must be a valid "
                f"{cast.__name__}, got {raw!r}"
            ) from err

    @classmethod
    def from_env(cls) -> "FirecrawlConfig":
        """Create configuration from environment variables.

        Reads ``FIRECRAWL_API_KEY`` (required) plus the optional
        ``FIRECRAWL_API_URL``, ``FIRECRAWL_MAX_RETRIES``, ``FIRECRAWL_TIMEOUT``,
        ``FIRECRAWL_RATE_LIMIT_DELAY`` and ``FIRECRAWL_MAX_CONCURRENT``.
        Unset optional variables fall back to the dataclass defaults.

        Raises:
            ValueError: If the API key variable is unset or empty, if a
                numeric variable is not parseable, or if validation fails.
        """
        api_key = os.getenv("FIRECRAWL_API_KEY")
        if not api_key:
            raise ValueError("FIRECRAWL_API_KEY environment variable not set")

        return cls(
            api_key=api_key,
            api_url=os.getenv("FIRECRAWL_API_URL", "https://api.firecrawl.dev"),
            max_retries=cls._env_number("FIRECRAWL_MAX_RETRIES", "3", int),
            timeout=cls._env_number("FIRECRAWL_TIMEOUT", "30", int),
            rate_limit_delay=cls._env_number("FIRECRAWL_RATE_LIMIT_DELAY", "1.0", float),
            max_concurrent_requests=cls._env_number("FIRECRAWL_MAX_CONCURRENT", "5", int),
        )

    @classmethod
    def from_dict(cls, config_dict: Dict[str, Any]) -> "FirecrawlConfig":
        """Create configuration from a dictionary.

        The dictionary keys are passed as keyword arguments to the
        constructor, so they must match the field names; an unknown key
        raises ``TypeError`` and invalid values raise ``ValueError``.
        """
        return cls(**config_dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the configuration as a dictionary keyed by field name."""
        return {
            "api_key": self.api_key,
            "api_url": self.api_url,
            "max_retries": self.max_retries,
            "timeout": self.timeout,
            "rate_limit_delay": self.rate_limit_delay,
            "max_concurrent_requests": self.max_concurrent_requests,
        }

    def to_json(self) -> str:
        """Return the configuration as a JSON string indented by two spaces.

        The output contains the API key in clear text.
        """
        return json.dumps(self.to_dict(), indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> "FirecrawlConfig":
        """Create configuration from a JSON string.

        The string is decoded with ``json.loads`` and the result is handed to
        :meth:`from_dict`.
        """
        config_dict = json.loads(json_str)
        return cls.from_dict(config_dict)