""" UI components for Firecrawl integration in RAGFlow. """ from typing import Dict, Any, List, Optional from dataclasses import dataclass @dataclass class FirecrawlUIComponent: """Represents a UI component for Firecrawl integration.""" component_type: str props: Dict[str, Any] children: Optional[List['FirecrawlUIComponent']] = None class FirecrawlUIBuilder: """Builder for Firecrawl UI components in RAGFlow.""" @staticmethod def create_data_source_config() -> Dict[str, Any]: """Create configuration for Firecrawl data source.""" return { "name": "firecrawl", "display_name": "Firecrawl Web Scraper", "description": "Import web content using Firecrawl's powerful scraping capabilities", "icon": "🌐", "category": "web", "version": "1.0.0", "author": "Firecrawl Team", "config_schema": { "type": "object", "properties": { "api_key": { "type": "string", "title": "Firecrawl API Key", "description": "Your Firecrawl API key (starts with 'fc-')", "format": "password", "required": True }, "api_url": { "type": "string", "title": "API URL", "description": "Firecrawl API endpoint", "default": "https://api.firecrawl.dev", "required": False }, "max_retries": { "type": "integer", "title": "Max Retries", "description": "Maximum number of retry attempts", "default": 3, "minimum": 1, "maximum": 10 }, "timeout": { "type": "integer", "title": "Timeout (seconds)", "description": "Request timeout in seconds", "default": 30, "minimum": 5, "maximum": 300 }, "rate_limit_delay": { "type": "number", "title": "Rate Limit Delay", "description": "Delay between requests in seconds", "default": 1.0, "minimum": 0.1, "maximum": 10.0 } }, "required": ["api_key"] } } @staticmethod def create_scraping_form() -> Dict[str, Any]: """Create form for scraping configuration.""" return { "type": "form", "title": "Firecrawl Web Scraping", "description": "Configure web scraping parameters", "fields": [ { "name": "urls", "type": "array", "title": "URLs to Scrape", "description": "Enter URLs to scrape (one per line)", "items": { "type": "string", "format": "uri" }, "required": True, "minItems": 1 }, { "name": "scrape_type", "type": "string", "title": "Scrape Type", "description": "Choose scraping method", "enum": ["single", "crawl", "batch"], "enumNames": ["Single URL", "Crawl Website", "Batch URLs"], "default": "single", "required": True }, { "name": "formats", "type": "array", "title": "Output Formats", "description": "Select output formats", "items": { "type": "string", "enum": ["markdown", "html", "links", "screenshot"] }, "default": ["markdown", "html"], "required": True }, { "name": "crawl_limit", "type": "integer", "title": "Crawl Limit", "description": "Maximum number of pages to crawl (for crawl type)", "default": 100, "minimum": 1, "maximum": 1000, "condition": { "field": "scrape_type", "equals": "crawl" } }, { "name": "extract_options", "type": "object", "title": "Extraction Options", "description": "Advanced extraction settings", "properties": { "extractMainContent": { "type": "boolean", "title": "Extract Main Content Only", "default": True }, "excludeTags": { "type": "array", "title": "Exclude Tags", "description": "HTML tags to exclude", "items": {"type": "string"}, "default": ["nav", "footer", "header", "aside"] }, "includeTags": { "type": "array", "title": "Include Tags", "description": "HTML tags to include", "items": {"type": "string"}, "default": ["main", "article", "section", "div", "p"] } } } ] } @staticmethod def create_progress_component() -> Dict[str, Any]: """Create progress tracking component.""" return { "type": "progress", "title": "Scraping Progress", "description": "Track the progress of your web scraping job", "properties": { "show_percentage": True, "show_eta": True, "show_details": True } } @staticmethod def create_results_view() -> Dict[str, Any]: """Create results display component.""" return { "type": "results", "title": "Scraping Results", "description": "View and manage scraped content", "properties": { "show_preview": True, "show_metadata": True, "allow_editing": True, "show_chunks": True } } @staticmethod def create_error_handler() -> Dict[str, Any]: """Create error handling component.""" return { "type": "error_handler", "title": "Error Handling", "description": "Handle scraping errors and retries", "properties": { "show_retry_button": True, "show_error_details": True, "auto_retry": False, "max_retries": 3 } } @staticmethod def create_validation_rules() -> Dict[str, Any]: """Create validation rules for Firecrawl integration.""" return { "url_validation": { "pattern": r"^https?://.+", "message": "URL must start with http:// or https://" }, "api_key_validation": { "pattern": r"^fc-[a-zA-Z0-9]+$", "message": "API key must start with 'fc-' followed by alphanumeric characters" }, "rate_limit_validation": { "min": 0.1, "max": 10.0, "message": "Rate limit delay must be between 0.1 and 10.0 seconds" } } @staticmethod def create_help_text() -> Dict[str, str]: """Create help text for users.""" return { "api_key_help": "Get your API key from https://firecrawl.dev. Sign up for a free account to get started.", "url_help": "Enter the URLs you want to scrape. You can add multiple URLs for batch processing.", "crawl_help": "Crawling will follow links from the starting URL and scrape all accessible pages within the limit.", "formats_help": "Choose the output formats you need. Markdown is recommended for RAG processing.", "extract_help": "Extraction options help filter content to get only the main content without navigation and ads." } @staticmethod def create_ui_schema() -> Dict[str, Any]: """Create complete UI schema for Firecrawl integration.""" return { "version": "1.0.0", "components": { "data_source_config": FirecrawlUIBuilder.create_data_source_config(), "scraping_form": FirecrawlUIBuilder.create_scraping_form(), "progress_component": FirecrawlUIBuilder.create_progress_component(), "results_view": FirecrawlUIBuilder.create_results_view(), "error_handler": FirecrawlUIBuilder.create_error_handler() }, "validation_rules": FirecrawlUIBuilder.create_validation_rules(), "help_text": FirecrawlUIBuilder.create_help_text(), "workflow": [ "configure_data_source", "setup_scraping_parameters", "start_scraping_job", "monitor_progress", "review_results", "import_to_ragflow" ] }