logger = logging.getLogger(__name__)

# (DataValidation attribute, output key) pairs copied into the metadata
# only when the attribute holds a truthy value.
_OPTIONAL_TEXT_FIELDS = (
    ("prompt", "prompt"),
    ("promptTitle", "prompt_title"),
    ("error", "error_message"),
    ("errorTitle", "error_title"),
)


def get_data_validation_for_cell(worksheet: "Worksheet", cell_address: str) -> Optional[Dict[str, Any]]:
    """Get data validation metadata for a specific cell.

    Args:
        worksheet: The openpyxl worksheet object
        cell_address: Cell address like 'A1', 'B2', etc.

    Returns:
        Metadata dictionary for the first validation rule whose ranges
        contain the cell, or None if no rule matches. Any failure (for
        example an unparsable address) is logged and also yields None.
    """
    try:
        # Convert cell address to row/col coordinates
        col_letter, row = coordinate_from_string(cell_address)
        col_idx = column_index_from_string(col_letter)

        # First matching rule wins
        for dv in worksheet.data_validations.dataValidation:
            if _cell_in_validation_range(row, col_idx, dv):
                return _extract_validation_metadata(dv, cell_address, worksheet)

        return None

    except Exception as e:
        logger.warning(f"Failed to get validation for cell {cell_address}: {e}")
        return None


def _cell_in_validation_range(row: int, col: int, data_validation) -> bool:
    """Check if a cell is within any of the ranges listed in the rule's sqref."""
    try:
        return any(
            cell_range.min_row <= row <= cell_range.max_row
            and cell_range.min_col <= col <= cell_range.max_col
            for cell_range in data_validation.sqref.ranges
        )
    except Exception as e:
        logger.warning(f"Error checking if cell ({row}, {col}) is in validation range for DV sqref '{getattr(data_validation, 'sqref', 'N/A')}': {e}")
        return False


def _rule_details(data_validation, worksheet: Optional["Worksheet"] = None) -> Dict[str, Any]:
    """Collect the optional rule-level fields shared by per-cell and per-sheet output.

    Returns a dictionary that may contain: operator, prompt, prompt_title,
    error_message, error_title, and either allowed_values (list rules) or
    formula1/formula2 (all other rules).
    """
    details: Dict[str, Any] = {}

    if data_validation.operator:
        details["operator"] = data_validation.operator

    for attribute, key in _OPTIONAL_TEXT_FIELDS:
        value = getattr(data_validation, attribute)
        if value:
            details[key] = value

    if data_validation.type == "list" and data_validation.formula1:
        # Dropdown lists: report the concrete choices instead of the raw formula
        details["allowed_values"] = _extract_list_values(data_validation.formula1, worksheet)
    elif data_validation.formula1:
        details["formula1"] = data_validation.formula1
        if data_validation.formula2:
            details["formula2"] = data_validation.formula2

    return details


def _extract_validation_metadata(data_validation, cell_address: str, worksheet: Optional["Worksheet"] = None) -> Dict[str, Any]:
    """Extract metadata from a DataValidation object for one cell.

    On failure a stub dictionary with validation_type "unknown" and an
    "error" entry is returned instead of raising.
    """
    try:
        validation_info = {
            "cell": cell_address,
            "has_validation": True,
            "validation_type": data_validation.type,
            "allow_blank": data_validation.allowBlank,
        }
        validation_info.update(_rule_details(data_validation, worksheet))
        return validation_info

    except Exception as e:
        logger.warning(f"Failed to extract validation metadata: {e}")
        return {
            "cell": cell_address,
            "has_validation": True,
            "validation_type": "unknown",
            "error": f"Failed to parse validation: {e}"
        }


def _split_inline_list(text: str) -> List[str]:
    """Split a comma-separated literal list; a value without commas is returned as-is."""
    if ',' not in text:
        return [text]
    values = [val.strip().strip('"') for val in text.split(',')]
    return [val for val in values if val]  # Remove empty values


def _locate_sheet(reference: str, worksheet):
    """Split an optional 'Sheet!' prefix off a reference.

    Returns (sheet_object, cell_reference). Indexing a worksheet does not
    accept sheet-qualified keys, so the target sheet is looked up through
    ``worksheet.parent`` (the owning workbook) when the prefix names a
    different sheet. Raises if that sheet cannot be found.
    """
    if '!' not in reference:
        return worksheet, reference

    sheet_part, cell_ref = reference.rsplit('!', 1)
    # Sheet names with spaces/punctuation are wrapped in single quotes,
    # with embedded apostrophes doubled.
    if len(sheet_part) >= 2 and sheet_part[0] == "'" and sheet_part[-1] == "'":
        sheet_part = sheet_part[1:-1].replace("''", "'")

    if sheet_part == getattr(worksheet, "title", None):
        return worksheet, cell_ref

    workbook = getattr(worksheet, "parent", None)
    if workbook is None:
        raise LookupError(f"no workbook available to resolve sheet '{sheet_part}'")
    return workbook[sheet_part], cell_ref


def _iter_cells(selection):
    """Yield cells from a single cell, a flat sequence of cells, or rows of cells."""
    if hasattr(selection, 'value'):  # Single cell
        yield selection
        return
    for item in selection:
        if hasattr(item, 'value'):  # Flat sequence of cells
            yield item
        else:  # Row of cells
            yield from item


def _resolve_range_values(reference: str, worksheet, formula: str) -> List[str]:
    """Read the non-empty cell values behind a range reference as strings.

    Returns a one-element placeholder list when the range is empty or
    cannot be resolved; never raises.
    """
    try:
        sheet, cell_ref = _locate_sheet(reference, worksheet)
        values = [
            str(cell.value)
            for cell in _iter_cells(sheet[cell_ref])
            if cell.value is not None
        ]
    except Exception as e:
        logger.warning(f"Could not resolve range '{formula}' for list validation: {e}")
        return [f"Range: {formula} (resolution error)"]

    return values or [f"Range: {formula} (empty or unresolvable)"]


def _extract_list_values(formula: str, worksheet: Optional["Worksheet"] = None) -> List[str]:
    """Extract allowed values from a list validation formula.

    Handles three shapes of ``formula``:
      * a quoted literal list such as '"Yes,No"' (always treated as text,
        even if an item contains ':');
      * a cell/range reference, optionally prefixed with '=' and/or a
        sheet name ("$A$1:$A$5", "=$A$1", "Lists!$A$1:$A$5"), resolved to
        the referenced cell values when ``worksheet`` is given;
      * an unquoted comma list or single value.

    Returns:
        List of allowed values as strings. Unresolvable references yield a
        single "Range: ..." placeholder entry.
    """
    try:
        text = formula.strip()

        # Fully quoted formula: an inline literal list, never a reference
        if len(text) >= 2 and text.startswith('"') and text.endswith('"'):
            return _split_inline_list(text[1:-1])

        # Drop the leading '=' before classifying, so "=$A$1" is recognised
        # as a reference rather than a literal value
        reference = text[1:] if text.startswith('=') else text
        is_reference = '!' in reference or (
            ',' not in reference and (':' in reference or reference.startswith('$'))
        )

        if is_reference:
            if worksheet is None:
                return [f"Range: {formula}"]
            return _resolve_range_values(reference, worksheet, formula)

        return _split_inline_list(text.strip('"'))

    except Exception as e:
        logger.warning(f"Failed to parse list formula '{formula}': {e}")
        return [formula]  # Return original formula if parsing fails


def get_all_validation_ranges(worksheet: "Worksheet") -> List[Dict[str, Any]]:
    """Get all data validation rules in a worksheet.

    Each entry holds "ranges", "validation_type" and "allow_blank", plus
    the optional fields produced by ``_rule_details`` (operator, prompt and
    error texts, allowed_values for list rules, formula1/formula2 for the
    others).

    Returns:
        List of dictionaries containing validation range information.
        A rule that cannot be described is logged and skipped; if the
        rule collection itself cannot be read an empty list is returned.
    """
    validations: List[Dict[str, Any]] = []

    try:
        rules = list(worksheet.data_validations.dataValidation)
    except Exception as e:
        logger.warning(f"Failed to get validation ranges: {e}")
        return validations

    for dv in rules:
        try:
            validation_info = {
                "ranges": str(dv.sqref),
                "validation_type": dv.type,
                "allow_blank": dv.allowBlank,
            }
            validation_info.update(_rule_details(dv, worksheet))
        except Exception as e:
            # Keep going: one malformed rule should not hide the others
            logger.warning(f"Failed to get validation ranges: {e}")
            continue
        validations.append(validation_info)

    return validations
def _parse_single_cell(cell_ref: str, label: str) -> tuple:
    """Convert one cell reference to (row, column) via parse_cell_range.

    ``label`` is "start" or "end" and only affects the error message.
    Raises DataError when the reference cannot be parsed.
    """
    try:
        coords = parse_cell_range(f"{cell_ref}:{cell_ref}")
        if not coords or not all(coord is not None for coord in coords[:2]):
            raise DataError(f"Invalid {label} cell reference: {cell_ref}")
        return coords[0], coords[1]
    except ValueError as e:
        raise DataError(f"Invalid {label} cell format: {str(e)}") from e


def read_excel_range_with_metadata(
    filepath: Path | str,
    sheet_name: str,
    start_cell: str = "A1",
    end_cell: str | None = None,
    include_validation: bool = True
) -> Dict[str, Any]:
    """Read data from Excel range with cell metadata including validation rules.

    Args:
        filepath: Path to Excel file
        sheet_name: Name of worksheet
        start_cell: Starting cell address; may also be a full "A1:C3"
            range, in which case the part after ':' replaces end_cell
        end_cell: Ending cell address (optional). When omitted the range
            grows from start_cell until no further non-empty rows/columns
            are found
        include_validation: Whether to include validation metadata

    Returns:
        Dictionary with "range", "sheet_name" and "cells"; every cell entry
        has "address", "value", "row", "column" and, when requested,
        "validation".

    Raises:
        DataError: If the sheet is missing, a cell reference is invalid,
            the start cell lies outside the sheet, or reading fails.
    """
    wb = None
    try:
        wb = load_workbook(filepath, read_only=False)

        if sheet_name not in wb.sheetnames:
            raise DataError(f"Sheet '{sheet_name}' not found")

        ws = wb[sheet_name]

        # Accept "A1:C3" in start_cell; partition never raises on extra ':'
        # so malformed input surfaces as an invalid end cell below
        if ':' in start_cell:
            start_cell, _, end_cell = start_cell.partition(':')

        start_row, start_col = _parse_single_cell(start_cell, "start")

        if end_cell:
            end_row, end_col = _parse_single_cell(end_cell, "end")
        else:
            # Dynamically expand range until all values are empty
            end_row, end_col = start_row, start_col
            while end_row <= ws.max_row and any(ws.cell(row=end_row, column=c).value is not None for c in range(start_col, ws.max_column + 1)):
                end_row += 1
            while end_col <= ws.max_column and any(ws.cell(row=r, column=end_col).value is not None for r in range(start_row, ws.max_row + 1)):
                end_col += 1
            end_row -= 1  # Adjust back to last non-empty row
            end_col -= 1  # Adjust back to last non-empty column

        # Validate range bounds
        if start_row > ws.max_row or start_col > ws.max_column:
            raise DataError(
                f"Start cell out of bounds. Sheet dimensions are "
                f"A1:{get_column_letter(ws.max_column)}{ws.max_row}"
            )

        # Report the range actually covered. Auto-expansion over an empty
        # region leaves end < start (end_col may be 0, which has no column
        # letter), so fall back to the start cell in that case.
        covers_cells = end_row >= start_row and end_col >= start_col
        if end_cell or covers_cells:
            range_label = f"{start_cell}:{get_column_letter(end_col)}{end_row}"
        else:
            range_label = start_cell

        range_data = {
            "range": range_label,
            "sheet_name": sheet_name,
            "cells": []
        }

        for row in range(start_row, end_row + 1):
            for col in range(start_col, end_col + 1):
                cell = ws.cell(row=row, column=col)
                cell_address = f"{get_column_letter(col)}{row}"

                cell_data = {
                    "address": cell_address,
                    "value": cell.value,
                    "row": row,
                    "column": col
                }

                # Add validation metadata if requested
                if include_validation:
                    validation_info = get_data_validation_for_cell(ws, cell_address)
                    if validation_info:
                        cell_data["validation"] = validation_info
                    else:
                        cell_data["validation"] = {"has_validation": False}

                range_data["cells"].append(cell_data)

        return range_data

    except DataError as e:
        logger.error(str(e))
        raise
    except Exception as e:
        logger.error(f"Failed to read Excel range with metadata: {e}")
        raise DataError(str(e)) from e
    finally:
        # Release the file handle on success and on every error path
        if wb is not None:
            wb.close()
@mcp.tool()
def get_data_validation_info(
    filepath: str,
    sheet_name: str
) -> str:
    """
    Get all data validation rules in a worksheet.

    This tool helps identify which cell ranges have validation rules
    and what types of validation are applied.

    Args:
        filepath: Path to Excel file
        sheet_name: Name of worksheet

    Returns:
        JSON string containing all validation rules in the worksheet,
        or a plain message when the sheet is missing or has no rules
    """
    try:
        full_path = get_excel_path(filepath)
        import json
        from openpyxl import load_workbook
        from excel_mcp.cell_validation import get_all_validation_ranges

        wb = load_workbook(full_path, read_only=False)
        try:
            if sheet_name not in wb.sheetnames:
                return f"Error: Sheet '{sheet_name}' not found"

            validations = get_all_validation_ranges(wb[sheet_name])
        finally:
            # Close on every path, including the missing-sheet early return
            wb.close()

        if not validations:
            return "No data validation rules found in this worksheet"

        return json.dumps({
            "sheet_name": sheet_name,
            "validation_rules": validations
        }, indent=2, default=str)

    except Exception as e:
        logger.error(f"Error getting validation info: {e}")
        raise
} dependencies = [ { name = "mcp", extra = ["cli"] }, { name = "openpyxl" }, + { name = "typer" }, ] [package.metadata] requires-dist = [ - { name = "mcp", extras = ["cli"], specifier = ">=1.2.0" }, + { name = "mcp", extras = ["cli"], specifier = ">=1.6.0" }, { name = "openpyxl", specifier = ">=3.1.2" }, + { name = "typer", specifier = ">=0.15.1" }, ] [[package]] @@ -158,7 +159,7 @@ wheels = [ [[package]] name = "mcp" -version = "1.2.1" +version = "1.9.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -166,13 +167,14 @@ dependencies = [ { name = "httpx-sse" }, { name = "pydantic" }, { name = "pydantic-settings" }, + { name = "python-multipart" }, { name = "sse-starlette" }, { name = "starlette" }, - { name = "uvicorn" }, + { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fc/30/51e4555826126e3954fa2ab1e934bf74163c5fe05e98f38ca4d0f8abbf63/mcp-1.2.1.tar.gz", hash = "sha256:c9d43dbfe943aa1530e2be8f54b73af3ebfb071243827b4483d421684806cb45", size = 103968 } +sdist = { url = "https://files.pythonhosted.org/packages/e7/bc/54aec2c334698cc575ca3b3481eed627125fb66544152fa1af927b1a495c/mcp-1.9.1.tar.gz", hash = "sha256:19879cd6dde3d763297617242888c2f695a95dfa854386a6a68676a646ce75e4", size = 316247 } wheels = [ - { url = "https://files.pythonhosted.org/packages/4c/0d/6770742a84c8aa1d36c0d628896a380584c5759612e66af7446af07d8775/mcp-1.2.1-py3-none-any.whl", hash = "sha256:579bf9c9157850ebb1344f3ca6f7a3021b0123c44c9f089ef577a7062522f0fd", size = 66453 }, + { url = "https://files.pythonhosted.org/packages/a6/c0/4ac795585a22a0a2d09cd2b1187b0252d2afcdebd01e10a68bbac4d34890/mcp-1.9.1-py3-none-any.whl", hash = "sha256:2900ded8ffafc3c8a7bfcfe8bc5204037e988e753ec398f371663e6a06ecd9a9", size = 130261 }, ] [package.optional-dependencies] @@ -322,6 +324,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/6a/3e/b68c118422ec867fa7ab88444e1274aa40681c606d59ac27de5a5588f082/python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a", size = 19863 }, ] +[[package]] +name = "python-multipart" +version = "0.0.20" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/87/f44d7c9f274c7ee665a29b885ec97089ec5dc034c7f3fafa03da9e39a09e/python_multipart-0.0.20.tar.gz", hash = "sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13", size = 37158 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/45/58/38b5afbc1a800eeea951b9285d3912613f2603bdf897a4ab0f4bd7f405fc/python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104", size = 24546 }, +] + [[package]] name = "rich" version = "13.9.4"