mirror of
https://github.com/haris-musa/excel-mcp-server.git
synced 2025-12-08 17:12:41 +08:00
feat(cell validation): add comprehensive data validation capabilities (#37)
- Add cell_validation.py module for Excel data validation metadata extraction
- Implement get_data_validation_for_cell() and get_all_validation_ranges()
- Include validation metadata in read_data_from_excel responses automatically
- Add get_data_validation_info MCP tool for validation rule summaries
- Resolve range references in list validations to actual cell values
- Support all validation types: list, whole, decimal, date, time, textLength
- Include operators (between, notBetween, equal, greaterThan, etc.) in metadata

This allows LLMs to understand Excel validation constraints, including dropdown options, numeric ranges, date constraints, and text length limits.
This commit is contained in:
20
TOOLS.md
20
TOOLS.md
@ -337,3 +337,23 @@ validate_excel_range(
|
||||
- `start_cell`: Starting cell of range
|
||||
- `end_cell`: Optional ending cell of range
|
||||
- Returns: Validation result message
|
||||
|
||||
### get_data_validation_info
|
||||
|
||||
Get data validation rules and metadata for a worksheet.
|
||||
|
||||
```python
|
||||
get_data_validation_info(filepath: str, sheet_name: str) -> str
|
||||
```
|
||||
|
||||
- `filepath`: Path to Excel file
|
||||
- `sheet_name`: Target worksheet name
|
||||
- Returns: JSON string containing all data validation rules with metadata including:
  - Validation type (list, whole, decimal, date, time, textLength)
  - Operator (between, notBetween, equal, greaterThan, lessThan, etc.)
  - Allowed values for list validations (resolved from ranges)
  - Formula constraints for numeric/date validations
  - Cell ranges where validation applies
  - Prompt and error messages
|
||||
|
||||
**Note**: The `read_data_from_excel` tool automatically includes validation metadata for individual cells when available.
|
||||
|
||||
179
src/excel_mcp/cell_validation.py
Normal file
179
src/excel_mcp/cell_validation.py
Normal file
@ -0,0 +1,179 @@
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from openpyxl.worksheet.worksheet import Worksheet
|
||||
from openpyxl.utils.cell import coordinate_from_string, column_index_from_string
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def get_data_validation_for_cell(worksheet: Worksheet, cell_address: str) -> Optional[Dict[str, Any]]:
    """Look up the data validation rule that covers a single cell.

    Args:
        worksheet: The openpyxl worksheet whose validation rules are searched.
        cell_address: Cell address in A1 notation, e.g. 'A1' or 'B2'.

    Returns:
        Metadata dictionary for the first rule whose range contains the cell,
        or None when no rule covers it or the lookup fails (failures are
        logged as warnings, never raised).
    """
    try:
        # Split 'B2' into its column letters and row number, then turn the
        # letters into a 1-based column index for the range comparison.
        column_letters, row_number = coordinate_from_string(cell_address)
        column_number = column_index_from_string(column_letters)

        # Rules are checked in worksheet order; the first match wins.
        matching_rule = next(
            (
                rule
                for rule in worksheet.data_validations.dataValidation
                if _cell_in_validation_range(row_number, column_number, rule)
            ),
            None,
        )
        if matching_rule is None:
            return None
        return _extract_validation_metadata(matching_rule, cell_address, worksheet)

    except Exception as e:
        logger.warning(f"Failed to get validation for cell {cell_address}: {e}")
        return None
|
||||
|
||||
def _cell_in_validation_range(row: int, col: int, data_validation) -> bool:
    """Tell whether the cell at (row, col) lies inside any range of a validation rule.

    Args:
        row: Row number of the cell.
        col: Column number of the cell.
        data_validation: Rule object whose ``sqref.ranges`` entries expose
            ``min_row``/``max_row``/``min_col``/``max_col`` bounds.

    Returns:
        True if at least one range contains the cell; False otherwise,
        including when the ranges cannot be inspected (logged as a warning).
    """
    try:
        # sqref holds every cell range the rule applies to; bounds are inclusive.
        return any(
            area.min_row <= row <= area.max_row
            and area.min_col <= col <= area.max_col
            for area in data_validation.sqref.ranges
        )
    except Exception as e:
        logger.warning(f"Error checking if cell ({row}, {col}) is in validation range for DV sqref '{getattr(data_validation, 'sqref', 'N/A')}': {e}")
        return False
|
||||
|
||||
def _extract_validation_metadata(data_validation, cell_address: str, worksheet: Optional[Worksheet] = None) -> Dict[str, Any]:
    """Summarise a DataValidation rule as a plain dictionary.

    Args:
        data_validation: Rule object exposing ``type``, ``allowBlank``,
            ``operator``, ``prompt``, ``promptTitle``, ``error``,
            ``errorTitle``, ``formula1`` and ``formula2``.
        cell_address: Address of the cell the metadata is reported for.
        worksheet: Optional worksheet passed on to resolve the source range
            of list validations.

    Returns:
        Dictionary that always contains 'cell', 'has_validation' and
        'validation_type'. On success it also carries 'allow_blank' plus any
        populated optional fields; on failure it carries 'validation_type'
        'unknown' and an 'error' description instead.
    """
    try:
        metadata = {
            "cell": cell_address,
            "has_validation": True,
            "validation_type": data_validation.type,
            "allow_blank": data_validation.allowBlank,
        }

        # Optional attributes are only reported when they hold a truthy value.
        optional_fields = (
            ("operator", data_validation.operator),
            ("prompt", data_validation.prompt),
            ("prompt_title", data_validation.promptTitle),
            ("error_message", data_validation.error),
            ("error_title", data_validation.errorTitle),
        )
        for key, value in optional_fields:
            if value:
                metadata[key] = value

        primary_formula = data_validation.formula1
        if data_validation.type == "list" and primary_formula:
            # Dropdown lists: report the selectable values rather than the raw formula.
            metadata["allowed_values"] = _extract_list_values(primary_formula, worksheet)
        elif primary_formula:
            # Every other validation type: report the constraint formulas verbatim.
            metadata["formula1"] = primary_formula
            secondary_formula = data_validation.formula2
            if secondary_formula:
                metadata["formula2"] = secondary_formula

        return metadata

    except Exception as e:
        logger.warning(f"Failed to extract validation metadata: {e}")
        return {
            "cell": cell_address,
            "has_validation": True,
            "validation_type": "unknown",
            "error": f"Failed to parse validation: {e}"
        }
|
||||
|
||||
def _unquote_sheet_name(name: str) -> str:
    """Remove Excel's single-quote wrapping from a sheet name ('My Sheet' -> My Sheet)."""
    if len(name) >= 2 and name.startswith("'") and name.endswith("'"):
        # Inside a quoted sheet name an apostrophe is written doubled.
        return name[1:-1].replace("''", "'")
    return name


def _iter_selection_cells(selection):
    """Yield each cell from a ``worksheet[ref]`` result (single cell, flat tuple or tuple of rows)."""
    if hasattr(selection, 'value'):  # Single cell
        yield selection
        return
    for item in selection:
        if hasattr(item, 'value'):  # Flat tuple of cells
            yield item
        else:  # Row of cells
            yield from item


def _extract_list_values(formula: str, worksheet: Optional[Worksheet] = None) -> List[str]:
    """Extract the allowed values from a list validation formula.

    Three formula shapes are recognised, checked in this order:

    * a comma-separated inline list (``"Yes,No,Maybe"``), split into its items;
    * a cell/range reference (contains ':' or starts with '$', optionally
      prefixed with '=' and/or qualified with a sheet name such as
      ``Sheet1!$A$1:$A$5`` or ``'My Sheet'!$A$1:$A$5``), resolved to the
      non-empty cell values when a worksheet is supplied;
    * anything else, returned as a single value.

    Args:
        formula: The rule's ``formula1`` text.
        worksheet: Worksheet used to resolve references. A sheet-qualified
            reference is looked up through ``worksheet.parent`` when it names
            a different sheet.

    Returns:
        List of allowed values as strings. For references that cannot be
        resolved, a single descriptive placeholder: ``Range: <formula>`` when
        no worksheet was given, ``... (empty or unresolvable)`` when every
        referenced cell is empty, ``... (resolution error)`` when the lookup
        raised. If parsing fails altogether the formula itself is returned as
        the only element.
    """
    try:
        # Remove quotes if present
        formula = formula.strip('"')

        # Inline lists are handled before anything else so that items which
        # happen to start with '=' or '$' (e.g. "=,<,>") are kept verbatim.
        if ',' in formula:
            values = [val.strip().strip('"') for val in formula.split(',')]
            return [val for val in values if val]  # Remove empty values

        # Drop a leading '=' *before* classifying, so '=$A$1' is recognised
        # as a reference just like '$A$1'.
        reference = formula[1:] if formula.startswith('=') else formula

        if ':' not in reference and not reference.startswith('$'):
            # Single value
            return [formula.strip('"')]

        if worksheet is None:
            # Reference, but nothing to resolve it against
            return [f"Range: {formula}"]

        try:
            # Worksheet item access only understands plain cell references,
            # so a 'Sheet!' qualifier is split off and used to pick the
            # source sheet from the parent workbook.
            sheet_part, separator, cell_part = reference.rpartition('!')
            source_sheet = worksheet
            if separator:
                sheet_name = _unquote_sheet_name(sheet_part)
                if sheet_name != getattr(worksheet, 'title', None):
                    source_sheet = worksheet.parent[sheet_name]

            actual_values = [
                str(cell.value)
                for cell in _iter_selection_cells(source_sheet[cell_part])
                if cell.value is not None
            ]
            if actual_values:
                return actual_values
            return [f"Range: {formula} (empty or unresolvable)"]

        except Exception as e:
            logger.warning(f"Could not resolve range '{formula}' for list validation: {e}")
            return [f"Range: {formula} (resolution error)"]

    except Exception as e:
        logger.warning(f"Failed to parse list formula '{formula}': {e}")
        return [formula]  # Return original formula if parsing fails
|
||||
|
||||
def get_all_validation_ranges(worksheet: Worksheet) -> List[Dict[str, Any]]:
    """Get all data validation rules in a worksheet.

    Args:
        worksheet: Worksheet whose ``data_validations.dataValidation`` rules
            are summarised.

    Returns:
        One dictionary per rule. Every entry has 'ranges' (the rule's sqref as
        a string), 'validation_type' and 'allow_blank'. When populated on the
        rule, it also has 'operator', 'prompt', 'prompt_title',
        'error_message' and 'error_title'. List rules add 'allowed_values';
        other rules add 'formula1' and, if set, 'formula2'. If reading the
        rules fails, the entries collected so far are returned and a warning
        is logged.
    """
    validations: List[Dict[str, Any]] = []

    try:
        for dv in worksheet.data_validations.dataValidation:
            validation_info = {
                "ranges": str(dv.sqref),
                "validation_type": dv.type,
                "allow_blank": dv.allowBlank,
            }

            # Same optional fields and key names as the per-cell metadata, so
            # the sheet-level summary carries the operator and messages too.
            optional_fields = (
                ("operator", getattr(dv, "operator", None)),
                ("prompt", getattr(dv, "prompt", None)),
                ("prompt_title", getattr(dv, "promptTitle", None)),
                ("error_message", getattr(dv, "error", None)),
                ("error_title", getattr(dv, "errorTitle", None)),
            )
            for key, value in optional_fields:
                if value:
                    validation_info[key] = value

            if dv.type == "list" and dv.formula1:
                validation_info["allowed_values"] = _extract_list_values(dv.formula1, worksheet)
            elif dv.formula1:
                # Numeric/date/text-length rules: expose the constraint formulas.
                validation_info["formula1"] = dv.formula1
                formula2 = getattr(dv, "formula2", None)
                if formula2:
                    validation_info["formula2"] = formula2

            validations.append(validation_info)

    except Exception as e:
        logger.warning(f"Failed to get validation ranges: {e}")

    return validations
|
||||
@ -1,14 +1,14 @@
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from typing import Any, Dict
|
||||
import logging
|
||||
|
||||
from openpyxl import load_workbook
|
||||
from openpyxl.styles import Font
|
||||
from openpyxl.worksheet.worksheet import Worksheet
|
||||
from openpyxl.utils import get_column_letter
|
||||
|
||||
from .exceptions import DataError
|
||||
from .cell_utils import parse_cell_range
|
||||
from .cell_validation import get_data_validation_for_cell
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -244,3 +244,108 @@ def _write_data_to_worksheet(
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to write worksheet data: {e}")
|
||||
raise DataError(str(e))
|
||||
|
||||
def read_excel_range_with_metadata(
    filepath: Path | str,
    sheet_name: str,
    start_cell: str = "A1",
    end_cell: str | None = None,
    include_validation: bool = True
) -> Dict[str, Any]:
    """Read data from Excel range with cell metadata including validation rules.

    Args:
        filepath: Path to Excel file
        sheet_name: Name of worksheet
        start_cell: Starting cell address; may also be a full 'A1:C3' range,
            in which case it supplies both corners
        end_cell: Ending cell address (optional). When omitted, the range is
            grown from the start cell over rows, then columns, until an
            empty one is met
        include_validation: Whether to include validation metadata

    Returns:
        Dictionary with 'range', 'sheet_name' and 'cells'. Each cell entry has
        'address', 'value', 'row' and 'column', plus a 'validation' entry when
        include_validation is true. 'range' is the rectangle actually read;
        it falls back to the start cell alone when auto-expansion found no data.

    Raises:
        DataError: If the sheet is missing, a cell reference is invalid, the
            start cell lies outside the sheet, or any other failure occurs.
    """
    wb = None
    try:
        wb = load_workbook(filepath, read_only=False)

        if sheet_name not in wb.sheetnames:
            raise DataError(f"Sheet '{sheet_name}' not found")

        ws = wb[sheet_name]

        # Parse start cell
        if ':' in start_cell:
            start_cell, end_cell = start_cell.split(':')

        # Get start coordinates
        try:
            start_coords = parse_cell_range(f"{start_cell}:{start_cell}")
            if not start_coords or not all(coord is not None for coord in start_coords[:2]):
                raise DataError(f"Invalid start cell reference: {start_cell}")
            start_row, start_col = start_coords[0], start_coords[1]
        except ValueError as e:
            raise DataError(f"Invalid start cell format: {str(e)}") from e

        # Determine end coordinates
        if end_cell:
            try:
                end_coords = parse_cell_range(f"{end_cell}:{end_cell}")
                if not end_coords or not all(coord is not None for coord in end_coords[:2]):
                    raise DataError(f"Invalid end cell reference: {end_cell}")
                end_row, end_col = end_coords[0], end_coords[1]
            except ValueError as e:
                raise DataError(f"Invalid end cell format: {str(e)}") from e
        else:
            # Dynamically expand range until all values are empty
            end_row, end_col = start_row, start_col
            while end_row <= ws.max_row and any(ws.cell(row=end_row, column=c).value is not None for c in range(start_col, ws.max_column + 1)):
                end_row += 1
            while end_col <= ws.max_column and any(ws.cell(row=r, column=end_col).value is not None for r in range(start_row, ws.max_row + 1)):
                end_col += 1
            end_row -= 1  # Adjust back to last non-empty row
            end_col -= 1  # Adjust back to last non-empty column

        # Validate range bounds
        if start_row > ws.max_row or start_col > ws.max_column:
            raise DataError(
                f"Start cell out of bounds. Sheet dimensions are "
                f"A1:{get_column_letter(ws.max_column)}{ws.max_row}"
            )

        # Report the rectangle that is actually read. After auto-expansion
        # the end corner can sit before the start corner (nothing found);
        # in that case only the start cell is reported, because a column
        # index of 0 has no letter.
        found_cells = end_row >= start_row and end_col >= start_col
        if end_cell or found_cells:
            range_label = f"{start_cell}:{get_column_letter(end_col)}{end_row}"
        else:
            range_label = start_cell

        # Build structured cell data
        range_data = {
            "range": range_label,
            "sheet_name": sheet_name,
            "cells": []
        }

        for row in range(start_row, end_row + 1):
            for col in range(start_col, end_col + 1):
                cell = ws.cell(row=row, column=col)
                cell_address = f"{get_column_letter(col)}{row}"

                cell_data = {
                    "address": cell_address,
                    "value": cell.value,
                    "row": row,
                    "column": col
                }

                # Add validation metadata if requested
                if include_validation:
                    validation_info = get_data_validation_for_cell(ws, cell_address)
                    if validation_info:
                        cell_data["validation"] = validation_info
                    else:
                        cell_data["validation"] = {"has_validation": False}

                range_data["cells"].append(cell_data)

        return range_data

    except DataError as e:
        logger.error(str(e))
        raise
    except Exception as e:
        logger.error(f"Failed to read Excel range with metadata: {e}")
        raise DataError(str(e)) from e
    finally:
        # Defensive: release the workbook on every exit path, not only on success.
        if wb is not None:
            wb.close()
|
||||
|
||||
@ -165,7 +165,7 @@ def format_range(
|
||||
full_path = get_excel_path(filepath)
|
||||
from excel_mcp.formatting import format_range as format_range_func
|
||||
|
||||
result = format_range_func(
|
||||
format_range_func(
|
||||
filepath=full_path,
|
||||
sheet_name=sheet_name,
|
||||
start_cell=start_cell,
|
||||
@ -201,20 +201,35 @@ def read_data_from_excel(
|
||||
preview_only: bool = False
|
||||
) -> str:
|
||||
"""
|
||||
Read data from Excel worksheet.
|
||||
Read data from Excel worksheet with cell metadata including validation rules.
|
||||
|
||||
Args:
|
||||
filepath: Path to Excel file
|
||||
sheet_name: Name of worksheet
|
||||
start_cell: Starting cell (default A1)
|
||||
end_cell: Ending cell (optional, auto-expands if not provided)
|
||||
preview_only: Whether to return preview only
|
||||
|
||||
Returns:
|
||||
Data from Excel worksheet as json string. list of lists or empty list if no data found. sublists are assumed to be rows.
|
||||
JSON string containing structured cell data with validation metadata.
|
||||
Each cell includes: address, value, row, column, and validation info (if any).
|
||||
"""
|
||||
try:
|
||||
full_path = get_excel_path(filepath)
|
||||
from excel_mcp.data import read_excel_range
|
||||
result = read_excel_range(full_path, sheet_name, start_cell, end_cell, preview_only)
|
||||
if not result:
|
||||
from excel_mcp.data import read_excel_range_with_metadata
|
||||
result = read_excel_range_with_metadata(
|
||||
full_path,
|
||||
sheet_name,
|
||||
start_cell,
|
||||
end_cell
|
||||
)
|
||||
if not result or not result.get("cells"):
|
||||
return "No data found in specified range"
|
||||
# Convert the list of dicts to a formatted string
|
||||
data_str = "\n".join([str(row) for row in result])
|
||||
return data_str
|
||||
|
||||
# Return as formatted JSON string
|
||||
import json
|
||||
return json.dumps(result, indent=2, default=str)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error reading data: {e}")
|
||||
raise
|
||||
@ -253,7 +268,7 @@ def create_workbook(filepath: str) -> str:
|
||||
try:
|
||||
full_path = get_excel_path(filepath)
|
||||
from excel_mcp.workbook import create_workbook as create_workbook_impl
|
||||
result = create_workbook_impl(full_path)
|
||||
create_workbook_impl(full_path)
|
||||
return f"Created workbook at {full_path}"
|
||||
except WorkbookError as e:
|
||||
return f"Error: {str(e)}"
|
||||
@ -500,6 +515,50 @@ def validate_excel_range(
|
||||
logger.error(f"Error validating range: {e}")
|
||||
raise
|
||||
|
||||
@mcp.tool()
def get_data_validation_info(
    filepath: str,
    sheet_name: str
) -> str:
    """
    Get all data validation rules in a worksheet.

    This tool helps identify which cell ranges have validation rules
    and what types of validation are applied.

    Args:
        filepath: Path to Excel file
        sheet_name: Name of worksheet

    Returns:
        JSON string containing all validation rules in the worksheet
    """
    # NOTE: the docstring above is kept verbatim; the tool decorator may
    # expose it to clients at runtime.
    try:
        full_path = get_excel_path(filepath)
        import json
        from openpyxl import load_workbook
        from excel_mcp.cell_validation import get_all_validation_ranges

        workbook = load_workbook(full_path, read_only=False)
        if sheet_name not in workbook.sheetnames:
            return f"Error: Sheet '{sheet_name}' not found"

        rules = get_all_validation_ranges(workbook[sheet_name])
        workbook.close()

        if rules:
            payload = {
                "sheet_name": sheet_name,
                "validation_rules": rules
            }
            return json.dumps(payload, indent=2, default=str)
        return "No data validation rules found in this worksheet"

    except Exception as e:
        # Log for diagnostics, then let the caller see the original exception.
        logger.error(f"Error getting validation info: {e}")
        raise
|
||||
|
||||
async def run_sse():
|
||||
"""Run Excel MCP server in SSE mode."""
|
||||
# Assign value to EXCEL_FILES_PATH in SSE mode
|
||||
|
||||
25
uv.lock
generated
25
uv.lock
generated
@ -1,5 +1,4 @@
|
||||
version = 1
|
||||
revision = 1
|
||||
requires-python = ">=3.10"
|
||||
|
||||
[[package]]
|
||||
@ -67,17 +66,19 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "excel-mcp-server"
|
||||
version = "0.1.1"
|
||||
version = "0.1.3"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "mcp", extra = ["cli"] },
|
||||
{ name = "openpyxl" },
|
||||
{ name = "typer" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "mcp", extras = ["cli"], specifier = ">=1.2.0" },
|
||||
{ name = "mcp", extras = ["cli"], specifier = ">=1.6.0" },
|
||||
{ name = "openpyxl", specifier = ">=3.1.2" },
|
||||
{ name = "typer", specifier = ">=0.15.1" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -158,7 +159,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "mcp"
|
||||
version = "1.2.1"
|
||||
version = "1.9.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "anyio" },
|
||||
@ -166,13 +167,14 @@ dependencies = [
|
||||
{ name = "httpx-sse" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "pydantic-settings" },
|
||||
{ name = "python-multipart" },
|
||||
{ name = "sse-starlette" },
|
||||
{ name = "starlette" },
|
||||
{ name = "uvicorn" },
|
||||
{ name = "uvicorn", marker = "sys_platform != 'emscripten'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/fc/30/51e4555826126e3954fa2ab1e934bf74163c5fe05e98f38ca4d0f8abbf63/mcp-1.2.1.tar.gz", hash = "sha256:c9d43dbfe943aa1530e2be8f54b73af3ebfb071243827b4483d421684806cb45", size = 103968 }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/e7/bc/54aec2c334698cc575ca3b3481eed627125fb66544152fa1af927b1a495c/mcp-1.9.1.tar.gz", hash = "sha256:19879cd6dde3d763297617242888c2f695a95dfa854386a6a68676a646ce75e4", size = 316247 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/4c/0d/6770742a84c8aa1d36c0d628896a380584c5759612e66af7446af07d8775/mcp-1.2.1-py3-none-any.whl", hash = "sha256:579bf9c9157850ebb1344f3ca6f7a3021b0123c44c9f089ef577a7062522f0fd", size = 66453 },
|
||||
{ url = "https://files.pythonhosted.org/packages/a6/c0/4ac795585a22a0a2d09cd2b1187b0252d2afcdebd01e10a68bbac4d34890/mcp-1.9.1-py3-none-any.whl", hash = "sha256:2900ded8ffafc3c8a7bfcfe8bc5204037e988e753ec398f371663e6a06ecd9a9", size = 130261 },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
@ -322,6 +324,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/6a/3e/b68c118422ec867fa7ab88444e1274aa40681c606d59ac27de5a5588f082/python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a", size = 19863 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-multipart"
|
||||
version = "0.0.20"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f3/87/f44d7c9f274c7ee665a29b885ec97089ec5dc034c7f3fafa03da9e39a09e/python_multipart-0.0.20.tar.gz", hash = "sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13", size = 37158 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/45/58/38b5afbc1a800eeea951b9285d3912613f2603bdf897a4ab0f4bd7f405fc/python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104", size = 24546 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rich"
|
||||
version = "13.9.4"
|
||||
|
||||
Reference in New Issue
Block a user