mirror of
https://github.com/haris-musa/excel-mcp-server.git
synced 2025-12-08 17:12:41 +08:00
Initial commit
This commit is contained in:
235
src/excel_mcp/validation.py
Normal file
235
src/excel_mcp/validation.py
Normal file
@ -0,0 +1,235 @@
|
||||
import logging
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from openpyxl import load_workbook
|
||||
from openpyxl.utils import get_column_letter
|
||||
from openpyxl.worksheet.worksheet import Worksheet
|
||||
|
||||
from .cell_utils import parse_cell_range, validate_cell_reference
|
||||
from .exceptions import ValidationError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def validate_formula_in_cell_operation(
|
||||
filepath: str,
|
||||
sheet_name: str,
|
||||
cell: str,
|
||||
formula: str
|
||||
) -> dict[str, Any]:
|
||||
"""Validate Excel formula before writing"""
|
||||
try:
|
||||
wb = load_workbook(filepath)
|
||||
if sheet_name not in wb.sheetnames:
|
||||
raise ValidationError(f"Sheet '{sheet_name}' not found")
|
||||
|
||||
if not validate_cell_reference(cell):
|
||||
raise ValidationError(f"Invalid cell reference: {cell}")
|
||||
|
||||
# First validate the provided formula's syntax
|
||||
is_valid, message = validate_formula(formula)
|
||||
if not is_valid:
|
||||
raise ValidationError(f"Invalid formula syntax: {message}")
|
||||
|
||||
# Additional validation for cell references in formula
|
||||
cell_refs = re.findall(r'[A-Z]+[0-9]+(?::[A-Z]+[0-9]+)?', formula)
|
||||
for ref in cell_refs:
|
||||
if ':' in ref: # Range reference
|
||||
start, end = ref.split(':')
|
||||
if not (validate_cell_reference(start) and validate_cell_reference(end)):
|
||||
raise ValidationError(f"Invalid cell range reference in formula: {ref}")
|
||||
else: # Single cell reference
|
||||
if not validate_cell_reference(ref):
|
||||
raise ValidationError(f"Invalid cell reference in formula: {ref}")
|
||||
|
||||
# Now check if there's a formula in the cell and compare
|
||||
sheet = wb[sheet_name]
|
||||
cell_obj = sheet[cell]
|
||||
current_formula = cell_obj.value
|
||||
|
||||
# If cell has a formula (starts with =)
|
||||
if isinstance(current_formula, str) and current_formula.startswith('='):
|
||||
if formula.startswith('='):
|
||||
if current_formula != formula:
|
||||
return {
|
||||
"message": "Formula is valid but doesn't match cell content",
|
||||
"valid": True,
|
||||
"matches": False,
|
||||
"cell": cell,
|
||||
"provided_formula": formula,
|
||||
"current_formula": current_formula
|
||||
}
|
||||
else:
|
||||
if current_formula != f"={formula}":
|
||||
return {
|
||||
"message": "Formula is valid but doesn't match cell content",
|
||||
"valid": True,
|
||||
"matches": False,
|
||||
"cell": cell,
|
||||
"provided_formula": formula,
|
||||
"current_formula": current_formula
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"message": "Formula is valid and matches cell content",
|
||||
"valid": True,
|
||||
"matches": True,
|
||||
"cell": cell,
|
||||
"formula": formula
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"message": "Formula is valid but cell contains no formula",
|
||||
"valid": True,
|
||||
"matches": False,
|
||||
"cell": cell,
|
||||
"provided_formula": formula,
|
||||
"current_content": str(current_formula) if current_formula else ""
|
||||
}
|
||||
|
||||
except ValidationError as e:
|
||||
logger.error(str(e))
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to validate formula: {e}")
|
||||
raise ValidationError(str(e))
|
||||
|
||||
def validate_range_in_sheet_operation(
|
||||
filepath: str,
|
||||
sheet_name: str,
|
||||
start_cell: str,
|
||||
end_cell: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Validate if a range exists in a worksheet and return data range info."""
|
||||
try:
|
||||
wb = load_workbook(filepath)
|
||||
if sheet_name not in wb.sheetnames:
|
||||
raise ValidationError(f"Sheet '{sheet_name}' not found")
|
||||
|
||||
worksheet = wb[sheet_name]
|
||||
|
||||
# Get actual data dimensions
|
||||
data_max_row = worksheet.max_row
|
||||
data_max_col = worksheet.max_column
|
||||
|
||||
# Validate range
|
||||
try:
|
||||
start_row, start_col, end_row, end_col = parse_cell_range(start_cell, end_cell)
|
||||
except ValueError as e:
|
||||
raise ValidationError(f"Invalid range: {str(e)}")
|
||||
|
||||
# If end not specified, use start
|
||||
if end_row is None:
|
||||
end_row = start_row
|
||||
if end_col is None:
|
||||
end_col = start_col
|
||||
|
||||
# Validate bounds against maximum possible Excel limits
|
||||
is_valid, message = validate_range_bounds(
|
||||
worksheet, start_row, start_col, end_row, end_col
|
||||
)
|
||||
if not is_valid:
|
||||
raise ValidationError(message)
|
||||
|
||||
range_str = f"{start_cell}" if end_cell is None else f"{start_cell}:{end_cell}"
|
||||
data_range_str = f"A1:{get_column_letter(data_max_col)}{data_max_row}"
|
||||
|
||||
# Check if range is within data or extends beyond
|
||||
extends_beyond_data = (
|
||||
end_row > data_max_row or
|
||||
end_col > data_max_col
|
||||
)
|
||||
|
||||
return {
|
||||
"message": (
|
||||
f"Range '{range_str}' is valid. "
|
||||
f"Sheet contains data in range '{data_range_str}'"
|
||||
),
|
||||
"valid": True,
|
||||
"range": range_str,
|
||||
"data_range": data_range_str,
|
||||
"extends_beyond_data": extends_beyond_data,
|
||||
"data_dimensions": {
|
||||
"max_row": data_max_row,
|
||||
"max_col": data_max_col,
|
||||
"max_col_letter": get_column_letter(data_max_col)
|
||||
}
|
||||
}
|
||||
except ValidationError as e:
|
||||
logger.error(str(e))
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to validate range: {e}")
|
||||
raise ValidationError(str(e))
|
||||
|
||||
def validate_formula(formula: str) -> tuple[bool, str]:
|
||||
"""Validate Excel formula syntax and safety"""
|
||||
if not formula.startswith("="):
|
||||
return False, "Formula must start with '='"
|
||||
|
||||
# Remove the '=' prefix for validation
|
||||
formula = formula[1:]
|
||||
|
||||
# Check for balanced parentheses
|
||||
parens = 0
|
||||
for c in formula:
|
||||
if c == "(":
|
||||
parens += 1
|
||||
elif c == ")":
|
||||
parens -= 1
|
||||
if parens < 0:
|
||||
return False, "Unmatched closing parenthesis"
|
||||
|
||||
if parens > 0:
|
||||
return False, "Unclosed parenthesis"
|
||||
|
||||
# Basic function name validation
|
||||
func_pattern = r"([A-Z]+)\("
|
||||
funcs = re.findall(func_pattern, formula)
|
||||
unsafe_funcs = {"INDIRECT", "HYPERLINK", "WEBSERVICE", "DGET", "RTD"}
|
||||
|
||||
for func in funcs:
|
||||
if func in unsafe_funcs:
|
||||
return False, f"Unsafe function: {func}"
|
||||
|
||||
return True, "Formula is valid"
|
||||
|
||||
|
||||
def validate_range_bounds(
|
||||
worksheet: Worksheet,
|
||||
start_row: int,
|
||||
start_col: int,
|
||||
end_row: int | None = None,
|
||||
end_col: int | None = None,
|
||||
) -> tuple[bool, str]:
|
||||
"""Validate that cell range is within worksheet bounds"""
|
||||
max_row = worksheet.max_row
|
||||
max_col = worksheet.max_column
|
||||
|
||||
try:
|
||||
# Check start cell bounds
|
||||
if start_row < 1 or start_row > max_row:
|
||||
return False, f"Start row {start_row} out of bounds (1-{max_row})"
|
||||
if start_col < 1 or start_col > max_col:
|
||||
return False, (
|
||||
f"Start column {get_column_letter(start_col)} "
|
||||
f"out of bounds (A-{get_column_letter(max_col)})"
|
||||
)
|
||||
|
||||
# If end cell specified, check its bounds
|
||||
if end_row is not None and end_col is not None:
|
||||
if end_row < start_row:
|
||||
return False, "End row cannot be before start row"
|
||||
if end_col < start_col:
|
||||
return False, "End column cannot be before start column"
|
||||
if end_row > max_row:
|
||||
return False, f"End row {end_row} out of bounds (1-{max_row})"
|
||||
if end_col > max_col:
|
||||
return False, (
|
||||
f"End column {get_column_letter(end_col)} "
|
||||
f"out of bounds (A-{get_column_letter(max_col)})"
|
||||
)
|
||||
|
||||
return True, "Range is valid"
|
||||
except Exception as e:
|
||||
return False, f"Invalid range: {e!s}"
|
||||
Reference in New Issue
Block a user