Cleanup: Remove unused header-related functions from data.py

This commit is contained in:
Haris Musa
2025-06-10 21:44:16 +05:00
parent de38958edc
commit e58c459493

View File

@@ -135,91 +135,6 @@ def write_data(
logger.error(f"Failed to write data: {e}")
raise DataError(str(e))
def _looks_like_headers(row_dict):
"""Check if a data row appears to be headers (keys match values)."""
return all(
isinstance(value, str) and str(value).strip() == str(key).strip()
for key, value in row_dict.items()
)
def _check_for_headers_above(worksheet, start_row, start_col, headers):
"""Check if cells above start position contain headers."""
if start_row <= 1:
return False # Nothing above row 1
# Look for header-like content above
for check_row in range(max(1, start_row - 5), start_row):
# Count matches for this row
header_count = 0
cell_count = 0
for i, header in enumerate(headers):
if i >= 10: # Limit check to first 10 columns for performance
break
cell = worksheet.cell(row=check_row, column=start_col + i)
cell_count += 1
# Check if cell is formatted like a header (bold)
is_formatted = cell.font.bold if hasattr(cell.font, 'bold') else False
# Check for any content that could be a header
if cell.value is not None:
# Case 1: Direct match with expected header
if str(cell.value).strip().lower() == str(header).strip().lower():
header_count += 2 # Give higher weight to exact matches
# Case 2: Any formatted cell with content
elif is_formatted and cell.value:
header_count += 1
# Case 3: Any cell with content in the first row we check
elif check_row == max(1, start_row - 5):
header_count += 0.5
# If we have a significant number of matching cells, consider it a header row
if cell_count > 0 and header_count >= cell_count * 0.5:
return True
# No headers found above
return False
def _determine_header_behavior(worksheet, start_row, start_col, data):
"""Determine if headers should be written based on context."""
if not data:
return False # No data means no headers
# Check if we're in the title area (rows 1-4)
if start_row <= 4:
return False # Don't add headers in title area
# If we already have data in the sheet, be cautious about adding headers
if worksheet.max_row > 1:
# Check if the target row already has content
has_content = any(
worksheet.cell(row=start_row, column=start_col + i).value is not None
for i in range(min(5, len(data[0].keys())))
)
if has_content:
return False # Don't overwrite existing content with headers
# Check if first row appears to be headers
first_row_is_headers = _looks_like_headers(data[0])
# Check extensively for headers above (up to 5 rows)
has_headers_above = _check_for_headers_above(worksheet, start_row, start_col, list(data[0].keys()))
# Be conservative - don't add headers if we detect headers above or the data has headers
if has_headers_above or first_row_is_headers:
return False
# If we're appending data immediately after existing data, don't add headers
if any(worksheet.cell(row=start_row-1, column=start_col + i).value is not None
for i in range(min(5, len(data[0].keys())))):
return False
# For completely new sheets or empty areas far from content, add headers
return True
def _write_data_to_worksheet(
worksheet: Worksheet,
data: list[list],