v0.26.1: fix dual-column XLSX parser bugs

- Rewrite detectAllHeaderSets() with repeat-field detection instead of gap-based splitting
- Require ≥2 header fields per group (filters out false matches like 'Left Over | Paid')
- Fix column leakage: right-side bills no longer pick up left-side amounts
- Add header_set_index to analyzeRow return object for frontend use
- Add isLikelySummaryRow() filter (Paycheck, Left Over, Enter how much, etc.)
- Expand isLikelyTotalRow() to catch 'Auto Total ------>' patterns
- Filter leftover calc rows (null name + negative amount, dash separators)
- Remove 'paid' from HEADER_PATTERNS.amount (was false-matching 'Paid' cells)
- Skip empty string cells in detectAllHeaderSets
This commit is contained in:
null 2026-05-11 23:17:19 -05:00
parent d32a30495d
commit 34b0f75918
4 changed files with 257 additions and 1552 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,8 +1,8 @@
export const APP_VERSION = '0.26.0';
export const APP_VERSION = '0.26.1';
export const APP_NAME = 'BillTracker';
export const RELEASE_NOTES = {
version: '0.26.0',
version: '0.26.1',
date: '2026-05-11',
highlights: [
{ icon: '📊', title: 'Dual-Column XLSX Import', desc: 'Bills due on the 1st and 15th are now both imported from dual-layout spreadsheets' },
@ -10,5 +10,6 @@ export const RELEASE_NOTES = {
{ icon: '🗺️', title: 'Roadmap Page Redesign', desc: 'Kanban-style priority lanes with collapsible items, admin-only roadmap and activity log APIs replacing AdminDashboard' },
{ icon: '🛡️', title: 'Import CSRF Fix', desc: 'XLSX, SQLite, and backup imports now include CSRF token (previously blocked with "session expired" error)' },
{ icon: '🧹', title: 'AdminDashboard Replaced', desc: 'RoadmapPage now handles admin roadmap and development log display' },
{ icon: '🐞', title: 'Dual-Column Parser Bugfixes', desc: 'Fixed header detection (repeat-field instead of gap-based), column leakage, summary row filtering, header_set_index output, and amount header pattern' },
],
};

View File

@ -1,6 +1,6 @@
{
"name": "bill-tracker",
"version": "0.26.0",
"version": "0.26.1",
"description": "Monthly bill tracking system",
"main": "server.js",
"scripts": {

View File

@ -29,7 +29,7 @@ const LABEL_PATTERNS = {
const HEADER_PATTERNS = {
bill_name: /^(?:bill|name|bill\s*name|description|payee|vendor|service)$/i,
amount: /^(?:amount|amt|expected|expected\s*amount|cost|price|payment|paid|value)$/i,
amount: /^(?:amount|amt|expected|expected\s*amount|cost|price|payment|value)$/i,
due_date: /^(?:due\s*date|due|due\s*day)$/i,
paid_date: /^(?:paid\s*date|date\s*paid|payment\s*date|date\s*cleared|cleared\s*date)$/i,
date: /^(?:date|due\s*date|due|paid\s*date|when|day)$/i,
@ -268,9 +268,10 @@ function detectAllHeaderSets(firstRow) {
firstRow.forEach((cell, idx) => {
if (cell == null) return;
const val = String(cell).trim();
if (!val) return;
for (const field of Object.keys(HEADER_PATTERNS)) {
if (HEADER_PATTERNS[field].test(val)) {
headerCells.push({ idx, field });
headerCells.push({ idx, field, val });
break;
}
}
@ -278,52 +279,57 @@ function detectAllHeaderSets(firstRow) {
if (headerCells.length === 0) return [];
// Group consecutive header cells into sets
// A gap of more than 1 column (empty column) indicates a new header set
const headerSets = [];
let currentSet = { startCol: headerCells[0].idx, endCol: headerCells[0].idx, fields: [headerCells[0].field] };
// Group header cells into sets by detecting when a field repeats.
// When we see the same field name again (e.g., second "Bill", second "Amount"),
// that indicates the start of a new header group (dual-column layout).
// Null columns between fields within a group are just empty columns — they
// don't split the group (left half has: Due date | Bill | Amount | null | Date Cleared).
const seenFields = new Set();
const groups = [];
let currentGroup = { cells: [headerCells[0]] };
seenFields.add(headerCells[0].field);
for (let i = 1; i < headerCells.length; i++) {
const prevIdx = headerCells[i - 1].idx;
const currIdx = headerCells[i].idx;
const cell = headerCells[i];
// Check if there's an empty column between them (gap > 1)
let hasGap = false;
for (let gapIdx = prevIdx + 1; gapIdx < currIdx; gapIdx++) {
if (firstRow[gapIdx] == null || String(firstRow[gapIdx]).trim() === '') {
hasGap = true;
break;
}
}
// Start a new group if this field was already seen (repeat = new column set)
// or if there's a large column gap (>3 empty columns) between this and previous
const prevCell = headerCells[i - 1];
const colGap = cell.idx - prevCell.idx;
const isRepeatField = seenFields.has(cell.field);
const isLargeGap = colGap > 3;
if (hasGap) {
// Save current set and start a new one
headerSets.push(currentSet);
currentSet = { startCol: currIdx, endCol: currIdx, fields: [headerCells[i].field] };
if (isRepeatField || isLargeGap) {
groups.push(currentGroup);
currentGroup = { cells: [cell] };
seenFields.clear();
seenFields.add(cell.field);
} else {
currentSet.endCol = currIdx;
currentSet.fields.push(headerCells[i].field);
currentGroup.cells.push(cell);
seenFields.add(cell.field);
}
}
headerSets.push(currentSet);
groups.push(currentGroup);
// Convert to final format with maps and defaultDueDay
return headerSets.map(set => {
// Convert groups to return format with header maps and default due days
const result = [];
for (const group of groups) {
const map = {};
for (const field of set.fields) {
// Find the first occurrence of this field in the set
for (let i = set.startCol; i <= set.endCol; i++) {
if (firstRow[i] != null && HEADER_PATTERNS[field].test(String(firstRow[i]).trim())) {
map[field] = i;
break;
}
}
}
// Default due_day based on column position: left half (cols < 5) = 1, right half (cols >= 6) = 15
const defaultDueDay = set.startCol < 5 ? 1 : 15;
group.cells.forEach(h => map[h.field] = h.idx);
return { startCol: set.startCol, endCol: set.endCol, map, defaultDueDay };
});
const startCol = group.cells[0].idx;
const endCol = group.cells[group.cells.length - 1].idx;
const defaultDueDay = startCol < 5 ? 1 : 15;
// Require at least 2 header fields (bill_name + amount, or similar) to count as a real header set.
// This filters out spurious rows like "Left Over | $3,204.20 | Paid" where
// "Paid" alone matches the amount pattern but isn't a real column header.
if (Object.keys(map).length >= 2) {
result.push({ startCol, endCol, map, defaultDueDay });
}
}
return result;
}
@ -374,7 +380,17 @@ function isLikelyHeaderRow(cells) {
/**
 * Detect rows that hold a total/subtotal line rather than a bill entry.
 *
 * A row is treated as a total row when any non-null cell, after being
 * stringified and trimmed, is one of:
 *   - exactly "total", "subtotal", "sum" or "grand total" (an optional
 *     trailing colon is tolerated), or
 *   - any text ending its label with "total" followed by a dash arrow,
 *     e.g. "Auto Total ------>", or
 *   - text starting with the words "auto total".
 *
 * The plain labels are matched against the whole cell on purpose: a prefix
 * match would also swallow legitimate bill names that merely start with
 * those letters (e.g. "Summit Utilities", "Total Wireless").
 *
 * @param {Array<*>} cells - Raw cell values for one spreadsheet row.
 * @returns {boolean} True if at least one cell looks like a total label.
 */
function isLikelyTotalRow(cells) {
  const totalLabel = /^(?:(?:total|subtotal|sum|grand\s*total)\s*:?$|.*total\s*-+>|auto\s+total\b)/i;
  return cells.some((c) => c != null && totalLabel.test(String(c).trim()));
}
/**
 * Report whether a row is a financial-summary line instead of a bill entry.
 *
 * The row qualifies as soon as one non-null cell, once stringified and
 * trimmed, begins with a known summary label ("Paycheck", "Left Over",
 * "Enter how much", "Starting balance", ...). Matching is case-insensitive
 * and prefix-based, so trailing text after the label does not matter.
 *
 * @param {Array<*>} cells - Raw cell values for one spreadsheet row.
 * @returns {boolean} True if any cell starts with a summary label.
 */
function isLikelySummaryRow(cells) {
  const summaryPrefix = /^(?:paycheck|left\s*over|enter\s+how\s+much|starting\s+balance|ending\s+balance|carry\s*over|carried\s*over|balance\s+(?:forward|carried)|bank\s+balance)/i;
  for (const value of cells) {
    // Null and undefined cells carry no label; skip them before stringifying.
    if (value == null) {
      continue;
    }
    const text = String(value).trim();
    if (summaryPrefix.test(text)) {
      return true;
    }
  }
  return false;
}
@ -755,8 +771,11 @@ function findFirstAmountCell(cells, skipIndices) {
return null;
}
function collectNotesCells(cells, headerMap, billName) {
function collectNotesCells(cells, headerMap, billName, allHeaderColumns = null) {
const skipIndices = new Set(Object.values(headerMap));
if (allHeaderColumns) {
for (const idx of allHeaderColumns) skipIndices.add(idx);
}
const parts = [];
for (let i = 0; i < cells.length; i++) {
if (skipIndices.has(i) || cells[i] == null) continue;
@ -773,7 +792,7 @@ function collectNotesCells(cells, headerMap, billName) {
// ─── Single-Row Analyzer ──────────────────────────────────────────────────────
function analyzeRow(rowIndex, cells, headerMap, headerLabels, userBills, categories, sheetName, sheetYear, sheetMonth, defaultYear, defaultMonth, rowIdPrefix, defaultDueDay = null, headerSetIndex = null) {
function analyzeRow(rowIndex, cells, headerMap, headerLabels, userBills, categories, sheetName, sheetYear, sheetMonth, defaultYear, defaultMonth, rowIdPrefix, defaultDueDay = null, headerSetIndex = null, allHeaderColumns = null) {
const get = (field) => {
const idx = headerMap[field];
return idx !== undefined ? cells[idx] : undefined;
@ -782,7 +801,12 @@ function analyzeRow(rowIndex, cells, headerMap, headerLabels, userBills, categor
const rawBillName = get('bill_name') ?? cells[0];
const billName = rawBillName ? String(rawBillName).trim() || null : null;
// Skip indices: own header columns + all other header sets' columns (for dual-column layouts)
// This prevents fallback lookups from picking up values from the other column group.
const skipIndices = new Set(Object.values(headerMap));
if (allHeaderColumns) {
for (const idx of allHeaderColumns) skipIndices.add(idx);
}
const rawAmount = get('amount') ?? findFirstAmountCell(cells, skipIndices);
const detectedAmount = parseAmount(rawAmount);
@ -805,7 +829,7 @@ function analyzeRow(rowIndex, cells, headerMap, headerLabels, userBills, categor
const detectedPaidDate = resolveDateIso(parsedPaidDate, paidDateYear);
const rawCategory = get('category');
const detectedCategory = rawCategory ? String(rawCategory).trim() || null : null;
const notesText = collectNotesCells(cells, headerMap, billName);
const notesText = collectNotesCells(cells, headerMap, billName, allHeaderColumns);
const allText = cells.filter((c) => c != null && typeof c === 'string').map((c) => c.trim()).join(' ');
const detectedLabels = detectLabels(allText);
const rawValues = cells.map((c) => (c != null ? String(c) : null));
@ -860,6 +884,7 @@ function analyzeRow(rowIndex, cells, headerMap, headerLabels, userBills, categor
possible_bill_matches: possibleMatches,
requires_user_decision: requiresUserDecision,
due_day: recommendation.due_day,
header_set_index: headerSetIndex,
recommendation,
};
}
@ -913,6 +938,21 @@ function parseSheetRows({ name, rawRows, year: sheetYear, month: sheetMonth, row
const hasHeaders = hasValidHeaders;
const startRow = hasHeaders ? headerRowIndex + 1 : 0;
// For dual-column layouts, collect ALL column indices across all header sets
// so that fallback lookups (findFirstAmountCell, collectNotesCells) don't
// accidentally pick up values from the other column set.
// This includes the full range [startCol..endCol] for each set, not just
// the mapped columns, because gap columns within a set also belong to that side.
const allColumnsIndices = new Set();
for (const set of allHeaderSets) {
for (const idx of Object.values(set.map)) {
allColumnsIndices.add(idx);
}
for (let i = set.startCol; i <= set.endCol; i++) {
allColumnsIndices.add(i);
}
}
const rows = [];
// Process each header set independently
@ -934,11 +974,37 @@ function parseSheetRows({ name, rawRows, year: sheetYear, month: sheetMonth, row
// Skip total rows
if (isLikelyTotalRow(cells)) continue;
// Skip financial summary rows (Paycheck, Left Over, etc.)
if (isLikelySummaryRow(cells)) continue;
// Skip leftover calculation rows: null/blank bill name with negative amount, or dash separators
const getBillName = (field) => {
const idx = headerMap[field];
return idx !== undefined ? cells[idx] : undefined;
};
const get = (field) => {
const idx = headerMap[field];
return idx !== undefined ? cells[idx] : undefined;
};
const rawBillName = getBillName('bill_name') ?? cells[0];
const billName = rawBillName ? String(rawBillName).trim() || null : null;
const rawAmount = get('amount') ?? findFirstAmountCell(cells, new Set(Object.values(headerMap)));
const amount = rawAmount !== null ? parseAmount(rawAmount) : null;
// Check if bill name is a dash separator (--- or ---->)
const isDashSeparator = billName && (billName.match(/^-+>/) || billName.match(/^--+$/));
// Check if this is a leftover calculation row (null/blank bill name + negative amount)
// Skip if bill name is null AND amount is negative
const isLeftoverCalcRow = !billName && amount !== null && amount < 0;
if (isDashSeparator || isLeftoverCalcRow) continue;
rows.push(analyzeRow(
i, cells, headerMap, headerLabels, userBills, categories,
name, sheetYear, sheetMonth,
defaultYear, defaultMonth, rowIdPrefix,
defaultDueDay, setIdx,
defaultDueDay, setIdx, allColumnsIndices,
));
}
}