v0.26.1: fix dual-column XLSX parser bugs
- Rewrite detectAllHeaderSets() with repeat-field detection instead of gap-based splitting
- Require ≥2 header fields per group (filters out false matches like 'Left Over | Paid')
- Fix column leakage: right-side bills no longer pick up left-side amounts
- Add header_set_index to analyzeRow return object for frontend use
- Add isLikelySummaryRow() filter (Paycheck, Left Over, Enter how much, etc.)
- Expand isLikelyTotalRow() to catch 'Auto Total ------>' patterns
- Filter leftover calc rows (null name + negative amount, dash separators)
- Remove 'paid' from HEADER_PATTERNS.amount (was false-matching 'Paid' cells)
- Skip empty string cells in detectAllHeaderSets
This commit is contained in:
parent
d32a30495d
commit
34b0f75918
1650
DEVELOPMENT_LOG.md
1650
DEVELOPMENT_LOG.md
File diff suppressed because it is too large
Load Diff
|
|
@ -1,8 +1,8 @@
|
|||
export const APP_VERSION = '0.26.0';
|
||||
export const APP_VERSION = '0.26.1';
|
||||
export const APP_NAME = 'BillTracker';
|
||||
|
||||
export const RELEASE_NOTES = {
|
||||
version: '0.26.0',
|
||||
version: '0.26.1',
|
||||
date: '2026-05-11',
|
||||
highlights: [
|
||||
{ icon: '📊', title: 'Dual-Column XLSX Import', desc: 'Bills due on the 1st and 15th are now both imported from dual-layout spreadsheets' },
|
||||
|
|
@ -10,5 +10,6 @@ export const RELEASE_NOTES = {
|
|||
{ icon: '🗺️', title: 'Roadmap Page Redesign', desc: 'Kanban-style priority lanes with collapsible items, admin-only roadmap and activity log APIs replacing AdminDashboard' },
|
||||
{ icon: '🛡️', title: 'Import CSRF Fix', desc: 'XLSX, SQLite, and backup imports now include CSRF token (previously blocked with "session expired" error)' },
|
||||
{ icon: '🧹', title: 'AdminDashboard Replaced', desc: 'RoadmapPage now handles admin roadmap and development log display' },
|
||||
{ icon: '🐞', title: 'Dual-Column Parser Bugfixes', desc: 'Fixed header detection (repeat-field instead of gap-based), column leakage, summary row filtering, header_set_index output, and amount header pattern' },
|
||||
],
|
||||
};
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "bill-tracker",
|
||||
"version": "0.26.0",
|
||||
"version": "0.26.1",
|
||||
"description": "Monthly bill tracking system",
|
||||
"main": "server.js",
|
||||
"scripts": {
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ const LABEL_PATTERNS = {
|
|||
|
||||
const HEADER_PATTERNS = {
|
||||
bill_name: /^(?:bill|name|bill\s*name|description|payee|vendor|service)$/i,
|
||||
amount: /^(?:amount|amt|expected|expected\s*amount|cost|price|payment|paid|value)$/i,
|
||||
amount: /^(?:amount|amt|expected|expected\s*amount|cost|price|payment|value)$/i,
|
||||
due_date: /^(?:due\s*date|due|due\s*day)$/i,
|
||||
paid_date: /^(?:paid\s*date|date\s*paid|payment\s*date|date\s*cleared|cleared\s*date)$/i,
|
||||
date: /^(?:date|due\s*date|due|paid\s*date|when|day)$/i,
|
||||
|
|
@ -268,9 +268,10 @@ function detectAllHeaderSets(firstRow) {
|
|||
firstRow.forEach((cell, idx) => {
|
||||
if (cell == null) return;
|
||||
const val = String(cell).trim();
|
||||
if (!val) return;
|
||||
for (const field of Object.keys(HEADER_PATTERNS)) {
|
||||
if (HEADER_PATTERNS[field].test(val)) {
|
||||
headerCells.push({ idx, field });
|
||||
headerCells.push({ idx, field, val });
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -278,52 +279,57 @@ function detectAllHeaderSets(firstRow) {
|
|||
|
||||
if (headerCells.length === 0) return [];
|
||||
|
||||
// Group consecutive header cells into sets
|
||||
// A gap of more than 1 column (empty column) indicates a new header set
|
||||
const headerSets = [];
|
||||
let currentSet = { startCol: headerCells[0].idx, endCol: headerCells[0].idx, fields: [headerCells[0].field] };
|
||||
// Group header cells into sets by detecting when a field repeats.
|
||||
// When we see the same field name again (e.g., second "Bill", second "Amount"),
|
||||
// that indicates the start of a new header group (dual-column layout).
|
||||
// Null columns between fields within a group are just empty columns — they
|
||||
// don't split the group (left half has: Due date | Bill | Amount | null | Date Cleared).
|
||||
const seenFields = new Set();
|
||||
const groups = [];
|
||||
let currentGroup = { cells: [headerCells[0]] };
|
||||
seenFields.add(headerCells[0].field);
|
||||
|
||||
for (let i = 1; i < headerCells.length; i++) {
|
||||
const prevIdx = headerCells[i - 1].idx;
|
||||
const currIdx = headerCells[i].idx;
|
||||
const cell = headerCells[i];
|
||||
|
||||
// Check if there's an empty column between them (gap > 1)
|
||||
let hasGap = false;
|
||||
for (let gapIdx = prevIdx + 1; gapIdx < currIdx; gapIdx++) {
|
||||
if (firstRow[gapIdx] == null || String(firstRow[gapIdx]).trim() === '') {
|
||||
hasGap = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Start a new group if this field was already seen (repeat = new column set)
|
||||
// or if the column indices are more than 3 apart (i.e. at least 3 non-header columns sit between this and previous)
|
||||
const prevCell = headerCells[i - 1];
|
||||
const colGap = cell.idx - prevCell.idx;
|
||||
const isRepeatField = seenFields.has(cell.field);
|
||||
const isLargeGap = colGap > 3;
|
||||
|
||||
if (hasGap) {
|
||||
// Save current set and start a new one
|
||||
headerSets.push(currentSet);
|
||||
currentSet = { startCol: currIdx, endCol: currIdx, fields: [headerCells[i].field] };
|
||||
if (isRepeatField || isLargeGap) {
|
||||
groups.push(currentGroup);
|
||||
currentGroup = { cells: [cell] };
|
||||
seenFields.clear();
|
||||
seenFields.add(cell.field);
|
||||
} else {
|
||||
currentSet.endCol = currIdx;
|
||||
currentSet.fields.push(headerCells[i].field);
|
||||
currentGroup.cells.push(cell);
|
||||
seenFields.add(cell.field);
|
||||
}
|
||||
}
|
||||
headerSets.push(currentSet);
|
||||
groups.push(currentGroup);
|
||||
|
||||
// Convert to final format with maps and defaultDueDay
|
||||
return headerSets.map(set => {
|
||||
// Convert groups to return format with header maps and default due days
|
||||
const result = [];
|
||||
for (const group of groups) {
|
||||
const map = {};
|
||||
for (const field of set.fields) {
|
||||
// Find the first occurrence of this field in the set
|
||||
for (let i = set.startCol; i <= set.endCol; i++) {
|
||||
if (firstRow[i] != null && HEADER_PATTERNS[field].test(String(firstRow[i]).trim())) {
|
||||
map[field] = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Default due_day based on column position: left half (cols < 5) = 1, right half (cols >= 6) = 15
|
||||
const defaultDueDay = set.startCol < 5 ? 1 : 15;
|
||||
group.cells.forEach(h => map[h.field] = h.idx);
|
||||
|
||||
return { startCol: set.startCol, endCol: set.endCol, map, defaultDueDay };
|
||||
});
|
||||
const startCol = group.cells[0].idx;
|
||||
const endCol = group.cells[group.cells.length - 1].idx;
|
||||
const defaultDueDay = startCol < 5 ? 1 : 15;
|
||||
|
||||
// Require at least 2 header fields (bill_name + amount, or similar) to count as a real header set.
|
||||
// This filters out spurious rows like "Left Over | $3,204.20 | Paid" where
|
||||
// "Paid" alone matches the amount pattern but isn't a real column header.
|
||||
if (Object.keys(map).length >= 2) {
|
||||
result.push({ startCol, endCol, map, defaultDueDay });
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -374,7 +380,17 @@ function isLikelyHeaderRow(cells) {
|
|||
|
||||
/**
 * Detect rows that hold a total/subtotal rather than a bill entry.
 *
 * A row counts as a total row when any non-null cell, after trimming, is:
 *   - exactly one of "total", "subtotal", "sum", "grand total" (case-insensitive), or
 *   - contains "total" followed by a dash arrow, e.g. "Auto Total ------>", or
 *   - starts with "auto total".
 *
 * The bare labels are matched exactly (anchored at both ends) so that real
 * bill names which merely start with those words — "Summer Camp",
 * "Total Wireless", "Sump pump" — are not discarded as totals.
 *
 * @param {Array<*>} cells - Raw cell values for one spreadsheet row.
 * @returns {boolean} True when at least one cell looks like a total label.
 */
function isLikelyTotalRow(cells) {
  // Bare labels: must be the whole cell, otherwise "Summer ..." would match "sum".
  const exactLabel = /^(?:total|subtotal|sum|grand\s*total)$/i;
  // Arrow-style labels ("... Total ---->") and "Auto Total ..." prefixes.
  const arrowOrAutoLabel = /total\s*-+>|^auto\s+total\b/i;

  return cells.some((c) => {
    if (c == null) return false;
    const text = String(c).trim();
    return exactLabel.test(text) || arrowOrAutoLabel.test(text);
  });
}
|
||||
|
||||
/**
 * Detect rows that are financial summaries, not bill entries.
 * Catches "Paycheck", "Left Over", "Enter how much...", etc.
 *
 * A row is treated as a summary row when any non-null cell, after trimming,
 * STARTS with one of the known summary labels (case-insensitive). The match
 * is a prefix match so that free-form prompts such as
 * "Enter how much you have..." are caught as well.
 *
 * @param {Array<*>} cells - Raw cell values for one spreadsheet row.
 * @returns {boolean} True when at least one cell begins with a summary label.
 */
function isLikelySummaryRow(cells) {
  // Built once per call instead of once per cell; no /g flag, so .test() is stateless.
  const summaryLabel = /^(?:paycheck|left\s*over|enter\s+how\s+much|starting\s+balance|ending\s+balance|carry\s*over|carried\s*over|balance\s+(?:forward|carried)|bank\s+balance)/i;

  return cells.some((c) => c != null && summaryLabel.test(String(c).trim()));
}
|
||||
|
||||
|
|
@ -755,8 +771,11 @@ function findFirstAmountCell(cells, skipIndices) {
|
|||
return null;
|
||||
}
|
||||
|
||||
function collectNotesCells(cells, headerMap, billName) {
|
||||
function collectNotesCells(cells, headerMap, billName, allHeaderColumns = null) {
|
||||
const skipIndices = new Set(Object.values(headerMap));
|
||||
if (allHeaderColumns) {
|
||||
for (const idx of allHeaderColumns) skipIndices.add(idx);
|
||||
}
|
||||
const parts = [];
|
||||
for (let i = 0; i < cells.length; i++) {
|
||||
if (skipIndices.has(i) || cells[i] == null) continue;
|
||||
|
|
@ -773,7 +792,7 @@ function collectNotesCells(cells, headerMap, billName) {
|
|||
|
||||
// ─── Single-Row Analyzer ──────────────────────────────────────────────────────
|
||||
|
||||
function analyzeRow(rowIndex, cells, headerMap, headerLabels, userBills, categories, sheetName, sheetYear, sheetMonth, defaultYear, defaultMonth, rowIdPrefix, defaultDueDay = null, headerSetIndex = null) {
|
||||
function analyzeRow(rowIndex, cells, headerMap, headerLabels, userBills, categories, sheetName, sheetYear, sheetMonth, defaultYear, defaultMonth, rowIdPrefix, defaultDueDay = null, headerSetIndex = null, allHeaderColumns = null) {
|
||||
const get = (field) => {
|
||||
const idx = headerMap[field];
|
||||
return idx !== undefined ? cells[idx] : undefined;
|
||||
|
|
@ -782,7 +801,12 @@ function analyzeRow(rowIndex, cells, headerMap, headerLabels, userBills, categor
|
|||
const rawBillName = get('bill_name') ?? cells[0];
|
||||
const billName = rawBillName ? String(rawBillName).trim() || null : null;
|
||||
|
||||
// Skip indices: own header columns + all other header sets' columns (for dual-column layouts)
|
||||
// This prevents fallback lookups from picking up values from the other column group.
|
||||
const skipIndices = new Set(Object.values(headerMap));
|
||||
if (allHeaderColumns) {
|
||||
for (const idx of allHeaderColumns) skipIndices.add(idx);
|
||||
}
|
||||
const rawAmount = get('amount') ?? findFirstAmountCell(cells, skipIndices);
|
||||
const detectedAmount = parseAmount(rawAmount);
|
||||
|
||||
|
|
@ -805,7 +829,7 @@ function analyzeRow(rowIndex, cells, headerMap, headerLabels, userBills, categor
|
|||
const detectedPaidDate = resolveDateIso(parsedPaidDate, paidDateYear);
|
||||
const rawCategory = get('category');
|
||||
const detectedCategory = rawCategory ? String(rawCategory).trim() || null : null;
|
||||
const notesText = collectNotesCells(cells, headerMap, billName);
|
||||
const notesText = collectNotesCells(cells, headerMap, billName, allHeaderColumns);
|
||||
const allText = cells.filter((c) => c != null && typeof c === 'string').map((c) => c.trim()).join(' ');
|
||||
const detectedLabels = detectLabels(allText);
|
||||
const rawValues = cells.map((c) => (c != null ? String(c) : null));
|
||||
|
|
@ -860,6 +884,7 @@ function analyzeRow(rowIndex, cells, headerMap, headerLabels, userBills, categor
|
|||
possible_bill_matches: possibleMatches,
|
||||
requires_user_decision: requiresUserDecision,
|
||||
due_day: recommendation.due_day,
|
||||
header_set_index: headerSetIndex,
|
||||
recommendation,
|
||||
};
|
||||
}
|
||||
|
|
@ -913,6 +938,21 @@ function parseSheetRows({ name, rawRows, year: sheetYear, month: sheetMonth, row
|
|||
const hasHeaders = hasValidHeaders;
|
||||
const startRow = hasHeaders ? headerRowIndex + 1 : 0;
|
||||
|
||||
// For dual-column layouts, collect ALL column indices across all header sets
|
||||
// so that fallback lookups (findFirstAmountCell, collectNotesCells) don't
|
||||
// accidentally pick up values from the other column set.
|
||||
// This includes the full range [startCol..endCol] for each set, not just
|
||||
// the mapped columns, because gap columns within a set also belong to that side.
|
||||
const allColumnsIndices = new Set();
|
||||
for (const set of allHeaderSets) {
|
||||
for (const idx of Object.values(set.map)) {
|
||||
allColumnsIndices.add(idx);
|
||||
}
|
||||
for (let i = set.startCol; i <= set.endCol; i++) {
|
||||
allColumnsIndices.add(i);
|
||||
}
|
||||
}
|
||||
|
||||
const rows = [];
|
||||
|
||||
// Process each header set independently
|
||||
|
|
@ -934,11 +974,37 @@ function parseSheetRows({ name, rawRows, year: sheetYear, month: sheetMonth, row
|
|||
// Skip total rows
|
||||
if (isLikelyTotalRow(cells)) continue;
|
||||
|
||||
// Skip financial summary rows (Paycheck, Left Over, etc.)
|
||||
if (isLikelySummaryRow(cells)) continue;
|
||||
|
||||
// Skip leftover calculation rows: null/blank bill name with negative amount, or dash separators
|
||||
const getBillName = (field) => {
|
||||
const idx = headerMap[field];
|
||||
return idx !== undefined ? cells[idx] : undefined;
|
||||
};
|
||||
const get = (field) => {
|
||||
const idx = headerMap[field];
|
||||
return idx !== undefined ? cells[idx] : undefined;
|
||||
};
|
||||
const rawBillName = getBillName('bill_name') ?? cells[0];
|
||||
const billName = rawBillName ? String(rawBillName).trim() || null : null;
|
||||
const rawAmount = get('amount') ?? findFirstAmountCell(cells, new Set(Object.values(headerMap)));
|
||||
const amount = rawAmount !== null ? parseAmount(rawAmount) : null;
|
||||
|
||||
// Check if bill name is a dash separator (--- or ---->)
|
||||
const isDashSeparator = billName && (billName.match(/^-+>/) || billName.match(/^--+$/));
|
||||
|
||||
// Check if this is a leftover calculation row (null/blank bill name + negative amount)
|
||||
// Skip if bill name is null AND amount is negative
|
||||
const isLeftoverCalcRow = !billName && amount !== null && amount < 0;
|
||||
|
||||
if (isDashSeparator || isLeftoverCalcRow) continue;
|
||||
|
||||
rows.push(analyzeRow(
|
||||
i, cells, headerMap, headerLabels, userBills, categories,
|
||||
name, sheetYear, sheetMonth,
|
||||
defaultYear, defaultMonth, rowIdPrefix,
|
||||
defaultDueDay, setIdx,
|
||||
defaultDueDay, setIdx, allColumnsIndices,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue