feat: Implement data archiving functionality in DataManager, enhance input validation, and add UI option for archiving old data
This commit is contained in:
@@ -2,6 +2,8 @@ import csv
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
@@ -312,6 +314,127 @@ class DataManager:
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Archiving / Rotation
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def _get_archive_dir(self) -> str:
    """Return the path of the ``archives`` directory beside the main CSV.

    The directory is created if it does not exist yet, so callers can
    write into it immediately.

    Returns:
        Absolute path of the archives directory.

    Raises:
        OSError: If the directory cannot be created.
    """
    # dirname() of an absolute path is never empty, so no "." fallback
    # is required here.
    base_dir = os.path.dirname(os.path.abspath(self.filename))
    archive_dir = os.path.join(base_dir, "archives")
    os.makedirs(archive_dir, exist_ok=True)
    return archive_dir
|
||||||
|
|
||||||
|
def _ensure_headers(self, df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` restricted to the expected CSV columns.

    The expected columns come from ``self._get_csv_headers()`` and appear
    in that order in the result. Missing columns are added with a default:
    ``""`` for ``note`` and any ``*_doses`` column, ``0`` for everything
    else. Columns that are not expected headers are dropped. The input
    frame is left untouched.

    Args:
        df: Frame to normalise.

    Returns:
        A new frame whose columns are exactly the expected headers.
    """
    headers = list(self._get_csv_headers())
    out = df.copy()
    for col in headers:
        if col in out.columns:
            continue
        is_text_column = col == "note" or col.endswith("_doses")
        out[col] = "" if is_text_column else 0
    # Selecting by the header list both reorders the columns and drops any
    # unknown ones, which keeps the written files tidy.
    return out[headers]
|
||||||
|
|
||||||
|
def _write_archive_file(self, year: int, df: pd.DataFrame) -> str:
    """Append rows to the per-year archive CSV and return its path.

    The archive lives at ``<archive dir>/<base>_<year>.csv`` where
    ``<base>`` is the main CSV's file name without its extension. Rows are
    normalised with ``_ensure_headers`` before writing. A header row is
    written only when the archive file is missing or empty; otherwise the
    rows are appended below the existing content.

    Args:
        year: Calendar year used in the archive file name.
        df: Rows to append.

    Returns:
        Path of the archive file that was written.

    Raises:
        Exception: Any error raised while writing is logged and re-raised.
    """
    archive_dir = self._get_archive_dir()
    base = os.path.splitext(os.path.basename(self.filename))[0]
    archive_path = os.path.join(archive_dir, f"{base}_{year}.csv")
    df_to_write = self._ensure_headers(df)

    # Ask for the size directly instead of exists()+getsize(): one call,
    # and no window in which the file can vanish between the two checks.
    try:
        write_header = os.path.getsize(archive_path) == 0
    except FileNotFoundError:
        write_header = True

    try:
        df_to_write.to_csv(
            archive_path, mode="a", index=False, header=write_header
        )
    except Exception as e:
        self.logger.error(f"Failed to write archive file {archive_path}: {e}")
        raise
    return archive_path
|
||||||
|
|
||||||
|
def archive_old_data(self, keep_years: int = 1) -> dict[str, Any]:
    """Move rows older than the kept calendar years into per-year archives.

    Args:
        keep_years: Number of calendar years to keep in the main CSV,
            counting the current year (minimum 1; values that cannot be
            converted to ``int`` fall back to 1). Rows dated before the
            earliest kept year are appended to ``archives/BASE_YYYY.csv``.

    Returns:
        Summary dict: { 'archived_rows': int, 'archive_files': set[str],
        'kept_rows': int }

    Raises:
        Exception: Any error raised while writing the archives or
            rewriting the main CSV is logged and re-raised.
    """
    try:
        keep_years = max(1, int(keep_years))
    except (TypeError, ValueError, OverflowError):
        keep_years = 1

    df = self.load_data()
    if df.empty or "date" not in df.columns:
        # Nothing can be archived; every loaded row stays where it is.
        return {
            "archived_rows": 0,
            "archive_files": set(),
            "kept_rows": int(len(df)),
        }

    # Dates are normally stored as mm/dd/YYYY. Unparseable values become
    # NaT, whose year is NaN.
    parsed = pd.to_datetime(df["date"], format="%m/%d/%Y", errors="coerce")
    years = parsed.dt.year

    current_year = datetime.now().year
    earliest_kept_year = current_year - keep_years + 1

    # NaN compares False, so rows with unparseable dates are never selected
    # for archiving.
    archive_mask = years < earliest_kept_year
    if not archive_mask.any():
        return {
            "archived_rows": 0,
            "archive_files": set(),
            "kept_rows": int(len(df)),
        }

    to_archive = df[archive_mask]
    # Keep everything that is not archived. Selecting with
    # ``years >= earliest_kept_year`` would silently drop rows whose date
    # could not be parsed, because NaN fails that comparison as well.
    to_keep = df[~archive_mask]
    archive_years = years[archive_mask].astype(int)

    archive_files: set[str] = set()
    try:
        # Group by year and append to each year's archive file.
        for year, group in to_archive.groupby(archive_years):
            archive_files.add(self._write_archive_file(int(year), group))

        # NOTE(review): archives are written before the main CSV is
        # rewritten; if the rewrite fails the rows exist in both places and
        # a retry would append them to the archives again.
        kept_df = self._ensure_headers(to_keep)
        self._atomic_write_csv(kept_df)
        self._invalidate_cache()
    except Exception as e:
        # If archiving failed mid-way, log and propagate.
        self.logger.error(f"Archiving failed: {e}")
        raise

    return {
        "archived_rows": int(len(to_archive)),
        "archive_files": archive_files,
        "kept_rows": int(len(to_keep)),
    }
|
||||||
|
|
||||||
def get_today_medicine_doses(
|
def get_today_medicine_doses(
|
||||||
self, date: str, medicine_name: str
|
self, date: str, medicine_name: str
|
||||||
) -> list[tuple[str, str]]:
|
) -> list[tuple[str, str]]:
|
||||||
|
|||||||
+41
-16
@@ -233,34 +233,59 @@ class InputValidator:
|
|||||||
entry_data: dict[str, Any],
|
entry_data: dict[str, Any],
|
||||||
) -> tuple[bool, list[str]]:
|
) -> tuple[bool, list[str]]:
|
||||||
"""
|
"""
|
||||||
Validate that an entry has the minimum required data.
|
Backward-compat entry completeness check.
|
||||||
|
|
||||||
|
Delegates to validate_entry_completeness_with_keys when possible.
|
||||||
|
"""
|
||||||
|
# Heuristic split: treat keys ending with _doses and note/date as
|
||||||
|
# non-core and assume the rest are a mix of pathologies and medicines;
|
||||||
|
# callers should prefer the explicit API below.
|
||||||
|
keys = [
|
||||||
|
k
|
||||||
|
for k in entry_data
|
||||||
|
if k not in {"date", "note"} and not str(k).endswith("_doses")
|
||||||
|
]
|
||||||
|
# Even split guess is unreliable; use value patterns instead:
|
||||||
|
path_keys = [k for k in keys if isinstance(entry_data.get(k), int | float)]
|
||||||
|
med_keys = [k for k in keys if k not in path_keys]
|
||||||
|
return InputValidator.validate_entry_completeness_with_keys(
|
||||||
|
entry_data, path_keys, med_keys
|
||||||
|
)
|
||||||
|
|
||||||
|
@staticmethod
def validate_entry_completeness_with_keys(
    entry_data: dict[str, Any],
    pathology_keys: list[str],
    medicine_keys: list[str],
) -> tuple[bool, list[str]]:
    """
    Validate that an entry has the minimum required data using explicit keys.

    An entry is complete when it has a truthy ``date`` and at least one
    pathology score greater than zero or one medicine flag equal to 1.

    Args:
        entry_data: Dictionary containing entry data
        pathology_keys: Keys representing pathology scores (numeric, >0 meaningful)
        medicine_keys: Keys representing medicine taken flags (0/1 boolean)

    Returns:
        Tuple of (is_complete, list_of_missing_fields)
    """
    missing_fields: list[str] = []

    if not entry_data.get("date"):
        missing_fields.append("Date")

    def _as_float(v: Any) -> float:
        # Values that are not numeric (None, "", arbitrary text) count as 0.
        try:
            return float(v)
        except (TypeError, ValueError, OverflowError):
            return 0.0

    def _as_int(v: Any) -> int:
        # Accept ints, numeric strings and floats ("1", "1.0", 1.0);
        # anything else (including NaN/inf) counts as 0.
        try:
            return int(v)
        except (TypeError, ValueError, OverflowError):
            try:
                return int(float(v))
            except (TypeError, ValueError, OverflowError):
                return 0

    # Compare scores as floats: truncating with int() would turn a
    # fractional score such as 0.5 into 0 and report the entry as empty.
    has_pathology = any(
        _as_float(entry_data.get(k, 0)) > 0 for k in pathology_keys
    )
    has_medicine = any(
        _as_int(entry_data.get(k, 0)) == 1 for k in medicine_keys
    )

    if not (has_pathology or has_medicine):
        missing_fields.append("At least one pathology score or medicine entry")

    return len(missing_fields) == 0, missing_fields
|
||||||
|
|||||||
+52
-14
@@ -509,6 +509,10 @@ class MedTrackerApp:
|
|||||||
command=self._restore_from_backup,
|
command=self._restore_from_backup,
|
||||||
accelerator="Ctrl+Shift+R",
|
accelerator="Ctrl+Shift+R",
|
||||||
)
|
)
|
||||||
|
tools_menu.add_command(
|
||||||
|
label="Archive Old Data...",
|
||||||
|
command=self._archive_old_data,
|
||||||
|
)
|
||||||
tools_menu.add_separator()
|
tools_menu.add_separator()
|
||||||
tools_menu.add_command(
|
tools_menu.add_command(
|
||||||
label="Open Config Folder (Ctrl+Shift+C)",
|
label="Open Config Folder (Ctrl+Shift+C)",
|
||||||
@@ -843,6 +847,47 @@ Use Ctrl+S to save entries and Ctrl+Q to quit."""
|
|||||||
logger.error(f"Failed to open backups folder: {e}")
|
logger.error(f"Failed to open backups folder: {e}")
|
||||||
self.ui_manager.update_status("Failed to open backups folder", "error")
|
self.ui_manager.update_status("Failed to open backups folder", "error")
|
||||||
|
|
||||||
|
def _archive_old_data(self) -> None:
    """Confirm with the user, then archive old rows and refresh the view.

    The number of years to keep is read from the ``archive_keep_years``
    preference and clamped to at least 1, so the confirmation text shows
    the same value that is passed to ``archive_old_data``. Failures are
    logged, reflected in the status bar and reported in an error dialog;
    nothing is raised to the caller.
    """
    try:
        keep_years = max(1, int(get_pref("archive_keep_years", 1) or 1))
    except Exception:
        # Best effort: any problem reading the preference falls back to 1.
        keep_years = 1

    # Confirm with user
    confirmed = messagebox.askyesno(
        "Archive Old Data",
        (
            "This will move entries older than the last "
            f"{keep_years} year(s) to per-year archive files and shrink the "
            "main CSV.\n\nProceed?"
        ),
        parent=self.root,
    )
    if not confirmed:
        return

    try:
        self.ui_manager.update_status("Archiving old data...", "info")
        summary = self.data_manager.archive_old_data(keep_years=keep_years)
        archived = int(summary.get("archived_rows", 0))
        kept = int(summary.get("kept_rows", 0))
        files = summary.get("archive_files", set()) or set()
        file_list = "\n".join(
            f"\u2022 {os.path.basename(str(p))}" for p in sorted(files)
        )
        msg = f"Archived {archived} row(s). Kept {kept}."
        if file_list:
            msg += f"\n\n{file_list}"
        self.ui_manager.update_status("Archiving complete", "success")
        # show_toast is optional on the UI manager.
        if hasattr(self.ui_manager, "show_toast"):
            self.ui_manager.show_toast("Archiving complete", 1500)
        messagebox.showinfo("Archive Complete", msg, parent=self.root)
        # Refresh view since data file changed
        self.refresh_data_display()
    except Exception as e:
        logger.error(f"Archiving failed: {e}")
        self.ui_manager.update_status("Archiving failed", "error")
        messagebox.showerror("Archive Failed", str(e), parent=self.root)
|
||||||
|
|
||||||
def _refresh_ui_after_config_change(self) -> None:
|
def _refresh_ui_after_config_change(self) -> None:
|
||||||
"""Refresh UI components after pathology or medicine configuration changes."""
|
"""Refresh UI components after pathology or medicine configuration changes."""
|
||||||
self.ui_manager.update_status(
|
self.ui_manager.update_status(
|
||||||
@@ -1291,21 +1336,14 @@ Use Ctrl+S to save entries and Ctrl+Q to quit."""
|
|||||||
return
|
return
|
||||||
entry_data["note"] = validated_note
|
entry_data["note"] = validated_note
|
||||||
|
|
||||||
# Check entry completeness: require date and at least one of
|
# Check entry completeness using explicit keys
|
||||||
# (any pathology score > 0) or (any medicine taken == 1)
|
is_complete, missing_fields = (
|
||||||
missing_fields: list[str] = []
|
InputValidator.validate_entry_completeness_with_keys(
|
||||||
if not entry_data.get("date"):
|
entry_data,
|
||||||
missing_fields.append("Date")
|
self.pathology_manager.get_pathology_keys(),
|
||||||
|
self.medicine_manager.get_medicine_keys(),
|
||||||
has_pathology = any(
|
)
|
||||||
entry_data.get(k, 0) > 0
|
|
||||||
for k in self.pathology_manager.get_pathology_keys()
|
|
||||||
)
|
)
|
||||||
has_medicine = any(
|
|
||||||
entry_data.get(k, 0) == 1 for k in self.medicine_manager.get_medicine_keys()
|
|
||||||
)
|
|
||||||
if not (has_pathology or has_medicine):
|
|
||||||
missing_fields.append("At least one pathology score or medicine entry")
|
|
||||||
|
|
||||||
if missing_fields:
|
if missing_fields:
|
||||||
missing_msg = "Missing required data:\n" + "\n".join(
|
missing_msg = "Missing required data:\n" + "\n".join(
|
||||||
|
|||||||
@@ -25,6 +25,8 @@ _DEFAULTS: dict[str, Any] = {
|
|||||||
# Table column UX
|
# Table column UX
|
||||||
"column_widths": {},
|
"column_widths": {},
|
||||||
"last_sort": {"column": None, "ascending": True},
|
"last_sort": {"column": None, "ascending": True},
|
||||||
|
# Data: archiving/rotation
|
||||||
|
"archive_keep_years": 1,
|
||||||
}
|
}
|
||||||
|
|
||||||
_PREFERENCES: dict[str, Any] = dict(_DEFAULTS)
|
_PREFERENCES: dict[str, Any] = dict(_DEFAULTS)
|
||||||
|
|||||||
Reference in New Issue
Block a user