#!/usr/bin/env python3 from __future__ import annotations from pathlib import Path from PIL import Image, ImageDraw, ImageFilter ROOT = Path(__file__).resolve().parent SAMPLES = ROOT / "samples" def make_doc(path: Path, lines: list[str], size=(900, 1200), rotate: int = 0, blur: bool = False) -> None: img = Image.new("RGB", size, "white") draw = ImageDraw.Draw(img) y = 70 for line in lines: draw.text((70, y), line, fill="black") y += 55 draw.rectangle((55, 50, size[0] - 55, min(size[1] - 50, y + 30)), outline="gray", width=3) if blur: img = img.filter(ImageFilter.GaussianBlur(2.5)) if rotate: img = img.rotate(rotate, expand=True, fillcolor="white") img.save(path) path.with_suffix(path.suffix + ".txt").write_text("\n".join(lines) + "\n") def main() -> int: SAMPLES.mkdir(exist_ok=True) make_doc(SAMPLES / "synthetic_invoice.png", [ "ACME Utilities Invoice", "Invoice No: INV-2026-0604", "Amount Due: $123.45", "Payment due 2026-06-30", "Please submit payment by the due date.", ]) make_doc(SAMPLES / "synthetic_receipt.png", [ "Neighborhood Store Receipt", "Subtotal $14.20", "Tax $1.42", "Total $15.62", "Thank you for shopping", ], size=(720, 1100), rotate=3) make_doc(SAMPLES / "synthetic_conversation.png", [ "Messages with Alex", "Can you please respond by tomorrow?", "Need signature on the form before Friday.", ], size=(1200, 750)) make_doc(SAMPLES / "synthetic_sensitive_form.png", [ "Sample Government Form - Fake Data", "Applicant: Test Person", "SSN: 123-45-6789", "Signature required", "Submit by Jan 15, 2027", ], blur=False) make_doc(SAMPLES / "synthetic_blurry.png", [ "Low resolution blurred sample", "No action required", ], size=(360, 250), blur=True) # PIL can save a simple local PDF from a synthetic page. This is non-private. pdf_img = Image.open(SAMPLES / "synthetic_invoice.png").convert("RGB") pdf_img.save(SAMPLES / "synthetic_invoice.pdf", "PDF") (SAMPLES / "synthetic_invoice.pdf.txt").write_text((SAMPLES / "synthetic_invoice.png.txt").read_text()) print(f"wrote samples under {SAMPLES}") return 0 if __name__ == "__main__": raise SystemExit(main())