# unitforge/scripts/validate_config.py

#!/usr/bin/env python3
"""
Environment Configuration Validation Script for UnitForge

This script validates the environment configuration and checks for common
configuration issues. It can be used during development, deployment, and
troubleshooting to ensure all settings are properly configured.

Usage:
    python scripts/validate_config.py
    python scripts/validate_config.py --env-file .env.production
    python scripts/validate_config.py --check-all
"""

import argparse
import json
import os
import re
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple


class ConfigValidator:
    """Validate UnitForge environment configuration.

    Settings are read from ``os.environ``, optionally seeded from a
    ``.env``-style file.  Problems are collected instead of raised so that a
    single run reports every finding:

    * ``errors``   - problems that make validation fail
    * ``warnings`` - suspicious settings that do not fail validation
    * ``info``     - informational notes (e.g. which file was loaded)
    * ``config``   - key/value pairs exactly as parsed from the env file
    """

    # Accepted spellings (compared case-insensitively) for boolean settings.
    _TRUE_VALUES: Tuple[str, ...] = ("true", "1", "yes", "on")
    _FALSE_VALUES: Tuple[str, ...] = ("false", "0", "no", "off")

    # Compiled once at class creation instead of on every validation call.
    _URL_PATTERN = re.compile(
        r"^https?://"  # http:// or https://
        # Domain name; TLD labels may be up to 63 characters long, so do not
        # cap them at 6 (that rejected e.g. ".technology").
        r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,63}\.?|"
        r"localhost|"  # localhost
        r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"  # IP
        r"(?::\d+)?"  # optional port
        r"(?:/?|[/?]\S+)$",
        re.IGNORECASE,
    )
    _EMAIL_PATTERN = re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")
    # In an unquoted value a comment starts at a '#' that is either the first
    # character or preceded by whitespace; a '#' glued to other text (URL
    # fragment, secret key) is part of the value.
    _INLINE_COMMENT = re.compile(r"(?:^|\s)#")

    def __init__(self, env_file: Optional[str] = None):
        """Initialize the validator and load ``env_file`` when one is given.

        Args:
            env_file: Path to a ``.env``-style file.  If it cannot be read
                (including when it does not exist) an error is recorded in
                ``self.errors`` rather than raised.
        """
        self.env_file = env_file
        self.errors: List[str] = []
        self.warnings: List[str] = []
        self.info: List[str] = []
        self.config: Dict[str, Any] = {}

        # Snapshot taken before loading so that variables already present in
        # the process environment are never overridden by the file.
        self.original_env = dict(os.environ)

        # Let the loader report a missing/unreadable file instead of silently
        # validating without it.
        if env_file:
            self._load_env_file(env_file)

    @classmethod
    def _parse_env_line(cls, line: str) -> Optional[Tuple[str, str]]:
        """Parse one ``KEY=VALUE`` line of an env file.

        Handles an optional ``export`` prefix, single- or double-quoted values
        (whose content is taken verbatim, including any ``#``) and inline
        comments after the value.

        Args:
            line: A single line of the env file.

        Returns:
            ``(key, value)``, or ``None`` when the line is blank, a comment,
            has no ``=`` or has an empty key.
        """
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            return None

        key, raw = line.split("=", 1)
        key_parts = key.split()
        if len(key_parts) == 2 and key_parts[0] == "export":
            key = key_parts[1]
        else:
            key = key.strip()
        if not key:
            return None

        raw = raw.strip()
        # Quoted value: only trust the quotes when nothing but an optional
        # comment follows the closing quote.
        if raw[:1] in ('"', "'"):
            closing = raw.find(raw[0], 1)
            if closing != -1:
                trailer = raw[closing + 1 :].strip()
                if not trailer or trailer.startswith("#"):
                    return key, raw[1:closing]

        # Unquoted (or oddly quoted) value: drop the inline comment first,
        # then strip any stray surrounding quotes.
        comment = cls._INLINE_COMMENT.search(raw)
        if comment:
            raw = raw[: comment.start()]
        return key, raw.strip().strip("\"'")

    def _load_env_file(self, env_file: str) -> None:
        """Load variables from ``env_file`` into ``os.environ`` and ``self.config``.

        Variables that were already set in the process environment keep their
        value; ``self.config`` always records what the file said.  Malformed
        lines produce a warning; an unreadable file produces an error.
        """
        try:
            with open(env_file, "r", encoding="utf-8") as f:
                for line_num, raw_line in enumerate(f, 1):
                    line = raw_line.strip()
                    if not line or line.startswith("#"):
                        continue

                    parsed = self._parse_env_line(line)
                    if parsed is None:
                        self.warnings.append(
                            f"{env_file}: line {line_num}: "
                            "Ignoring malformed entry (expected KEY=VALUE)"
                        )
                        continue

                    key, value = parsed
                    # Don't override existing environment variables
                    if key not in self.original_env:
                        os.environ[key] = value
                    self.config[key] = value
        except (OSError, ValueError) as e:
            # OSError: missing/unreadable file.  ValueError: undecodable bytes
            # (UnicodeDecodeError) or a value os.environ refuses to store.
            self.errors.append(f"Failed to load environment file {env_file}: {e}")
        else:
            self.info.append(f"Loaded environment file: {env_file}")

    def _get_env_value(self, key: str, default: str = "") -> str:
        """Return the environment variable ``key``, or ``default`` if unset."""
        return os.getenv(key, default)

    def _is_enabled(self, key: str, default: str) -> bool:
        """Return True when ``key`` (or ``default`` if unset) is a truthy spelling."""
        return self._get_env_value(key, default).lower() in self._TRUE_VALUES

    def _validate_boolean(self, key: str, value: str) -> bool:
        """Check that ``value`` is a recognised boolean spelling.

        An empty value is valid (the application default applies).  Records an
        error and returns False otherwise.
        """
        if not value:
            return True  # Empty is valid (uses default)

        valid_options = self._TRUE_VALUES + self._FALSE_VALUES
        if value.lower() not in valid_options:
            self.errors.append(
                f"{key}: Invalid boolean value '{value}'. Use: {valid_options}"
            )
            return False
        return True

    def _validate_integer(
        self, key: str, value: str, min_val: int = 0, max_val: Optional[int] = None
    ) -> bool:
        """Check that ``value`` is an integer within ``[min_val, max_val]``.

        An empty value is valid (the application default applies).
        ``max_val=None`` means no upper bound.  Records an error and returns
        False on failure.
        """
        if not value:
            return True  # Empty is valid (uses default)

        try:
            int_val = int(value)
        except ValueError:
            self.errors.append(f"{key}: Invalid integer value '{value}'")
            return False

        if int_val < min_val:
            self.errors.append(f"{key}: Value {int_val} is below minimum {min_val}")
            return False
        if max_val is not None and int_val > max_val:
            self.errors.append(f"{key}: Value {int_val} is above maximum {max_val}")
            return False
        return True

    def _validate_url(self, key: str, value: str) -> bool:
        """Check that ``value`` looks like an http(s) URL.

        An empty value is valid (the URL is optional).  Records an error and
        returns False otherwise.
        """
        if not value:
            return True  # Empty is valid for optional URLs

        if not self._URL_PATTERN.match(value):
            self.errors.append(f"{key}: Invalid URL format '{value}'")
            return False
        return True

    def _validate_email(self, key: str, value: str) -> bool:
        """Check that ``value`` looks like an email address.

        An empty value is valid (the email is optional).  Records an error and
        returns False otherwise.
        """
        if not value:
            return True  # Empty is valid for optional emails

        if not self._EMAIL_PATTERN.match(value):
            self.errors.append(f"{key}: Invalid email format '{value}'")
            return False
        return True

    def _validate_json_array(self, key: str, value: str) -> bool:
        """Check that ``value`` is a JSON array.

        Empty values and the ``*`` wildcard are accepted.  A comma-separated
        list that is not JSON is accepted with a warning.  Anything else
        records an error and returns False.
        """
        if not value or value == "*":
            return True  # Empty or wildcard is valid

        try:
            parsed = json.loads(value)
        except json.JSONDecodeError as e:
            # Try comma-separated format
            if "," in value and not value.startswith("["):
                self.warnings.append(
                    f"{key}: Using comma-separated format. "
                    "Consider JSON array format for clarity"
                )
                return True
            self.errors.append(f"{key}: Invalid JSON array format: {e}")
            return False

        if not isinstance(parsed, list):
            self.errors.append(
                f"{key}: Must be a JSON array, got {type(parsed).__name__}"
            )
            return False
        return True

    def _validate_log_level(self, key: str, value: str) -> bool:
        """Check that ``value`` is a known log level (case-insensitive)."""
        if not value:
            return True

        valid_levels = ("debug", "info", "warning", "error", "critical")
        if value.lower() not in valid_levels:
            self.errors.append(
                f"{key}: Invalid log level '{value}'. Valid levels: {valid_levels}"
            )
            return False
        return True

    def _validate_environment(self, key: str, value: str) -> bool:
        """Warn about an unusual environment name; never fails validation."""
        if not value:
            return True

        valid_envs = ("development", "staging", "production", "test")
        if value.lower() not in valid_envs:
            self.warnings.append(
                f"{key}: Unusual environment '{value}'. Common values: {valid_envs}"
            )
        return True

    def _validate_file_extensions(self, key: str, value: str) -> bool:
        """Check a list of file extensions, given as JSON array or comma list.

        Every extension must be a string starting with ``.``.  An empty value
        is valid.  Records an error and returns False on the first problem.
        """
        if not value:
            return True

        if value.startswith("["):
            # JSON array format.  Text starting with "[" can only decode to a
            # list, so no separate type check is needed after parsing.
            try:
                extensions = json.loads(value)
            except json.JSONDecodeError as e:
                self.errors.append(f"{key}: Invalid extensions format: {e}")
                return False
        else:
            # Comma-separated format
            extensions = [ext.strip() for ext in value.split(",")]

        for ext in extensions:
            if not isinstance(ext, str):
                self.errors.append(
                    f"{key}: Invalid extensions format: "
                    f"extension {ext!r} is not a string"
                )
                return False
            if not ext.startswith("."):
                self.errors.append(f"{key}: Extension '{ext}' should start with '.'")
                return False
        return True

    def validate_basic_config(self) -> None:
        """Validate application name, version, project URLs and contact email."""
        print("🔍 Validating basic configuration...")

        # Application Information
        app_name = self._get_env_value("APP_NAME")
        if not app_name:
            self.warnings.append("APP_NAME: Not set, using default")
        elif len(app_name) > 100:
            self.warnings.append("APP_NAME: Very long name (>100 chars)")

        app_version = self._get_env_value("APP_VERSION")
        # Only the leading "MAJOR.MINOR.PATCH" is required; suffixes are fine.
        if app_version and not re.match(r"^\d+\.\d+\.\d+", app_version):
            self.warnings.append(f"APP_VERSION: Unusual version format '{app_version}'")

        # URLs
        for url_key in ("GITHUB_URL", "DOCUMENTATION_URL", "BUG_REPORTS_URL"):
            self._validate_url(url_key, self._get_env_value(url_key))

        # Email
        self._validate_email("CONTACT_EMAIL", self._get_env_value("CONTACT_EMAIL"))

    def validate_server_config(self) -> None:
        """Validate server settings: flags, environment, log level, port, host."""
        print("🔍 Validating server configuration...")

        # Boolean values
        self._validate_boolean("DEBUG", self._get_env_value("DEBUG"))
        self._validate_boolean("RELOAD", self._get_env_value("RELOAD"))

        # Environment
        self._validate_environment("ENVIRONMENT", self._get_env_value("ENVIRONMENT"))
        self._validate_log_level("LOG_LEVEL", self._get_env_value("LOG_LEVEL"))

        # Integer values
        self._validate_integer("PORT", self._get_env_value("PORT"), 1, 65535)
        self._validate_integer("WORKERS", self._get_env_value("WORKERS"), 1, 64)

        # Host validation
        host = self._get_env_value("HOST")
        valid_hosts = ("0.0.0.0", "127.0.0.1", "localhost")  # nosec B104
        if host and host not in valid_hosts:
            # Basic IP validation
            if not re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", host):
                self.warnings.append(f"HOST: Unusual host value '{host}'")

    def validate_security_config(self) -> None:
        """Validate secret key, security flags, HSTS, CORS and allowed hosts."""
        print("🔍 Validating security configuration...")

        # Secret key
        secret_key = self._get_env_value("SECRET_KEY")
        # An unset ENVIRONMENT is treated as production (the strictest case).
        env = self._get_env_value("ENVIRONMENT", "production").lower()

        if secret_key:
            if len(secret_key) < 32:
                self.warnings.append(
                    "SECRET_KEY: Too short, use at least 32 characters"
                )
            example_keys = (
                "your-secret-key-here",
                "dev-secret-key-change-in-production",
            )
            if secret_key in example_keys:
                if env == "production":
                    self.errors.append(
                        "SECRET_KEY: Using default/example secret key in production!"
                    )
                else:
                    self.warnings.append(
                        "SECRET_KEY: Using example secret key (OK for development)"
                    )
        elif env == "production":
            self.warnings.append("SECRET_KEY: Not set in production environment")

        # Boolean security settings
        self._validate_boolean(
            "SECURITY_HEADERS", self._get_env_value("SECURITY_HEADERS")
        )
        self._validate_boolean("CSP_ENABLED", self._get_env_value("CSP_ENABLED"))

        # HSTS settings
        self._validate_integer("HSTS_MAX_AGE", self._get_env_value("HSTS_MAX_AGE"), 0)

        # CORS and allowed hosts (the "*" wildcard is accepted by the helper)
        self._validate_json_array("CORS_ORIGINS", self._get_env_value("CORS_ORIGINS"))
        self._validate_json_array("ALLOWED_HOSTS", self._get_env_value("ALLOWED_HOSTS"))

    def validate_feature_flags(self) -> None:
        """Validate that every feature flag holds a boolean value."""
        print("🔍 Validating feature flags...")

        feature_flags = [
            "ENABLE_API_METRICS",
            "ENABLE_REQUEST_LOGGING",
            "ENABLE_TEMPLATE_CACHING",
            "ENABLE_VALIDATION_CACHING",
            "HEALTH_CHECK_ENABLED",
            "METRICS_ENABLED",
            "TRACING_ENABLED",
            "API_DOCS_ENABLED",
            "SWAGGER_UI_ENABLED",
            "REDOC_ENABLED",
            "HOT_RELOAD",
            "SOURCE_MAPS",
            "MINIFY_ASSETS",
            "COMPRESS_RESPONSES",
        ]

        for flag in feature_flags:
            self._validate_boolean(flag, self._get_env_value(flag))

    def validate_performance_config(self) -> None:
        """Validate timeouts, connection limits, cache TTLs and upload settings."""
        print("🔍 Validating performance configuration...")

        # (key, minimum, maximum) - a maximum of None means "no upper bound".
        integer_settings = (
            # Timeout settings
            ("REQUEST_TIMEOUT", 1, 3600),
            ("KEEPALIVE_TIMEOUT", 1, 300),
            ("MAX_CONNECTIONS", 1, 10000),
            # Cache settings
            ("TEMPLATE_CACHE_TTL", 0, None),
            ("VALIDATION_CACHE_TTL", 0, None),
            # File upload
            ("MAX_UPLOAD_SIZE", 1024, None),
        )
        for key, minimum, maximum in integer_settings:
            self._validate_integer(key, self._get_env_value(key), minimum, maximum)

        self._validate_file_extensions(
            "ALLOWED_EXTENSIONS", self._get_env_value("ALLOWED_EXTENSIONS")
        )

    def validate_production_readiness(self) -> None:
        """Run production-only checks; does nothing for other environments.

        An unset ENVIRONMENT is treated as production.
        """
        print("🔍 Validating production readiness...")

        if self._get_env_value("ENVIRONMENT", "production").lower() != "production":
            return

        if self._is_enabled("DEBUG", "false"):
            self.errors.append("Production environment should not have DEBUG=true")

        if not self._get_env_value("SECRET_KEY"):
            self.errors.append("Production environment must have SECRET_KEY set")

        if self._get_env_value("CORS_ORIGINS", "*") == "*":
            self.warnings.append("Production environment using wildcard CORS origins")

        if not self._is_enabled("SECURITY_HEADERS", "true"):
            self.warnings.append(
                "Production environment should enable SECURITY_HEADERS"
            )

        # WORKERS may be empty or non-numeric.  That must not crash the
        # validator; the bad value itself is reported by
        # validate_server_config(), so the check is simply skipped here.
        try:
            workers: Optional[int] = int(self._get_env_value("WORKERS", "4"))
        except ValueError:
            workers = None
        if workers is not None and workers < 2:
            self.warnings.append("Production environment should use multiple workers")

    def check_environment_consistency(self) -> None:
        """Warn about combinations of settings that contradict each other."""
        print("🔍 Checking environment consistency...")

        # Debug mode consistency
        debug = self._is_enabled("DEBUG", "false")
        env = self._get_env_value("ENVIRONMENT", "production").lower()
        log_level = self._get_env_value("LOG_LEVEL", "info").lower()

        if debug and env == "production":
            self.warnings.append("DEBUG mode enabled in production environment")

        if debug and log_level not in ("debug", "info"):
            self.warnings.append("DEBUG mode with non-debug log level")

        # Performance consistency
        hot_reload = self._is_enabled("HOT_RELOAD", "false")
        minify = self._is_enabled("MINIFY_ASSETS", "true")

        if hot_reload and minify:
            self.warnings.append(
                "HOT_RELOAD and MINIFY_ASSETS both enabled (unusual for development)"
            )

    def validate_all(self) -> Tuple[int, int, int]:
        """Run every validation step.

        Returns:
            ``(error_count, warning_count, info_count)``.
        """
        print("🚀 Starting UnitForge configuration validation...\n")

        self.validate_basic_config()
        self.validate_server_config()
        self.validate_security_config()
        self.validate_feature_flags()
        self.validate_performance_config()
        self.validate_production_readiness()
        self.check_environment_consistency()

        return len(self.errors), len(self.warnings), len(self.info)

    def print_results(self) -> None:
        """Print all collected messages followed by a summary and verdict."""
        print("\n" + "=" * 60)
        print("🔍 CONFIGURATION VALIDATION RESULTS")
        print("=" * 60)

        if self.info:
            print(f"\n📋 Information ({len(self.info)}):")
            for msg in self.info:
                print(f"   ℹ️  {msg}")

        if self.warnings:
            print(f"\n⚠️  Warnings ({len(self.warnings)}):")
            for msg in self.warnings:
                print(f"   ⚠️  {msg}")

        if self.errors:
            print(f"\n❌ Errors ({len(self.errors)}):")
            for msg in self.errors:
                print(f"   ❌ {msg}")

        print("\n📊 Summary:")
        print(f"   • Errors: {len(self.errors)}")
        print(f"   • Warnings: {len(self.warnings)}")
        print(f"   • Info: {len(self.info)}")

        if self.errors:
            print("\n❌ Configuration validation failed.")
        elif self.warnings:
            print("\n⚠️ Configuration validation passed with warnings.")
        else:
            print("\n✅ Configuration validation passed!")

        print("=" * 60)


def main() -> None:
    """Parse command-line arguments, run the validation and exit.

    The process exits with status 1 when at least one error was found and
    with status 0 otherwise (warnings do not affect the exit status).
    """
    # Only this entry point needs these, so import them locally.
    import contextlib
    import io

    parser = argparse.ArgumentParser(
        description="Validate UnitForge environment configuration",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--env-file", type=str, help="Path to environment file to load (default: .env)"
    )
    # NOTE: --check-all is accepted but its value is never read here;
    # validate_all() is always called regardless of this flag.
    parser.add_argument(
        "--check-all",
        action="store_true",
        help="Run all validation checks including production readiness",
    )
    parser.add_argument(
        "--quiet", action="store_true", help="Suppress informational output"
    )

    args = parser.parse_args()

    # Default to .env if it exists
    env_file = args.env_file
    if not env_file and Path(".env").exists():
        env_file = ".env"

    validator = ConfigValidator(env_file)

    if args.quiet:
        # validate_all() prints progress lines as it runs; --quiet promises no
        # informational output, so discard stdout for the duration of the run.
        # The exit status still carries the result.
        with contextlib.redirect_stdout(io.StringIO()):
            errors, _warnings, _info = validator.validate_all()
    else:
        if env_file:
            print(f"📄 Using environment file: {env_file}")
        else:
            print("📄 Using system environment variables only")
        print()

        errors, _warnings, _info = validator.validate_all()
        validator.print_results()

    # Exit with appropriate code
    sys.exit(1 if errors else 0)


# Run the validator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()