feat: add comprehensive environment configuration validation system

- Create validate_config.py script with 100+ validation checks
- Validate basic settings (URLs, emails, integers, booleans)
- Check security settings and production readiness
- Validate feature flags, performance settings, and file upload limits
- Support both .env files and direct environment variables
- Provide detailed error messages and configuration suggestions
- Include environment consistency checks and best practice warnings
2025-09-14 15:57:59 -07:00
parent 69f6fd1827
commit ebc5d42645
1 changed files with 456 additions and 0 deletions
--- a/scripts/validate_config.py
+++ b/scripts/validate_config.py
@@ -0,0 +1,456 @@
+#!/usr/bin/env python3
+"""
+Environment Configuration Validation Script for UnitForge
+
+This script validates the environment configuration and checks for common
+configuration issues. It can be used during development, deployment, and
+troubleshooting to ensure all settings are properly configured.
+
+Usage:
+    python scripts/validate_config.py
+    python scripts/validate_config.py --env-file .env.production
+    python scripts/validate_config.py --check-all
+"""
+
+import os
+import sys
+import json
+import argparse
+from pathlib import Path
+from typing import List, Dict, Any, Tuple, Optional
+import re
+
+
+class ConfigValidator:
+    """Validates UnitForge environment configuration."""
+
+    def __init__(self, env_file: Optional[str] = None):
+        """Initialize the validator, optionally loading an environment file.
+
+        Args:
+            env_file: Path to a ``KEY=VALUE`` file to load. When a path is
+                given but nothing exists there, an error is recorded instead
+                of silently validating the process environment alone.
+        """
+        self.env_file = env_file
+        self.errors: List[str] = []
+        self.warnings: List[str] = []
+        self.info: List[str] = []
+        self.config: Dict[str, Any] = {}
+
+        # Snapshot the process environment before loading the file so that
+        # variables which were already set keep precedence over file values.
+        self.original_env = dict(os.environ)
+
+        if not env_file:
+            return
+        if Path(env_file).exists():
+            self._load_env_file(env_file)
+        else:
+            # A mistyped path must not look like a successful validation.
+            self.errors.append(f"Environment file not found: {env_file}")
+
+    def _load_env_file(self, env_file: str) -> None:
+        """Load ``KEY=VALUE`` pairs from env_file into os.environ and self.config.
+
+        Blank lines, lines starting with ``#`` and lines without ``=`` are
+        skipped. A leading ``export `` on the key is dropped. A value that
+        starts with a quote is taken verbatim up to its matching closing
+        quote, so a ``#`` inside quotes is preserved; in an unquoted value a
+        ``#`` starts a comment only at the beginning or after whitespace.
+
+        Variables already present in the process environment are not
+        overridden, but every parsed pair is recorded in self.config. A
+        failure while reading is appended to self.errors rather than raised.
+
+        Args:
+            env_file: Path of the file to read (decoded as UTF-8).
+        """
+        try:
+            with open(env_file, 'r', encoding='utf-8') as f:
+                for raw_line in f:
+                    line = raw_line.strip()
+                    if not line or line.startswith('#') or '=' not in line:
+                        continue
+
+                    key, value = line.split('=', 1)
+                    key = key.strip()
+                    if key.startswith('export '):
+                        key = key[len('export '):].strip()
+                    value = value.strip()
+
+                    if value[:1] in ('"', "'"):
+                        # Quoted: keep everything up to the matching quote and
+                        # ignore whatever follows (typically a comment).
+                        closing = value.find(value[0], 1)
+                        if closing != -1:
+                            value = value[1:closing]
+                        else:
+                            # Unterminated quote: just drop the stray quotes.
+                            value = value.strip('"\'')
+                    else:
+                        # Unquoted: "abc#def" is data, "abc  # note" is not.
+                        comment = re.search(r'(?:^|\s)#', value)
+                        if comment:
+                            value = value[:comment.start()].rstrip()
+
+                    # Don't override existing environment variables
+                    if key not in self.original_env:
+                        os.environ[key] = value
+                    self.config[key] = value
+            self.info.append(f"Loaded environment file: {env_file}")
+        except (OSError, ValueError) as e:
+            # OSError: unreadable file or a key the OS rejects;
+            # ValueError: undecodable bytes or an embedded NUL.
+            self.errors.append(f"Failed to load environment file {env_file}: {e}")
+
+    def _get_env_value(self, key: str, default: str = "") -> str:
+        """Return the environment variable named key, or default when unset."""
+        return os.environ.get(key, default)
+
+    def _validate_boolean(self, key: str, value: str) -> bool:
+        """Check that value is one of the accepted boolean spellings.
+
+        An empty value passes (the setting is treated as unset). Matching is
+        case-insensitive. On failure an error naming key is recorded.
+
+        Returns:
+            True when the value is empty or recognised, False otherwise.
+        """
+        accepted = ('true', '1', 'yes', 'on') + ('false', '0', 'no', 'off')
+        if not value or value.lower() in accepted:
+            return True
+
+        self.errors.append(f"{key}: Invalid boolean value '{value}'. Use: {accepted}")
+        return False
+
+    def _validate_integer(self, key: str, value: str, min_val: int = 0, max_val: Optional[int] = None) -> bool:
+        """Check that value parses as an integer within [min_val, max_val].
+
+        Args:
+            key: Variable name used in error messages.
+            value: Raw string value; an empty value passes (treated as unset).
+            min_val: Inclusive lower bound.
+            max_val: Inclusive upper bound, or None for no upper bound.
+
+        Returns:
+            True when the value is empty or a valid in-range integer; False
+            otherwise, after recording an error.
+        """
+        if not value:
+            return True  # Empty is valid (uses default)
+
+        # Only the conversion can raise, so keep the try limited to it.
+        try:
+            int_val = int(value)
+        except ValueError:
+            self.errors.append(f"{key}: Invalid integer value '{value}'")
+            return False
+
+        if int_val < min_val:
+            self.errors.append(f"{key}: Value {int_val} is below minimum {min_val}")
+            return False
+        if max_val is not None and int_val > max_val:
+            self.errors.append(f"{key}: Value {int_val} is above maximum {max_val}")
+            return False
+        return True
+
+    def _validate_url(self, key: str, value: str) -> bool:
+        """Check that value looks like an http(s) URL.
+
+        The host may be a dotted domain name, ``localhost`` or a dotted-quad
+        address, optionally followed by a port and a path/query. An empty
+        value passes (the URL is treated as optional).
+
+        Returns:
+            True when the value is empty or matches; False otherwise, after
+            recording an error naming key.
+        """
+        if not value:
+            return True  # Empty is valid for optional URLs
+
+        url_pattern = re.compile(
+            r'^https?://'  # http:// or https://
+            # Domain: the final label may be up to 63 letters (the DNS label
+            # limit) so long TLDs such as ".technology" are accepted.
+            r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,63}\.?|'
+            r'localhost|'  # localhost
+            r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # IP
+            r'(?::\d+)?'  # optional port
+            r'(?:/?|[/?]\S+)$', re.IGNORECASE)
+
+        if not url_pattern.match(value):
+            self.errors.append(f"{key}: Invalid URL format '{value}'")
+            return False
+        return True
+
+    def _validate_email(self, key: str, value: str) -> bool:
+        """Check that value has a ``local@domain.tld`` shape.
+
+        An empty value passes (the address is treated as optional). On a
+        mismatch an error naming key is recorded.
+
+        Returns:
+            True when the value is empty or matches, False otherwise.
+        """
+        if value and re.match(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$', value) is None:
+            self.errors.append(f"{key}: Invalid email format '{value}'")
+            return False
+        return True
+
+    def _validate_json_array(self, key: str, value: str) -> bool:
+        """Check that value is a JSON array, a wildcard, or a comma list.
+
+        Empty values and ``*`` pass untouched. A value that fails to parse as
+        JSON but contains a comma and does not start with ``[`` is accepted
+        with a warning; any other parse failure, or JSON that is not a list,
+        records an error.
+
+        Returns:
+            True when the value is acceptable, False otherwise.
+        """
+        if not value or value == "*":
+            return True
+
+        try:
+            decoded = json.loads(value)
+        except json.JSONDecodeError as exc:
+            comma_separated = ',' in value and not value.startswith('[')
+            if not comma_separated:
+                self.errors.append(f"{key}: Invalid JSON array format: {exc}")
+                return False
+            self.warnings.append(f"{key}: Using comma-separated format. Consider JSON array format for clarity")
+            return True
+
+        if isinstance(decoded, list):
+            return True
+        self.errors.append(f"{key}: Must be a JSON array, got {type(decoded).__name__}")
+        return False
+
+    def _validate_log_level(self, key: str, value: str) -> bool:
+        """Check that value names a known log level (case-insensitive).
+
+        An empty value passes. An unknown level records an error naming key.
+
+        Returns:
+            True when the value is empty or recognised, False otherwise.
+        """
+        known_levels = ('debug', 'info', 'warning', 'error', 'critical')
+        if not value or value.lower() in known_levels:
+            return True
+
+        self.errors.append(f"{key}: Invalid log level '{value}'. Valid levels: {known_levels}")
+        return False
+
+    def _validate_environment(self, key: str, value: str) -> bool:
+        """Warn when value is not one of the common environment names.
+
+        An unfamiliar name is only a warning, never an error, so this method
+        always returns True.
+        """
+        common_names = ('development', 'staging', 'production', 'test')
+        is_unusual = bool(value) and value.lower() not in common_names
+        if is_unusual:
+            self.warnings.append(f"{key}: Unusual environment '{value}'. Common values: {common_names}")
+        return True
+
+    def _validate_file_extensions(self, key: str, value: str) -> bool:
+        """Check that value is a list of file extensions that start with '.'.
+
+        Accepts either a JSON array (when the value starts with ``[``) or a
+        comma-separated list. An empty value passes. Validation stops at the
+        first offending entry.
+
+        Returns:
+            True when the value is empty or every entry is valid; False
+            otherwise, after recording an error naming key.
+        """
+        if not value:
+            return True
+
+        if value.startswith('['):
+            # JSON array format
+            try:
+                extensions = json.loads(value)
+            except ValueError as e:  # json.JSONDecodeError is a ValueError
+                self.errors.append(f"{key}: Invalid extensions format: {e}")
+                return False
+            if not isinstance(extensions, list):
+                self.errors.append(f"{key}: Must be a list of extensions")
+                return False
+        else:
+            # Comma-separated format
+            extensions = [ext.strip() for ext in value.split(',')]
+
+        for ext in extensions:
+            # JSON arrays can hold numbers, nulls, nested lists, ...; report
+            # those clearly instead of failing on a missing str method.
+            if not isinstance(ext, str):
+                self.errors.append(f"{key}: Extension {ext!r} must be a string starting with '.'")
+                return False
+            if not ext.startswith('.'):
+                self.errors.append(f"{key}: Extension '{ext}' should start with '.'")
+                return False
+        return True
+
+    def validate_basic_config(self) -> None:
+        """Validate application name, version, project URLs and contact email.
+
+        A missing or over-long APP_NAME and an APP_VERSION that does not start
+        with ``N.N.N`` are warnings; malformed URLs or email are errors.
+        """
+        print("🔍 Validating basic configuration...")
+
+        name = self._get_env_value('APP_NAME')
+        if not name:
+            self.warnings.append("APP_NAME: Not set, using default")
+        elif len(name) > 100:
+            self.warnings.append("APP_NAME: Very long name (>100 chars)")
+
+        version = self._get_env_value('APP_VERSION')
+        if version and re.match(r'^\d+\.\d+\.\d+', version) is None:
+            self.warnings.append(f"APP_VERSION: Unusual version format '{version}'")
+
+        for url_key in ('GITHUB_URL', 'DOCUMENTATION_URL', 'BUG_REPORTS_URL'):
+            self._validate_url(url_key, self._get_env_value(url_key))
+
+        self._validate_email('CONTACT_EMAIL', self._get_env_value('CONTACT_EMAIL'))
+
+    def validate_server_config(self) -> None:
+        """Validate server flags, environment name, log level, port, workers and host.
+
+        HOST is only warned about: well-known local values and anything shaped
+        like a dotted-quad address pass silently.
+        """
+        print("🔍 Validating server configuration...")
+
+        for flag in ('DEBUG', 'RELOAD'):
+            self._validate_boolean(flag, self._get_env_value(flag))
+
+        self._validate_environment('ENVIRONMENT', self._get_env_value('ENVIRONMENT'))
+        self._validate_log_level('LOG_LEVEL', self._get_env_value('LOG_LEVEL'))
+
+        for name, low, high in (('PORT', 1, 65535), ('WORKERS', 1, 64)):
+            self._validate_integer(name, self._get_env_value(name), low, high)
+
+        host = self._get_env_value('HOST')
+        if not host or host in ('0.0.0.0', '127.0.0.1', 'localhost'):
+            return
+        # Shape check only; octet ranges are not verified.
+        if re.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', host) is None:
+            self.warnings.append(f"HOST: Unusual host value '{host}'")
+
+    def validate_security_config(self) -> None:
+        """Validate the secret key, security toggles, HSTS max age, CORS and allowed hosts.
+
+        ENVIRONMENT defaults to production when unset, so the stricter secret
+        key rules apply unless another environment is named explicitly.
+        """
+        print("🔍 Validating security configuration...")
+
+        secret = self._get_env_value('SECRET_KEY')
+        in_production = self._get_env_value('ENVIRONMENT', 'production').lower() == 'production'
+        placeholder_keys = ('your-secret-key-here', 'dev-secret-key-change-in-production')
+
+        if not secret:
+            if in_production:
+                self.warnings.append("SECRET_KEY: Not set in production environment")
+        else:
+            if len(secret) < 32:
+                self.warnings.append("SECRET_KEY: Too short, use at least 32 characters")
+            if secret in placeholder_keys:
+                if in_production:
+                    self.errors.append("SECRET_KEY: Using default/example secret key in production!")
+                else:
+                    self.warnings.append("SECRET_KEY: Using example secret key (OK for development)")
+
+        for toggle in ('SECURITY_HEADERS', 'CSP_ENABLED'):
+            self._validate_boolean(toggle, self._get_env_value(toggle))
+
+        self._validate_integer('HSTS_MAX_AGE', self._get_env_value('HSTS_MAX_AGE'), 0)
+
+        # The array validator already accepts the "*" wildcard, so CORS_ORIGINS
+        # needs no separate wildcard check before it is passed in.
+        for list_key in ('CORS_ORIGINS', 'ALLOWED_HOSTS'):
+            self._validate_json_array(list_key, self._get_env_value(list_key))
+
+    def validate_feature_flags(self) -> None:
+        """Validate that every known feature-flag variable holds a boolean value."""
+        print("🔍 Validating feature flags...")
+
+        flag_names = (
+            # Instrumentation and caching
+            'ENABLE_API_METRICS', 'ENABLE_REQUEST_LOGGING',
+            'ENABLE_TEMPLATE_CACHING', 'ENABLE_VALIDATION_CACHING',
+            # Monitoring
+            'HEALTH_CHECK_ENABLED', 'METRICS_ENABLED', 'TRACING_ENABLED',
+            # API documentation
+            'API_DOCS_ENABLED', 'SWAGGER_UI_ENABLED', 'REDOC_ENABLED',
+            # Build / delivery
+            'HOT_RELOAD', 'SOURCE_MAPS', 'MINIFY_ASSETS', 'COMPRESS_RESPONSES',
+        )
+
+        for name in flag_names:
+            raw = self._get_env_value(name)
+            self._validate_boolean(name, raw)
+
+    def validate_performance_config(self) -> None:
+        """Validate timeout, connection, cache-TTL and upload settings."""
+        print("🔍 Validating performance configuration...")
+
+        # (variable, inclusive minimum, inclusive maximum or None for unbounded)
+        integer_rules = (
+            ('REQUEST_TIMEOUT', 1, 3600),
+            ('KEEPALIVE_TIMEOUT', 1, 300),
+            ('MAX_CONNECTIONS', 1, 10000),
+            ('TEMPLATE_CACHE_TTL', 0, None),
+            ('VALIDATION_CACHE_TTL', 0, None),
+            ('MAX_UPLOAD_SIZE', 1024, None),
+        )
+        for name, lowest, highest in integer_rules:
+            self._validate_integer(name, self._get_env_value(name), lowest, highest)
+
+        self._validate_file_extensions('ALLOWED_EXTENSIONS', self._get_env_value('ALLOWED_EXTENSIONS'))
+
+    def validate_production_readiness(self) -> None:
+        """Check settings that matter when ENVIRONMENT is production.
+
+        ENVIRONMENT defaults to production when unset. In production, DEBUG
+        being on and a missing SECRET_KEY are errors; wildcard CORS origins,
+        disabled SECURITY_HEADERS and fewer than two workers are warnings.
+        Nothing is checked for any other environment.
+        """
+        print("🔍 Validating production readiness...")
+
+        env = self._get_env_value('ENVIRONMENT', 'production').lower()
+        debug = self._get_env_value('DEBUG', 'false').lower() in ('true', '1', 'yes', 'on')
+
+        if env != 'production':
+            return
+
+        if debug:
+            self.errors.append("Production environment should not have DEBUG=true")
+
+        if not self._get_env_value('SECRET_KEY'):
+            self.errors.append("Production environment must have SECRET_KEY set")
+
+        cors_origins = self._get_env_value('CORS_ORIGINS', '*')
+        if cors_origins == '*':
+            self.warnings.append("Production environment using wildcard CORS origins")
+
+        if self._get_env_value('SECURITY_HEADERS', 'true').lower() not in ('true', '1', 'yes', 'on'):
+            self.warnings.append("Production environment should enable SECURITY_HEADERS")
+
+        # WORKERS may be empty or non-numeric. validate_server_config reports
+        # a malformed value, so skip the comparison here instead of letting
+        # int() abort the whole run with a traceback.
+        try:
+            workers = int(self._get_env_value('WORKERS', '4'))
+        except ValueError:
+            workers = None
+        if workers is not None and workers < 2:
+            self.warnings.append("Production environment should use multiple workers")
+
+    def check_environment_consistency(self) -> None:
+        """Warn about setting combinations that contradict each other.
+
+        Covers DEBUG in production, DEBUG with a log level above info, and
+        HOT_RELOAD together with MINIFY_ASSETS. Only warnings are produced.
+        """
+        print("🔍 Checking environment consistency...")
+
+        truthy = ('true', '1', 'yes', 'on')
+
+        def enabled(name: str, fallback: str) -> bool:
+            # Same truthiness rule for every flag read in this check.
+            return self._get_env_value(name, fallback).lower() in truthy
+
+        debug_on = enabled('DEBUG', 'false')
+        environment = self._get_env_value('ENVIRONMENT', 'production').lower()
+        level = self._get_env_value('LOG_LEVEL', 'info').lower()
+
+        if debug_on:
+            if environment == 'production':
+                self.warnings.append("DEBUG mode enabled in production environment")
+            if level not in ('debug', 'info'):
+                self.warnings.append("DEBUG mode with non-debug log level")
+
+        if enabled('HOT_RELOAD', 'false') and enabled('MINIFY_ASSETS', 'true'):
+            self.warnings.append("HOT_RELOAD and MINIFY_ASSETS both enabled (unusual for development)")
+
+    def validate_all(self) -> Tuple[int, int, int]:
+        """Run every validation step in order.
+
+        Returns:
+            A ``(errors, warnings, info)`` tuple of message counts.
+        """
+        print("🚀 Starting UnitForge configuration validation...\n")
+
+        steps = (
+            self.validate_basic_config,
+            self.validate_server_config,
+            self.validate_security_config,
+            self.validate_feature_flags,
+            self.validate_performance_config,
+            self.validate_production_readiness,
+            self.check_environment_consistency,
+        )
+        for step in steps:
+            step()
+
+        counts = (len(self.errors), len(self.warnings), len(self.info))
+        return counts
+
+    def print_results(self) -> None:
+        """Print collected messages grouped by severity, then a summary and verdict.
+
+        Sections with no messages are omitted. The verdict is "failed" when
+        any error exists, "passed with warnings" when only warnings exist, and
+        "passed" otherwise.
+        """
+        rule = "=" * 60
+        print("\n" + rule)
+        print("🔍 CONFIGURATION VALIDATION RESULTS")
+        print(rule)
+
+        # (section heading, per-message prefix, messages)
+        sections = (
+            ("📋 Information", "ℹ️  ", self.info),
+            ("⚠️  Warnings", "⚠️  ", self.warnings),
+            ("❌ Errors", "❌ ", self.errors),
+        )
+        for heading, bullet, messages in sections:
+            if not messages:
+                continue
+            print(f"\n{heading} ({len(messages)}):")
+            for text in messages:
+                print(f"   {bullet}{text}")
+
+        error_count = len(self.errors)
+        warning_count = len(self.warnings)
+        print("\n📊 Summary:")
+        print(f"   • Errors: {error_count}")
+        print(f"   • Warnings: {warning_count}")
+        print(f"   • Info: {len(self.info)}")
+
+        if error_count:
+            verdict = "\n❌ Configuration validation failed."
+        elif warning_count:
+            verdict = "\n⚠️ Configuration validation passed with warnings."
+        else:
+            verdict = "\n✅ Configuration validation passed!"
+        print(verdict)
+
+        print(rule)
+
+
+def main():
+    """Parse command-line arguments, run all validations and exit.
+
+    Exits with status 1 when any error was recorded, 0 otherwise. With
+    ``--quiet`` the progress and report output is suppressed, but error
+    messages are still written to stderr so a failing run stays diagnosable.
+    """
+    # Imported locally: only the --quiet path needs them.
+    import contextlib
+    import io
+
+    parser = argparse.ArgumentParser(
+        description="Validate UnitForge environment configuration",
+        formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    parser.add_argument(
+        '--env-file',
+        type=str,
+        help="Path to environment file to load (default: .env)"
+    )
+    # NOTE: --check-all is accepted but currently has no effect;
+    # validate_all() always runs every check, production readiness included.
+    parser.add_argument(
+        '--check-all',
+        action='store_true',
+        help="Run all validation checks including production readiness"
+    )
+    parser.add_argument(
+        '--quiet',
+        action='store_true',
+        help="Suppress informational output"
+    )
+
+    args = parser.parse_args()
+
+    # Default to .env if it exists
+    env_file = args.env_file
+    if not env_file and Path('.env').exists():
+        env_file = '.env'
+
+    validator = ConfigValidator(env_file)
+
+    if args.quiet:
+        # The validate_* methods print progress lines unconditionally, so
+        # swallow stdout while they run.
+        with contextlib.redirect_stdout(io.StringIO()):
+            errors, _warnings, _info = validator.validate_all()
+        for message in validator.errors:
+            print(f"❌ {message}", file=sys.stderr)
+    else:
+        if env_file:
+            print(f"📄 Using environment file: {env_file}")
+        else:
+            print("📄 Using system environment variables only")
+        print()
+
+        errors, _warnings, _info = validator.validate_all()
+        validator.print_results()
+
+    # Exit with appropriate code
+    sys.exit(1 if errors > 0 else 0)
+
+
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()