"""
Security module for MiniMax Client

Provides comprehensive input validation, sanitization, and security utilities
for safe file operations and user input handling.
"""

import hashlib
import logging
import os
import re
import secrets
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional, List, Dict, Any, Union
from urllib.parse import urlparse


logger = logging.getLogger(__name__)


class SecurityError(Exception):
    """Root of the exception hierarchy raised by this security module."""


class PathTraversalError(SecurityError):
    """Signals that a path was rejected as a directory traversal attempt."""


class InvalidInputError(SecurityError):
    """Signals that a user-supplied value did not pass validation."""


class SecurityValidator:
    """Validate and sanitize file paths, user input, URLs and filenames.

    All limits and pattern lists are plain instance attributes set in
    ``__init__`` so callers can adjust them after construction:

    * ``allowed_extensions`` -- category name -> list of lower-case extensions.
    * ``dangerous_patterns`` -- regex strings rejected anywhere in a path.
    * ``max_path_length`` -- maximum path length in characters.
    * ``max_input_length`` -- default maximum user-input length in characters.
    * ``max_file_size`` -- maximum file size in bytes.
    """

    def __init__(self):
        """Initialise the default extension lists, patterns and limits."""
        self.allowed_extensions = {
            'code': ['.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.cpp', '.c', '.h', '.hpp',
                     '.cs', '.go', '.rs', '.rb', '.php', '.swift', '.kt', '.scala', '.r', '.m'],
            'data': ['.json', '.yaml', '.yml', '.xml', '.csv', '.txt', '.md', '.rst'],
            'config': ['.ini', '.conf', '.config', '.env', '.toml', '.lock'],
            'docs': ['.md', '.rst', '.txt', '.pdf', '.docx'],
        }

        self.dangerous_patterns = [
            r'\.\./+',  # Directory traversal
            r'\/\.\./',  # Directory traversal
            r'~/',      # Home directory
            r'\$\{.*\}',  # Variable expansion
            r'`.*`',    # Command substitution
            r'\$\(.*\)',  # Command substitution
            r'<%.*%>',  # Template injection
            r'{{.*}}',  # Template injection
        ]

        self.max_path_length = 4096
        self.max_input_length = 100000  # 100KB
        self.max_file_size = 10 * 1024 * 1024  # 10MB

    def sanitize_file_path(self, file_path: Union[str, Path],
                           base_dir: Optional[Union[str, Path]] = None,
                           allow_absolute: bool = False) -> Path:
        """
        Sanitize and validate a file path to prevent directory traversal attacks.

        The path is stripped, screened against ``dangerous_patterns``, checked
        for ``..`` components and forbidden characters, and then resolved.
        Relative paths are resolved against the current working directory.

        Args:
            file_path: The file path to sanitize
            base_dir: Base directory to restrict access to; the resolved path
                must lie inside it
            allow_absolute: Whether to allow absolute paths

        Returns:
            Resolved, sanitized Path object

        Raises:
            PathTraversalError: If path contains traversal attempts, forbidden
                characters, is absolute when not allowed, or escapes base_dir
            InvalidInputError: If path is empty, too long, or cannot be resolved
        """
        if not file_path:
            raise InvalidInputError("File path cannot be empty")

        # Convert to string and normalize
        path_str = str(file_path).strip()

        # Check length
        if len(path_str) > self.max_path_length:
            raise InvalidInputError(f"Path too long: {len(path_str)} > {self.max_path_length}")

        # Check for dangerous patterns. The path is logged with %r so that
        # embedded newlines cannot forge additional log lines.
        for pattern in self.dangerous_patterns:
            if re.search(pattern, path_str, re.IGNORECASE):
                logger.warning("Dangerous pattern detected in path: %r", path_str)
                raise PathTraversalError(f"Path contains dangerous pattern: {pattern}")

        # The regexes above only catch "../"; a bare or trailing ".." and
        # Windows-style "..\" separators must be rejected as well.
        if '..' in re.split(r'[\\/]+', path_str):
            logger.warning("Parent directory reference in path: %r", path_str)
            raise PathTraversalError("Path contains parent directory reference")

        # Screen raw characters before touching the filesystem: resolve() can
        # raise a bare ValueError on an embedded null byte.
        self._check_path_characters(path_str)

        # Convert to Path object
        try:
            path = Path(path_str)
        except (ValueError, OSError) as e:
            raise InvalidInputError(f"Invalid path format: {e}") from e

        # Check for absolute paths if not allowed
        if not allow_absolute and path.is_absolute():
            raise PathTraversalError("Absolute paths are not allowed")

        # Resolve the path to handle '.' components and symlinks
        try:
            resolved_path = path.resolve()
        except (OSError, RuntimeError, ValueError) as e:
            raise InvalidInputError(f"Cannot resolve path: {e}") from e

        # If base_dir is specified, ensure path is within it
        if base_dir:
            base_path = Path(base_dir).resolve()
            try:
                resolved_path.relative_to(base_path)
            except ValueError:
                logger.warning("Path traversal attempt: %r outside %r",
                               str(resolved_path), str(base_path))
                raise PathTraversalError(
                    f"Path is outside allowed directory: {base_path}"
                ) from None

        # Additional security checks on the final, resolved path
        self._check_path_security(resolved_path)

        return resolved_path

    @staticmethod
    def _check_path_characters(path_str: str) -> None:
        """Reject null bytes and control characters (tab, LF, CR are tolerated)."""
        # Check for null bytes
        if '\x00' in path_str:
            raise PathTraversalError("Path contains null bytes")

        # Check for control characters
        if any(ord(c) < 32 for c in path_str if c not in '\t\n\r'):
            raise PathTraversalError("Path contains control characters")

    def _check_path_security(self, path: Path) -> None:
        """
        Perform additional security checks on a path.

        Raises:
            PathTraversalError: If the path contains null bytes or control
                characters
            InvalidInputError: If any component is longer than 255 characters
        """
        self._check_path_characters(str(path))

        # Check for extremely long filename components
        for part in path.parts:
            if len(part) > 255:  # Max filename length on most filesystems
                raise InvalidInputError(f"Filename component too long: {part}")

    def validate_file_extension(self, file_path: Union[str, Path],
                                allowed_categories: Optional[List[str]] = None) -> bool:
        """
        Validate that a file has an allowed extension.

        The comparison is case-insensitive. Dotfiles such as ``.env`` have no
        pathlib suffix, so their whole name is treated as the extension.

        Args:
            file_path: Path to validate
            allowed_categories: List of allowed extension categories; defaults
                to all of 'code', 'data', 'config' and 'docs'. Unknown category
                names are ignored.

        Returns:
            True if extension is allowed

        Raises:
            InvalidInputError: If the file has no extension or it is not allowed
        """
        path = Path(file_path)
        extension = path.suffix.lower()

        if not extension and path.name.startswith('.'):
            # Path('.env').suffix is '' -- without this, the '.env' entry in
            # the config category could never match.
            extension = path.name.lower()

        if not extension:
            raise InvalidInputError("File has no extension")

        if allowed_categories is None:
            allowed_categories = ['code', 'data', 'config', 'docs']

        allowed_extensions = set()
        for category in allowed_categories:
            allowed_extensions.update(self.allowed_extensions.get(category, ()))

        if extension not in allowed_extensions:
            raise InvalidInputError(f"File extension '{extension}' is not allowed")

        return True

    def validate_user_input(self, user_input: str,
                            max_length: Optional[int] = None,
                            allow_code: bool = True) -> str:
        """
        Validate and sanitize user input.

        Args:
            user_input: Input string to validate
            max_length: Maximum allowed length; defaults to max_input_length
            allow_code: Whether to allow code-like patterns (script tags and
                javascript:/vbscript:/base64 data: URLs)

        Returns:
            The input with leading and trailing whitespace stripped

        Raises:
            InvalidInputError: If input is not a string, is too long, contains
                null bytes, more than 10% control characters, or (when
                allow_code is False) a dangerous code pattern
        """
        if not isinstance(user_input, str):
            raise InvalidInputError("Input must be a string")

        if max_length is None:
            max_length = self.max_input_length

        # Check length
        if len(user_input) > max_length:
            raise InvalidInputError(f"Input too long: {len(user_input)} > {max_length}")

        # Check for null bytes
        if '\x00' in user_input:
            raise InvalidInputError("Input contains null bytes")

        # Check for excessive control characters
        control_chars = sum(1 for c in user_input if ord(c) < 32 and c not in '\t\n\r')
        if control_chars > len(user_input) * 0.1:  # Allow up to 10% control characters
            raise InvalidInputError("Input contains too many control characters")

        # If code is not allowed, check for potentially dangerous patterns
        if not allow_code:
            dangerous_code_patterns = [
                r'<script[^>]*>',  # Script tags
                r'javascript:',   # JavaScript URLs
                r'data:.*base64',  # Data URLs with base64
                r'vbscript:',     # VBScript URLs
            ]

            for pattern in dangerous_code_patterns:
                if re.search(pattern, user_input, re.IGNORECASE):
                    logger.warning("Dangerous code pattern detected: %s", pattern)
                    raise InvalidInputError("Input contains potentially dangerous code")

        return user_input.strip()

    def check_directory_traversal(self, path: str) -> bool:
        """
        Check if a path contains directory traversal attempts.

        This delegates to sanitize_file_path with its defaults, so any path
        that method rejects (including empty, over-long or absolute paths)
        is reported as a traversal.

        Args:
            path: Path string to check

        Returns:
            True if the path is rejected, False if it is accepted
        """
        try:
            self.sanitize_file_path(path)
        except (PathTraversalError, InvalidInputError):
            return True
        return False

    def secure_backup_filename(self, original_path: Union[str, Path],
                               suffix: str = 'backup') -> str:
        """
        Generate a secure backup filename.

        The result has the form ``<stem>_<suffix>_<random hex><extension>``
        where every character outside word characters, '-', '_' and '.' is
        replaced by '_'. Names longer than 200 characters are shortened by
        truncating the stem.

        Args:
            original_path: Original file path (only its final component is used)
            suffix: Suffix to add to backup filename

        Returns:
            Secure backup filename (a bare name, not a path)
        """
        path = Path(original_path)
        token = secrets.token_hex(8)  # Random, unguessable uniqueness marker

        # Sanitize every caller-controlled piece, including the suffix, so no
        # path separator can reach the generated name.
        unsafe_chars = r'[^\w\-_.]'
        safe_stem = re.sub(unsafe_chars, '_', path.stem)
        safe_extension = re.sub(unsafe_chars, '_', path.suffix)
        safe_label = re.sub(unsafe_chars, '_', suffix)

        tail = f"_{safe_label}_{token}{safe_extension}"
        backup_name = f"{safe_stem}{tail}"

        # Ensure the backup name is not too long
        if len(backup_name) > 200:
            # Truncate the stem but keep the suffix and token. Clamp at zero:
            # a negative slice bound would cut the stem from the wrong end.
            max_stem_length = max(0, 200 - len(tail))
            backup_name = f"{safe_stem[:max_stem_length]}{tail}"

        return backup_name

    def validate_file_size(self, file_path: Union[str, Path]) -> bool:
        """
        Validate that a file is not too large.

        Args:
            file_path: Path to file to check

        Returns:
            True if file size is acceptable

        Raises:
            InvalidInputError: If the file is larger than max_file_size or its
                size cannot be read
        """
        try:
            size = Path(file_path).stat().st_size
        except OSError as e:
            raise InvalidInputError(f"Cannot check file size: {e}") from e

        if size > self.max_file_size:
            raise InvalidInputError(f"File too large: {size} > {self.max_file_size}")
        return True

    def sanitize_url(self, url: str) -> str:
        """
        Sanitize and validate a URL.

        Args:
            url: URL to sanitize

        Returns:
            The URL with surrounding whitespace stripped

        Raises:
            InvalidInputError: If URL is empty, malformed, not http/https,
                has no host, or contains a dangerous scheme string
        """
        if not url or not isinstance(url, str):
            raise InvalidInputError("URL must be a non-empty string")

        url = url.strip()

        # Parse URL
        try:
            parsed = urlparse(url)
            hostname = parsed.hostname
        except ValueError as e:
            raise InvalidInputError(f"Invalid URL format: {e}") from e

        # Check scheme
        allowed_schemes = ['http', 'https']
        if parsed.scheme.lower() not in allowed_schemes:
            raise InvalidInputError(f"URL scheme '{parsed.scheme}' not allowed")

        # A scheme alone ("http://") is not a usable URL
        if not hostname:
            raise InvalidInputError("URL must include a host")

        # Check for dangerous patterns
        lowered = url.lower()
        if any(pattern in lowered for pattern in ['javascript:', 'data:', 'vbscript:']):
            raise InvalidInputError("URL contains dangerous scheme")

        return url

    def log_security_event(self, event_type: str, details: Dict[str, Any]) -> None:
        """
        Log a security event for monitoring.

        The event is emitted at WARNING level with ``event_type``, ``details``
        and an ISO-8601 UTC ``timestamp`` attached to the log record.

        Args:
            event_type: Type of security event
            details: Additional details about the event
        """
        logger.warning("Security event: %s", event_type, extra={
            'event_type': event_type,
            'details': details,
            'timestamp': datetime.now(timezone.utc).isoformat(),
        })


# Global security validator instance, created once at import time with the
# default limits and patterns; the module-level convenience functions use it.
security_validator = SecurityValidator()

# Convenience functions
def sanitize_file_path(file_path: Union[str, Path],
                       base_dir: Optional[Union[str, Path]] = None,
                       allow_absolute: bool = False) -> Path:
    """Sanitize ``file_path`` using the shared module-level validator."""
    sanitized = security_validator.sanitize_file_path(
        file_path,
        base_dir=base_dir,
        allow_absolute=allow_absolute,
    )
    return sanitized

def validate_user_input(user_input: str,
                        max_length: Optional[int] = None,
                        allow_code: bool = True) -> str:
    """Validate ``user_input`` using the shared module-level validator."""
    cleaned = security_validator.validate_user_input(
        user_input,
        max_length=max_length,
        allow_code=allow_code,
    )
    return cleaned

def check_directory_traversal(path: str) -> bool:
    """Report whether the shared module-level validator rejects ``path``."""
    is_traversal = security_validator.check_directory_traversal(path)
    return is_traversal

def secure_backup_filename(original_path: Union[str, Path],
                           suffix: str = 'backup') -> str:
    """Build a backup filename using the shared module-level validator."""
    backup_name = security_validator.secure_backup_filename(
        original_path,
        suffix=suffix,
    )
    return backup_name