"""
Python wrapper for Yelp's detect-secrets package, compiled to WebAssembly.
This module provides a bridge between JavaScript and the detect-secrets Python library.
"""
import json
import os
import sys
import tempfile
from typing import Dict, List, Any, Optional

class YelpDetectSecretsScanner:
    """
    A scanner that uses Yelp's detect-secrets library to find secrets in code.

    Content is written to a temporary file, scanned with detect-secrets, and
    the findings are re-keyed to the caller-supplied file path and grouped by
    line number.
    """

    # Infix markers that identify test/example/sample files (e.g. "app.test.js").
    _FIXTURE_MARKERS = ('.test.', '.example.', '.sample.')

    # File extensions treated as documentation.
    _DOC_EXTENSIONS = ('.md', '.html', '.txt', '.rst')

    def __init__(self):
        """Initialize the scanner."""
        self.results = {}

    async def install_dependencies(self):
        """
        Install the detect-secrets package and verify that it can be imported.

        Returns:
            True if detect-secrets was imported successfully, False if the
            import failed.

        Raises:
            Whatever ``micropip.install`` raises; installation errors are not
            caught here.
        """
        import micropip

        # Install detect-secrets and its dependencies
        await micropip.install('detect-secrets')

        # Import after installation to verify it worked
        try:
            import detect_secrets  # noqa: F401
        except ImportError as e:
            print(f"Failed to import detect-secrets: {e}")
            return False
        except Exception as e:
            # The package may fail at import time for reasons other than
            # being missing.
            print(f"Error during initialization: {e}")
            return False
        return True

    @staticmethod
    def _plugins_config() -> List[Dict[str, Any]]:
        """Return a fresh detect-secrets plugin configuration list."""
        return [
            {'name': 'AWSKeyDetector'},
            {'name': 'ArtifactoryDetector'},
            {'name': 'AzureStorageKeyDetector'},
            {'name': 'Base64HighEntropyString', 'limit': 4.5},
            {'name': 'BasicAuthDetector'},
            {'name': 'CloudantDetector'},
            {'name': 'GitHubTokenDetector'},
            {'name': 'HexHighEntropyString', 'limit': 3.0},
            {'name': 'IbmCloudIamDetector'},
            {'name': 'IbmCosHmacDetector'},
            {'name': 'JwtTokenDetector'},
            {'name': 'KeywordDetector', 'keyword_exclude': []},
            {'name': 'MailchimpDetector'},
            {'name': 'NpmDetector'},
            {'name': 'PrivateKeyDetector'},
            {'name': 'SendGridDetector'},
            {'name': 'SlackDetector'},
            {'name': 'SoftlayerDetector'},
            {'name': 'SquareOAuthDetector'},
            {'name': 'StripeDetector'},
            {'name': 'TwilioKeyDetector'},
        ]

    def scan_content(self, file_content: str, file_path: str, check_missed: bool = False) -> Dict[str, Any]:
        """
        Scan a file content for secrets using Yelp's detect-secrets.

        Args:
            file_content: The content of the file to scan
            file_path: The path of the file (used for reporting)
            check_missed: Whether to check for potentially missed secrets

        Returns:
            A dictionary with the keys 'secrets' and 'missed_secrets'

        Raises:
            Any exception raised while writing or scanning; it is printed with
            its traceback and then re-raised.
        """
        # Import detect-secrets modules lazily: they are only available after
        # install_dependencies() has run.
        from detect_secrets.core.secrets_collection import SecretsCollection
        from detect_secrets.settings import transient_settings

        # detect-secrets scans files by path, so the content goes to a
        # temporary file first. delete=False keeps it on disk after closing.
        temp_file = tempfile.NamedTemporaryFile(mode='w+', delete=False)
        temp_file_path = temp_file.name

        try:
            # The context manager closes (and flushes) the handle even when
            # write() raises, so the handle is never leaked and the file can
            # always be removed in the finally clause.
            with temp_file:
                temp_file.write(file_content)

            with transient_settings({'plugins_used': self._plugins_config()}):
                secrets = SecretsCollection()
                secrets.scan_file(temp_file_path)

                # Convert results to our format
                return self._format_results(secrets, file_path, check_missed)
        except Exception as e:
            import traceback
            error_msg = traceback.format_exc()
            print(f"Error during scanning: {str(e)}\n{error_msg}")
            raise
        finally:
            # Clean up the temporary file
            if os.path.exists(temp_file_path):
                os.unlink(temp_file_path)

    def _format_results(self, secrets_collection, original_file_path: str, check_missed: bool) -> Dict[str, Any]:
        """
        Format the results from detect-secrets into our standard format.

        Findings on the same line are merged into one entry that lists every
        distinct detector type. The entry keeps the hashed secret and the
        false-positive verdict of the first finding seen on that line.

        Args:
            secrets_collection: The SecretsCollection object from detect-secrets
                (only its ``json()`` method is used)
            original_file_path: The original file path to use in results
            check_missed: Whether to check for potentially missed secrets

        Returns:
            Formatted results dictionary with 'secrets' and 'missed_secrets'
        """
        formatted_results = {
            'secrets': [],
            'missed_secrets': []
        }

        # The results are keyed by the scanned (temporary) filename; the key is
        # ignored and every finding is reported against the original path.
        for file_results in secrets_collection.json().values():
            secrets_by_line = {}

            for secret in file_results:
                line_number = secret.get('line_number', 0)
                secret_type = secret.get('type', 'Unknown Secret')

                entry = secrets_by_line.get(line_number)
                if entry is None:
                    secrets_by_line[line_number] = {
                        'file': original_file_path,
                        'line': line_number,
                        'types': [secret_type],
                        'is_false_positive': self._is_likely_false_positive(original_file_path, secret),
                        'hashed_secret': secret.get('hashed_secret', '')
                    }
                elif secret_type not in entry['types']:
                    entry['types'].append(secret_type)

            # Add all grouped secrets to the results
            formatted_results['secrets'].extend(secrets_by_line.values())

        # If requested, check for potentially missed secrets
        if check_missed:
            formatted_results['missed_secrets'] = self._check_for_missed_secrets(original_file_path)

        return formatted_results

    @staticmethod
    def _has_test_prefix(lowered_path: str) -> bool:
        """Return True if 'test_' starts the path or follows a non-alphanumeric character."""
        start = lowered_path.find('test_')
        while start != -1:
            # Reject matches inside a longer word such as "latest_" or "contest_".
            if start == 0 or not lowered_path[start - 1].isalnum():
                return True
            start = lowered_path.find('test_', start + 1)
        return False

    def _is_likely_false_positive(self, file_path: str, secret: Dict[str, Any]) -> bool:
        """
        Check if a detected secret is likely a false positive.

        Args:
            file_path: Path to the file containing the secret
            secret: Secret information dictionary

        Returns:
            Boolean indicating if it's likely a false positive
        """
        lowered_path = file_path.lower()

        # Test, example and sample files commonly contain dummy credentials.
        if any(marker in lowered_path for marker in self._FIXTURE_MARKERS):
            return True
        if self._has_test_prefix(lowered_path):
            return True

        # Get the type of secret
        type_of_secret = secret.get('type', '')

        # Check for example keys in AWS credentials.
        # NOTE(review): this only matches when the dictionary's string form
        # contains "EXAMPLE" - confirm the dictionary carries the raw value.
        if 'AWS' in type_of_secret and 'EXAMPLE' in str(secret):
            return True

        # Any finding in a documentation file is treated as a likely false
        # positive. Match on the real extension, not on a substring anywhere
        # in the path.
        if lowered_path.endswith(self._DOC_EXTENSIONS):
            return True

        # Don't mark everything as a false positive by default
        return False

    def _check_for_missed_secrets(self, file_path: str) -> List[Dict[str, Any]]:
        """
        Placeholder for additional checks for missed secrets.
        This could be extended with custom logic for specific types of secrets.

        Args:
            file_path: The path of the file to check

        Returns:
            List of potentially missed secrets (currently always empty)
        """
        # This is a placeholder - in a real implementation, you might add
        # additional checks for specific types of secrets that detect-secrets
        # might miss
        return []

# Module-level scanner instance shared by the initialize() and scan_file()
# entry points in this module.
scanner = YelpDetectSecretsScanner()

# Entry points called from JavaScript
async def initialize():
    """
    Prepare the module-level scanner by installing its dependencies.

    Returns:
        The result of the scanner's install_dependencies() coroutine.
    """
    installed = await scanner.install_dependencies()
    return installed

def scan_file(file_content, file_path, check_missed=False):
    """
    Run the module-level scanner over one file's content and serialize the outcome.

    Args:
        file_content: The content of the file to scan
        file_path: The path of the file (used for reporting)
        check_missed: Whether to check for potentially missed secrets

    Returns:
        A JSON string. On success it holds the scan results; on any exception
        it holds "error", "traceback" and empty "secrets"/"missed_secrets"
        lists, so this function never raises to the caller.
    """
    try:
        return json.dumps(scanner.scan_content(file_content, file_path, check_missed))
    except Exception as e:
        import traceback
        error_msg = traceback.format_exc()
        print(f"Python error: {str(e)}\n{error_msg}")

        # Report the failure in the same shape as a successful result
        failure = dict(
            error=str(e),
            traceback=error_msg,
            secrets=[],
            missed_secrets=[],
        )
        return json.dumps(failure)