import random
import re

# NOTE(review): no use of LOREM_IPSUM_COMMENTS is visible in this file as
# received; the import is kept so the module's dependencies are unchanged.
from hashes.hashes import LOREM_IPSUM_COMMENTS


class Obfuscator:
    """Namespace for the HTML obfuscation routine."""

    @staticmethod
    def obfuscate_html(html_content: str) -> str:
        """Return ``html_content`` with decoy comments and zero-width characters.

        Processing steps, in order:

        1. ``<script>``, ``<style>`` and ``<table>`` elements are swapped for
           unique placeholder comments so later steps cannot alter them.
        2. Each remaining line has its whitespace collapsed; empty lines are
           dropped.
        3. Random decoy comments are added around some lines and after some
           closing tags, plus two header and two footer comments.
        4. Zero-width characters are sprinkled sparsely into text nodes longer
           than 10 characters that do not look like script code.
        5. Newline runs and space/tab runs are collapsed.
        6. The protected elements are put back unchanged.

        The output is randomised through the ``random`` module, so two calls
        with the same input generally differ.

        NOTE(review): the markup inside several string literals (placeholder
        text, tag regexes, comment templates, line-comment probabilities) was
        missing from the file as received. Those literals are reconstructed
        here; confirm them against the intended design.

        Args:
            html_content: HTML document or fragment to obfuscate.

        Returns:
            The obfuscated HTML string.
        """
        alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'

        def generate_random_string(length=8):
            """Return ``length`` random ASCII letters/digits."""
            return ''.join(random.choice(alphabet) for _ in range(length))

        def make_comment(text):
            """Wrap ``text`` in an HTML comment."""
            return f'<!-- {text} -->'

        # --- Step 1: protect critical content -------------------------------
        protected_blocks = []
        # A per-call random token keeps placeholders from colliding with
        # comments that already exist in the input document.
        token = generate_random_string(12)

        def placeholder(index):
            # No spaces or newlines, so whitespace cleanup leaves it intact;
            # the closing "-->" stops index 1 from matching inside index 10.
            return f'<!--PROTECTED_{token}_{index}-->'

        def protect(match):
            protected_blocks.append(match.group(0))
            return placeholder(len(protected_blocks) - 1)

        temp_html = html_content
        # Order matters: scripts and styles are hidden first so a table that
        # contains them is stored with their placeholders inside it.
        # Non-greedy matching stops at the first closing tag, so nested tables
        # are only protected up to the inner </table>.
        for tag in ('script', 'style', 'table'):
            temp_html = re.sub(
                rf'<{tag}\b[^>]*>.*?</{tag}\s*>',
                protect,
                temp_html,
                flags=re.DOTALL | re.IGNORECASE,
            )

        # --- Step 2: collapse whitespace, drop empty lines ------------------
        collapsed = (' '.join(raw.split()) for raw in temp_html.split('\n'))
        lines = [line for line in collapsed if line]

        # --- Step 3a: decoy comments between lines (kept sparse) ------------
        # NOTE(review): the original probabilities were not recoverable; 0.2
        # is an assumption chosen to honour "but not too many".
        line_comment_chance = 0.2
        obfuscated_lines = []
        for line in lines:
            # Lines that already start with a comment (including placeholders)
            # get no extra decoys around them.
            eligible = not line.startswith('<!--')
            if eligible and random.random() < line_comment_chance:
                obfuscated_lines.append(make_comment(generate_random_string()))
            obfuscated_lines.append(line)
            if eligible and random.random() < line_comment_chance:
                obfuscated_lines.append(make_comment(generate_random_string()))
        obfuscated = '\n'.join(obfuscated_lines)

        # --- Step 3b: decoy comments after some closing tags ----------------
        def inject_some_comments(html):
            """Append a decoy comment after closing tags of some tag kinds."""
            # NOTE(review): the original six patterns were stripped from the
            # source; these block-level closing tags are an assumption.
            safe_tags = ('div', 'p', 'li', 'ul', 'section', 'header')
            for tag in safe_tags:
                if random.random() > 0.5:  # 50% chance to apply each pattern
                    html = re.sub(
                        rf'(</{tag}>)',
                        lambda m: m.group(1)
                        + make_comment(generate_random_string()),
                        html,
                    )
            return html

        obfuscated = inject_some_comments(obfuscated)

        # --- Step 3c: header and footer comments ----------------------------
        header_comments = [
            'auto-obfuscated-' + generate_random_string(10),
            'generated-' + generate_random_string(8),
        ]
        header_block = '\n'.join(make_comment(c) for c in header_comments)
        obfuscated = header_block + '\n' + obfuscated

        footer_comments = [
            'end-' + generate_random_string(10),
            'completed-' + generate_random_string(8),
        ]
        footer_block = '\n'.join(make_comment(c) for c in footer_comments)
        obfuscated = obfuscated + '\n' + footer_block

        # --- Step 4: sparse zero-width characters in text nodes -------------
        invisible_chars = ['\u200B', '\u200C']
        script_keywords = (
            'function', 'const ', 'var ', 'let ', 'document.', 'window.',
            'getElement', 'querySelector', 'addEventListener',
        )
        entity_pattern = re.compile(r'&#?\w+;')

        def add_minimal_invisible(match):
            text = match.group(1)
            # Leave short text and anything that looks like script untouched.
            if len(text) <= 10 or any(k in text for k in script_keywords):
                return match.group(0)

            # Positions after which an insertion would land inside a character
            # reference such as "&amp;" and break it.
            inside_entity = set()
            for entity in entity_pattern.finditer(text):
                inside_entity.update(range(entity.start(), entity.end() - 1))

            pieces = []
            for i, char in enumerate(text):
                pieces.append(char)
                # Very rarely add invisible chars: every 8th position, 20%.
                if (
                    i > 0
                    and i % 8 == 0
                    and i not in inside_entity
                    and random.random() > 0.8
                ):
                    pieces.append(random.choice(invisible_chars))
            return '>' + ''.join(pieces) + '<'

        obfuscated = re.sub(r'>([^<]+)<', add_minimal_invisible, obfuscated)

        # --- Step 5: final cleanup ------------------------------------------
        # Done before restoration so protected content really is restored
        # exactly as-is (whitespace in scripts/styles can be significant).
        obfuscated = re.sub(r'\n\n+', '\n', obfuscated)
        obfuscated = re.sub(r'[ \t]+', ' ', obfuscated)

        # --- Step 6: restore protected content ------------------------------
        # Reverse order: a later block (e.g. a table) may contain placeholders
        # of earlier blocks (its scripts/styles), which must be expanded after
        # the enclosing block has been put back.
        for index in reversed(range(len(protected_blocks))):
            obfuscated = obfuscated.replace(
                placeholder(index), protected_blocks[index]
            )

        return obfuscated