import random
import re
from hashes.hashes import LOREM_IPSUM_COMMENTS
class Obfuscator:
    """Pad HTML with noise comments and sparse invisible characters.

    ``<script>``, ``<style>`` and ``<table>`` blocks are swapped for
    placeholders before any transformation and put back verbatim at the end.
    """

    @staticmethod
    def obfuscate_html(html_content, *, comments=None):
        """Return an obfuscated copy of ``html_content``.

        The markup is processed in this order:

        1. ``<script>``, ``<style>`` and ``<table>`` blocks are replaced by
           unique placeholder comments.
        2. Every line is whitespace-normalised and blank lines are dropped.
        3. Noise comments are randomly added between lines and after some
           closing tags; header and footer comments wrap the document.
        4. Zero-width characters are sparsely inserted into longer text
           nodes that do not look like script code.
        5. Whitespace is tidied and the protected blocks are restored
           byte-for-byte.

        Args:
            html_content: HTML source to obfuscate (``str``).
            comments: Optional iterable of strings used as the text of the
                noise comments. Defaults to the module-level
                ``LOREM_IPSUM_COMMENTS``. If the pool is empty, random
                alphanumeric strings are used instead.

        Returns:
            The obfuscated HTML as a ``str``. The output is randomised, so
            two calls with the same input generally differ.
        """

        def generate_random_string(length=8):
            chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
            return ''.join(random.choice(chars) for _ in range(length))

        if comments is None:
            # NOTE(review): assumes LOREM_IPSUM_COMMENTS is a sequence of
            # strings - confirm against hashes.hashes.
            comments = LOREM_IPSUM_COMMENTS
        comment_pool = list(comments)

        def make_comment(text):
            # Comment text must not contain '<', '>' or '--': any of them
            # could end the comment early or confuse the text-node pass below.
            cleaned = re.sub(r'[<>]', '', str(text))
            cleaned = re.sub(r'-{2,}', '-', cleaned)
            cleaned = ' '.join(cleaned.split())
            return f'<!-- {cleaned} -->'

        def random_comment():
            if comment_pool:
                return make_comment(random.choice(comment_pool))
            return make_comment(generate_random_string(12))

        # --- Protect original content ------------------------------------
        # A per-call random token keeps placeholders from colliding with
        # anything already present in the input.
        token = generate_random_string(12)
        protected_blocks = []  # (placeholder, original text), in protection order

        def protect(kind):
            def store(match):
                placeholder = f'<!--PROTECTED_{kind}_{token}_{len(protected_blocks)}-->'
                protected_blocks.append((placeholder, match.group(0)))
                return placeholder
            return store

        # Scripts and styles go first so that markup-looking text inside them
        # is never mistaken for a table.
        block_patterns = (
            ('SCRIPT', r'<script\b[^>]*>.*?</script\s*>'),
            ('STYLE', r'<style\b[^>]*>.*?</style\s*>'),
            ('TABLE', r'<table\b[^>]*>.*?</table\s*>'),
        )
        temp_html = html_content
        for kind, pattern in block_patterns:
            temp_html = re.sub(
                pattern, protect(kind), temp_html,
                flags=re.DOTALL | re.IGNORECASE,
            )

        # --- Clean up HTML: collapse whitespace, drop blank lines --------
        lines = [' '.join(raw.split()) for raw in temp_html.split('\n')]
        lines = [line for line in lines if line]

        # --- Add comments between lines (but not too many) ---------------
        # NOTE(review): the probability thresholds are tuning guesses.
        obfuscated_lines = []
        inside_tag = False  # True while a tag/comment opened earlier is still unclosed
        for line in lines:
            # Leave placeholders and existing comments alone.
            eligible = not line.startswith('<!--')
            if eligible and not inside_tag and random.random() > 0.7:
                obfuscated_lines.append(random_comment())
            obfuscated_lines.append(line)
            # Heuristic: if the last '<' comes after the last '>', the line
            # ends inside a tag or comment; a comment inserted there would
            # corrupt it.
            last_open, last_close = line.rfind('<'), line.rfind('>')
            if last_open != last_close:
                inside_tag = last_open > last_close
            if eligible and not inside_tag and random.random() > 0.8:
                obfuscated_lines.append(random_comment())
        obfuscated = '\n'.join(obfuscated_lines)

        # --- Inject comments after SOME closing tags (not all) -----------
        # NOTE(review): the set of "safe" tags is an assumption; extend or
        # trim as needed.
        for tag in ('div', 'p', 'li', 'section', 'header', 'footer'):
            if random.random() > 0.5:  # 50% chance to apply each pattern
                closing = f'</{tag}>'
                obfuscated = obfuscated.replace(closing, closing + random_comment())

        # --- Header / footer comments ------------------------------------
        header_comments = [
            'auto-obfuscated-' + generate_random_string(10),
            'generated-' + generate_random_string(8),
        ]
        footer_comments = [
            'end-' + generate_random_string(10),
            'completed-' + generate_random_string(8),
        ]
        header_block = '\n'.join(f'<!-- {text} -->' for text in header_comments)
        footer_block = '\n'.join(f'<!-- {text} -->' for text in footer_comments)
        obfuscated = header_block + '\n' + obfuscated + '\n' + footer_block

        # --- Minimal invisible characters, text nodes only ---------------
        invisible_chars = ['\u200B', '\u200C']
        script_keywords = (
            'function', 'const ', 'var ', 'let ', 'document.', 'window.',
            'getElement', 'querySelector', 'addEventListener',
        )

        def add_minimal_invisible(match):
            text = match.group(1)
            # Never touch script-like content or short text blocks.
            if len(text) <= 10 or any(keyword in text for keyword in script_keywords):
                return match.group(0)
            pieces = []
            in_entity = False
            for i, char in enumerate(text):
                pieces.append(char)
                # Do not split character references such as "&amp;".
                if char == '&':
                    in_entity = True
                elif char == ';' or char.isspace():
                    in_entity = False
                # Very rarely add invisible chars.
                if not in_entity and i % 8 == 0 and i > 0 and random.random() > 0.8:
                    pieces.append(random.choice(invisible_chars))
            return '>' + ''.join(pieces) + '<'

        # Excluding '>' from the text run keeps a match from spanning a
        # comment terminator ("-->") and corrupting it.
        obfuscated = re.sub(r'>([^<>]+)<', add_minimal_invisible, obfuscated)

        # --- Final cleanup -----------------------------------------------
        # Done BEFORE restoration so whitespace inside protected blocks
        # (e.g. JS string literals) is not altered.
        obfuscated = re.sub(r'\n\n+', '\n', obfuscated)
        obfuscated = re.sub(r'[ \t]+', ' ', obfuscated)

        # --- Restore protected content exactly as-is ---------------------
        # Reverse order: a block protected later (a table) may contain the
        # placeholder of one protected earlier (a script inside that table).
        for placeholder, original in reversed(protected_blocks):
            obfuscated = obfuscated.replace(placeholder, original)

        return obfuscated