131 lines
5.7 KiB
Python
131 lines
5.7 KiB
Python
import random
|
|
import re
|
|
|
|
from hashes.hashes import LOREM_IPSUM_COMMENTS
|
|
|
|
class Obfuscator:
    """Pad HTML with decoy comments and zero-width characters.

    ``<script>``, ``<style>`` and ``<table>`` elements are lifted out of the
    document before any rewriting happens and are put back byte-for-byte at
    the very end, so their content is never altered.
    """

    @staticmethod
    def obfuscate_html(html_content):
        """Return an obfuscated copy of *html_content*.

        The pipeline is:

        1. Replace every ``<script>``, ``<style>`` and ``<table>`` element
           with a placeholder comment (tag names are matched
           case-insensitively).
        2. Collapse whitespace on each remaining line and drop empty lines.
        3. Randomly insert comments drawn from ``LOREM_IPSUM_COMMENTS``
           before/after lines that do not start with ``<!--``.
        4. Randomly append a short random comment after some closing tags
           (``div``, ``p``, ``span``, ``h1``-``h3``).
        5. Add two header and two footer comments.
        6. Sprinkle zero-width characters into longer text nodes, skipping
           script-looking text and never splitting a character reference
           such as ``&amp;``.
        7. Normalise newlines/spaces, then restore the protected elements
           exactly as they appeared in the input.

        Args:
            html_content: The HTML document (or fragment) as a string.

        Returns:
            The obfuscated HTML string. Output is randomised, so two calls
            with the same input generally differ.

        Note:
            Elements are matched non-greedily with a regex, so a nested
            ``<table>`` is only protected up to the first ``</table>``.
        """

        def generate_random_string(length=8):
            """Return *length* random ASCII letters/digits."""
            chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
            return ''.join(random.choice(chars) for _ in range(length))

        # (placeholder, original markup) pairs, in the order they were lifted.
        protected_blocks = []

        def make_protector(kind):
            """Build a re.sub callback that stashes the match under *kind*."""
            def protect(match):
                placeholder = f'<!-- PROTECTED_{kind}_{len(protected_blocks)} -->'
                protected_blocks.append((placeholder, match.group(0)))
                return placeholder
            return protect

        # First pass: protect critical content. Order matters: scripts and
        # styles are lifted first, so a table lifted later may *contain*
        # their placeholders (handled by the reverse-order restore below).
        temp_html = html_content
        for tag, kind in (('script', 'SCRIPT'), ('style', 'STYLE'), ('table', 'TABLE')):
            temp_html = re.sub(
                rf'<{tag}\b[^>]*>.*?</{tag}\s*>',
                make_protector(kind),
                temp_html,
                flags=re.DOTALL | re.IGNORECASE,
            )

        # Clean up HTML - collapse whitespace per line, drop empty lines.
        lines = [' '.join(raw.split()) for raw in temp_html.split('\n')]
        lines = [line for line in lines if line]

        # Add decoy comments around some lines (but not too many). Lines that
        # already start with a comment (including placeholders) are skipped.
        obfuscated_lines = []
        for line in lines:
            is_comment_line = line.startswith('<!--')

            if not is_comment_line and random.random() > 0.7:
                obfuscated_lines.append(
                    f'<!-- {random.choice(LOREM_IPSUM_COMMENTS)} -->'
                )

            obfuscated_lines.append(line)

            if not is_comment_line and random.random() > 0.8:
                obfuscated_lines.append(
                    f'<!-- {random.choice(LOREM_IPSUM_COMMENTS)} -->'
                )

        obfuscated = '\n'.join(obfuscated_lines)

        # Inject a short random comment after SOME closing tags. Each tag
        # type has a 50% chance of being processed; when it is, one random
        # marker is generated and reused for every occurrence of that tag.
        for tag in ('div', 'p', 'span', 'h1', 'h2', 'h3'):
            marker = generate_random_string(6)
            if random.random() > 0.5:
                obfuscated = re.sub(
                    rf'(</{tag}>)', r'\1<!--' + marker + '-->', obfuscated
                )

        # Header / footer comments (kept few to avoid breaking the document).
        header_comments = [
            'auto-obfuscated-' + generate_random_string(10),
            'generated-' + generate_random_string(8),
        ]
        footer_comments = [
            'end-' + generate_random_string(10),
            'completed-' + generate_random_string(8),
        ]
        header_block = '\n'.join(f'<!-- {comment} -->' for comment in header_comments)
        footer_block = '\n'.join(f'<!-- {comment} -->' for comment in footer_comments)
        obfuscated = header_block + '\n' + obfuscated + '\n' + footer_block

        # Minimal invisible characters - only in text content, not in tags.
        invisible_chars = ['\u200B', '\u200C']
        script_keywords = (
            'function', 'const ', 'var ', 'let ', 'document.', 'window.',
            'getElement', 'querySelector', 'addEventListener',
        )
        entity_re = re.compile(r'&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);')

        def add_minimal_invisible(match):
            """Insert occasional zero-width characters into one text node."""
            text = match.group(1)

            # NEVER touch script-like content, and leave short text alone.
            if len(text) <= 10 or any(keyword in text for keyword in script_keywords):
                return '>' + text + '<'

            # Indexes after which an insertion would land inside a character
            # reference (e.g. between "&am" and "p;") and break it.
            inside_entity = set()
            for entity in entity_re.finditer(text):
                inside_entity.update(range(entity.start(), entity.end() - 1))

            result = []
            for i, char in enumerate(text):
                result.append(char)
                # Very rarely add invisible chars: every 8th position, 20% odds.
                if (
                    i % 8 == 0
                    and i > 0
                    and i not in inside_entity
                    and random.random() > 0.8
                ):
                    result.append(random.choice(invisible_chars))
            return '>' + ''.join(result) + '<'

        obfuscated = re.sub(r'>([^<]+)<', add_minimal_invisible, obfuscated)

        # Final cleanup - no blank lines, no runs of spaces/tabs. This runs
        # BEFORE restoring protected content so that scripts, styles and
        # tables keep their original whitespace.
        obfuscated = re.sub(r'\n\n+', '\n', obfuscated)
        obfuscated = re.sub(r'[ \t]+', ' ', obfuscated)

        # Restore protected content exactly as-is. Reverse order: a block
        # lifted later (e.g. a table) can hold placeholders of blocks lifted
        # earlier (e.g. a script inside it), so outer blocks must be restored
        # first for the inner placeholders to become visible.
        for placeholder, protected_content in reversed(protected_blocks):
            obfuscated = obfuscated.replace(placeholder, protected_content)

        return obfuscated