Fast as Fuck WebServer. Now we compress and cache like hell, so it's fast as fuck. One new requirement, which isn't too bad: Pillow, for images!

Clear cache and restart the server!
2025-10-13 18:20:27 +02:00
parent cee104f1f5
commit 5c170a195e
4 changed files with 233 additions and 81 deletions

View File

@@ -45,12 +45,12 @@ class Paragraph(marko.block.Paragraph):
class Renderer:
def render_block_formula(self, element):
# MathJax compatible block math
logger.log_debug(f"render_block_formula@LaTeXRenderer.py returned => {element}")
# logger.log_debug(f"render_block_formula@LaTeXRenderer.py returned => {element}")
return f'\n<div class="math-block">$$\n{self.render_children(element)}\n$$</div>\n'
def render_inline_formula(self, element):
# MathJax compatible inline math
logger.log_debug(f"render_inline_formula@LaTeXRenderer.py returned => {element}")
# logger.log_debug(f"render_inline_formula@LaTeXRenderer.py returned => {element}")
return f'\\({self.render_children(element)}\\)'
class LaTeXExtension:

Binary file not shown.

View File

@@ -10,7 +10,11 @@ from pathlib import Path
import requests
from functools import lru_cache
import hashlib
from typing import Optional, Tuple
from typing import Optional, Tuple, Dict
import gzip
import time
from PIL import Image
from io import BytesIO
from log.Logger import *
from lua import plugin_manager
@@ -27,9 +31,16 @@ BASE_FILE = os.path.join(HTML_DIR, "base", "index.html")
LUA_DIR = Path(PROJECT_ROOT) / "lua" / "plugins"
CACHE_DIR = os.path.join(PROJECT_ROOT, "cache")
CDN_CACHE_DIR = os.path.join(CACHE_DIR, "cdn")
IMAGE_CACHE_DIR = os.path.join(CACHE_DIR, "images")
# Image optimization settings
IMAGE_EXTENSIONS = {'.webp', '.jpg', '.jpeg', '.png', '.gif', '.ico', '.svg'}
ICON_MAX_SIZE = (128, 128) # Max dimensions for icons
STANDARD_IMAGE_MAX_SIZE = (1920, 1080) # Max dimensions for regular images
WEBP_QUALITY = 65 # Quality for WebP conversion
ICON_QUALITY = 90 # Higher quality for icons to preserve detail
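These caps feed Pillow's Image.thumbnail(), which shrinks to fit inside the box while preserving aspect ratio. A minimal standalone sketch (not part of the diff):

    from PIL import Image

    img = Image.new("RGB", (4000, 2000))                   # stand-in for a large photo
    img.thumbnail((1920, 1080), Image.Resampling.LANCZOS)  # same call the server uses
    print(img.size)                                        # (1920, 960): fits the box, ratio kept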
# CDN Resources to fetch and cache
# If you modify the HTML template to use another CDN resource, include it here
CDN_RESOURCES = {
"/package/css/prism.min.css": "https://cdn.jsdelivr.net/npm/prismjs/themes/prism.min.css",
"/package/js/prism.min.js": "https://cdn.jsdelivr.net/npm/prismjs/prism.min.js",
@@ -38,59 +49,72 @@ CDN_RESOURCES = {
"/package/js/mathjax.js": "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js",
}
# CDN base URLs for dynamic resource fetching
CDN_BASES = {
"mathjax": "https://cdn.jsdelivr.net/npm/mathjax@3/es5"
}
# File extensions to cache aggressively
CACHEABLE_EXTENSIONS = {'.css', '.js', '.webp', '.jpg', '.jpeg', '.png', '.gif', '.svg', '.woff', '.woff2', '.ttf', '.eot', '.ico'}
# Compression settings
COMPRESS_MIME_TYPES = {'text/html', 'text/css', 'application/javascript', 'application/json', 'text/markdown', 'text/x-lua'}
MIN_COMPRESS_SIZE = 1024 # Only compress files larger than 1KB
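The 1 KB floor exists because gzip adds roughly 20 bytes of header and trailer, so tiny payloads can come back larger than they went in. A quick illustration:

    import gzip

    small = b"hello"
    large = b"<p>hello world</p>" * 200
    print(len(small), "->", len(gzip.compress(small)))   # 5 -> ~25 bytes: worse
    print(len(large), "->", len(gzip.compress(large)))   # 3600 -> well under 100: much better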
# Session for connection pooling
session = requests.Session()
session.mount('https://', requests.adapters.HTTPAdapter(
pool_connections=10,
pool_maxsize=20,
max_retries=3
))
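The mounted HTTPAdapter keeps TCP/TLS connections open between CDN fetches, so only the first request to a host pays the handshake; the explicit mount just tunes pool sizes and retries on top of the pooling a Session already does. A rough timing sketch (numbers are illustrative):

    import time
    import requests

    url = "https://cdn.jsdelivr.net/npm/prismjs/prism.min.js"

    t0 = time.perf_counter()
    for _ in range(3):
        requests.get(url, timeout=30)   # fresh connection and TLS handshake each time
    print(f"one-off: {time.perf_counter() - t0:.2f}s")

    s = requests.Session()
    t0 = time.perf_counter()
    for _ in range(3):
        s.get(url, timeout=30)          # connection reused from the session's pool
    print(f"pooled:  {time.perf_counter() - t0:.2f}s")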
def ensure_cache_dirs():
"""Ensure cache directories exist"""
os.makedirs(CDN_CACHE_DIR, exist_ok=True)
logger.log_info(f"Cache directory ready: {CDN_CACHE_DIR}")
os.makedirs(IMAGE_CACHE_DIR, exist_ok=True)
logger.log_info(f"Cache directories ready: {CDN_CACHE_DIR}, {IMAGE_CACHE_DIR}")
def fetch_cdn_resources():
"""Fetch all CDN resources on startup and cache them"""
ensure_cache_dirs()
logger.log_info("Fetching CDN resources...")
for local_path, cdn_url in CDN_RESOURCES.items():
def fetch_single_resource(local_path, cdn_url):
try:
# Create filename from hash of the local path for safe storage
url_hash = hashlib.md5(local_path.encode()).hexdigest()
cache_file = os.path.join(CDN_CACHE_DIR, url_hash)
# Check if already cached
if os.path.exists(cache_file):
logger.log_debug(f"CDN resource already cached: {local_path}")
continue
return True
# Fetch resource
logger.log_info(f"Fetching {cdn_url}...")
response = requests.get(cdn_url, timeout=30)
response = session.get(cdn_url, timeout=30)
response.raise_for_status()
# Save to cache
with open(cache_file, 'wb') as f:
f.write(response.content)
logger.log_info(f"Cached CDN resource: {local_path} ({len(response.content)} bytes)")
return True
except Exception as e:
logger.log_error(f"Failed to fetch CDN resource {cdn_url}: {e}")
return False
# Parallel fetch with threads
threads = []
for local_path, cdn_url in CDN_RESOURCES.items():
t = threading.Thread(target=fetch_single_resource, args=(local_path, cdn_url))
t.start()
threads.append(t)
for t in threads:
t.join()
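An equivalent formulation with concurrent.futures would cap the number of simultaneous fetches and surface each worker's return value. A sketch, assuming fetch_single_resource were lifted to module scope:

    from concurrent.futures import ThreadPoolExecutor, as_completed

    def fetch_cdn_resources_pooled(max_workers: int = 8):
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            futures = {pool.submit(fetch_single_resource, path, url): path
                       for path, url in CDN_RESOURCES.items()}
            for fut in as_completed(futures):
                if not fut.result():
                    logger.log_error(f"CDN fetch failed: {futures[fut]}")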
def fetch_cdn_resource_on_demand(local_path: str) -> Optional[bytes]:
"""
Fetch a CDN resource on-demand if not already cached.
Used for dynamically loaded resources like MathJax dependencies.
"""
# Try to map the request to a known CDN base
""" On demand fetching of a CDN """
if local_path.startswith("/package/js/"):
relative_path = local_path[12:] # Remove "/package/js/"
relative_path = local_path[12:]
# MathJax resources
if any(x in relative_path for x in ["a11y/", "input/", "output/", "ui/", "sre"]):
cdn_url = f"{CDN_BASES['mathjax']}/{relative_path}"
else:
@@ -100,17 +124,14 @@ def fetch_cdn_resource_on_demand(local_path: str) -> Optional[bytes]:
url_hash = hashlib.md5(local_path.encode()).hexdigest()
cache_file = os.path.join(CDN_CACHE_DIR, url_hash)
# Check cache first
if os.path.exists(cache_file):
with open(cache_file, 'rb') as f:
return f.read()
# Fetch from CDN
logger.log_info(f"Fetching on-demand: {cdn_url}")
response = requests.get(cdn_url, timeout=10)
response = session.get(cdn_url, timeout=10)
response.raise_for_status()
# Cache it
with open(cache_file, 'wb') as f:
f.write(response.content)
@@ -123,12 +144,12 @@ def fetch_cdn_resource_on_demand(local_path: str) -> Optional[bytes]:
return None
@lru_cache(maxsize=1024)
def load_file_cached(file_path: str, is_js: bool = False) -> Tuple[bytes, str]:
"""
LRU cached file loader for static assets.
Returns (content, mime_type)
"""
@lru_cache(maxsize=2048)
def load_file_cached(file_path: str, is_js: bool = False, optimize_img: bool = False) -> Tuple[bytes, str]:
"""LRU-cached loader for static assets. Returns (content, mime_type)."""
# Convert eligible raster images to optimized WebP first
if optimize_img and should_optimize_image(file_path):
return optimize_image(file_path)
mime_type, _ = mimetypes.guess_type(file_path)
if mime_type is None:
mime_type = "application/octet-stream"
@@ -145,8 +166,107 @@ def load_file_cached(file_path: str, is_js: bool = False) -> Tuple[bytes, str]:
return content, mime_type
@lru_cache(maxsize=1024)
def compress_content(content: bytes) -> bytes:
"""LRU cached gzip compression"""
return gzip.compress(content, compresslevel=6)
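Because the compressor is memoized on the content bytes, a hot page is gzipped exactly once and every later request is a dictionary lookup; on a fresh interpreter, cache_info() makes that visible:

    page = b"<html>" + b"x" * 4096 + b"</html>"
    compress_content(page)
    compress_content(page)
    print(compress_content.cache_info())   # CacheInfo(hits=1, misses=1, ...): second call was free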
def is_icon(file_path: str) -> bool:
"""Determine if file is an icon based on path or name"""
lower_path = file_path.lower()
return (
'icon' in lower_path or
'favicon' in lower_path or
lower_path.endswith('.ico') or
'/icons/' in lower_path
)
def get_image_cache_path(file_path: str) -> str:
"""Generate cache path for optimized image"""
file_hash = hashlib.md5(file_path.encode()).hexdigest()
file_stat = os.stat(file_path)
# Include mtime in hash to invalidate cache when file changes
cache_key = f"{file_hash}_{int(file_stat.st_mtime)}"
return os.path.join(IMAGE_CACHE_DIR, cache_key + ".webp")
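Folding st_mtime into the cache key means editing (or even just touching) a source image silently invalidates its cached WebP. A sketch, with a hypothetical image path:

    import os, time

    path = "html/img/example.png"        # hypothetical image in the project tree
    before = get_image_cache_path(path)
    time.sleep(1)
    os.utime(path)                       # bump mtime, as a re-export would
    print(before != get_image_cache_path(path))   # True: new cache key

One consequence worth knowing: the old .webp entry is orphaned rather than overwritten, so stale files accumulate in IMAGE_CACHE_DIR until the cache is cleared, which is presumably why the commit message says to clear the cache.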
def optimize_image(file_path: str) -> Tuple[bytes, str]:
"""Convert an image to size-capped WebP, caching the result on disk."""
try:
# Check cache first
cache_path = get_image_cache_path(file_path)
if os.path.exists(cache_path):
with open(cache_path, 'rb') as f:
return f.read(), "image/webp"
# Open and process image
with Image.open(file_path) as img:
# Preserve transparency by converting to RGBA if needed
if img.mode == 'P':
# Palette mode - convert to RGBA to preserve transparency
img = img.convert('RGBA')
elif img.mode == 'LA':
# Grayscale with alpha - convert to RGBA
img = img.convert('RGBA')
elif img.mode not in ('RGBA', 'RGB', 'L'):
# Other modes - try to preserve alpha if present
if 'transparency' in img.info:
img = img.convert('RGBA')
else:
img = img.convert('RGB')
# If already RGBA or RGB, keep as is
# Determine if it's an icon and resize accordingly
if is_icon(file_path):
max_size = ICON_MAX_SIZE
quality = ICON_QUALITY
else:
max_size = STANDARD_IMAGE_MAX_SIZE
quality = WEBP_QUALITY
# Resize if image is larger than max size
if img.size[0] > max_size[0] or img.size[1] > max_size[1]:
img.thumbnail(max_size, Image.Resampling.LANCZOS)
logger.log_debug(f"Resized image {file_path} to {img.size}")
# Save as WebP to BytesIO
output = BytesIO()
# Lossy WebP still carries the alpha channel; lossless would keep it bit-perfect but at a much larger size
if img.mode == 'RGBA':
img.save(output, format='WEBP', quality=quality, method=6, lossless=False)
else:
img.save(output, format='WEBP', quality=quality, method=6)
optimized_content = output.getvalue()
# Cache the optimized image
with open(cache_path, 'wb') as f:
f.write(optimized_content)
original_size = os.path.getsize(file_path)
optimized_size = len(optimized_content)
savings = ((original_size - optimized_size) / original_size) * 100
logger.log_info(f"Optimized {file_path}: {original_size} to {optimized_size} bytes ({savings:.1f}% reduction)")
return optimized_content, "image/webp"
except Exception as e:
logger.log_error(f"Error optimizing image {file_path}: {e}")
# Fall back to serving the original file
with open(file_path, 'rb') as f:
content = f.read()
mime_type, _ = mimetypes.guess_type(file_path)
return content, mime_type or "application/octet-stream"
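The WEBP_QUALITY / ICON_QUALITY split defined earlier is a size-versus-fidelity trade you can probe directly; a sketch with a hypothetical photo:

    from io import BytesIO
    from PIL import Image

    img = Image.open("html/img/banner.jpg")   # hypothetical photo in the project
    for q in (90, 65, 40):
        buf = BytesIO()
        img.save(buf, format="WEBP", quality=q, method=6)
        print(q, len(buf.getvalue()))         # bytes shrink as quality drops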
def prewarm_image_cache():
"""Walk the project tree and pre-optimize every eligible image."""
for root, _, files in os.walk(PROJECT_ROOT):
if root.startswith(CACHE_DIR):
continue # skip already-optimized cache output
for f in files:
if should_optimize_image(f):
optimize_image(os.path.join(root, f))
def should_optimize_image(file_path: str) -> bool:
ext = os.path.splitext(file_path)[1].lower()
# Skip SVGs: they're already compact vector graphics
return ext in IMAGE_EXTENSIONS and ext != '.svg'
def should_cache_file(file_path: str) -> bool:
"""Determine if a file should be LRU cached based on extension"""
ext = os.path.splitext(file_path)[1].lower()
return ext in CACHEABLE_EXTENSIONS
@@ -158,8 +278,17 @@ def get_html_files(directory=HTML_DIR):
html_files.append(entry)
return html_files
_index_cache = {"content": None, "timestamp": 0}
INDEX_CACHE_TTL = 300 # seconds (5 minutes)
def build_index_page() -> str:
def build_index_page(force_refresh: bool = False) -> str:
"""Build index page with caching"""
global _index_cache
current_time = time.time()
if not force_refresh and _index_cache["content"] and (current_time - _index_cache["timestamp"]) < INDEX_CACHE_TTL:
return _index_cache["content"]
with open(BASE_FILE, "r", encoding="utf-8") as f:
base_html = f.read()
@@ -193,12 +322,16 @@ def build_index_page() -> str:
continue
full_content = "\n".join(articles) + "</main>" + index_footer()
return base_html.replace("<!-- CONTENT -->", full_content)
content = base_html.replace("<!-- CONTENT -->", full_content)
# Update cache
_index_cache["content"] = content
_index_cache["timestamp"] = current_time
return content
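The module-level dict is about the simplest possible TTL cache. The same idea as a reusable decorator, purely illustrative (the server keeps the explicit dict above):

    import time
    from functools import wraps

    def ttl_cache(seconds: float):
        """Memoize a zero-argument function, refreshing after `seconds`."""
        def deco(fn):
            state = {"value": None, "ts": 0.0}
            @wraps(fn)
            def wrapper(force_refresh: bool = False):
                now = time.time()
                if not force_refresh and state["value"] is not None and now - state["ts"] < seconds:
                    return state["value"]
                state["value"], state["ts"] = fn(), now
                return state["value"]
            return wrapper
        return deco

    @ttl_cache(300)
    def build_index():  # hypothetical stand-in for build_index_page
        return "<html>...</html>"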
import base64
import random
import time
from hashes.hashes import hash_list
@@ -210,9 +343,6 @@ H2 = random.choice(H2_CANDIDATES) if H2_CANDIDATES else H1
def index_footer() -> str:
tor_link = "http://7uhuxits7qfmiagkmpazxvh3rtk6aijs6pbawge3fl77y4xqjixlhkqd.onion/"
return f"""
<!-- Footer styling doesn't need to work with
flex, or anything else, because of pagination.
-->
<div class="footer">
<footer>
<p>
@@ -235,9 +365,41 @@ def index_footer() -> str:
"""
class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
def log_message(self, format, *args):
pass # silence the default per-request stderr logging
def _supports_gzip(self) -> bool:
accept_encoding = self.headers.get('Accept-Encoding', '')
return 'gzip' in accept_encoding.lower()
def _send_compressed_response(self, content: bytes, mime_type: str, cache_control: Optional[str] = None):
should_compress = (
self._supports_gzip() and
mime_type in COMPRESS_MIME_TYPES and
len(content) >= MIN_COMPRESS_SIZE
)
if should_compress:
# Use cached compression
compressed = compress_content(content)
self.send_response(200)
self.send_header("Content-type", mime_type)
self.send_header("Content-Encoding", "gzip")
self.send_header("Content-Length", len(compressed))
if cache_control:
self.send_header("Cache-Control", cache_control)
self.end_headers()
self.wfile.write(compressed)
else:
self.send_response(200)
self.send_header("Content-type", mime_type)
self.send_header("Content-Length", len(content))
if cache_control:
self.send_header("Cache-Control", cache_control)
self.end_headers()
self.wfile.write(content)
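From the client side, the negotiation is observable in the response headers; a sketch against the local server (requests transparently decompresses the body, but the header survives):

    import requests

    r = requests.get("http://localhost:8000/", headers={"Accept-Encoding": "gzip"})
    print(r.headers.get("Content-Encoding"))   # "gzip" once the body clears 1 KiB
    r = requests.get("http://localhost:8000/", headers={"Accept-Encoding": "identity"})
    print(r.headers.get("Content-Encoding"))   # None: sent uncompressed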
def _parse_post_data(self):
"""Parse POST request body"""
import json
content_length = int(self.headers.get('Content-Length', 0))
if content_length == 0:
return {}
@@ -259,7 +421,7 @@ class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
return {"raw": post_data}
def do_POST(self):
"""Handle POST requests - primarily for plugin routes"""
"""Handle POST for Plugins"""
req_path = self.path.lstrip("/")
post_data = self._parse_post_data()
@@ -296,20 +458,20 @@ class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
def do_GET(self):
req_path = self.path.lstrip("/")
# Handle root/index
# Handle root/index with caching
if req_path == "" or req_path == "index.html":
content = build_index_page()
self.send_response(200)
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(content.encode("utf-8"))
self._send_compressed_response(
content.encode("utf-8"),
"text/html",
"public, max-age=300" # Cache for 5 minutes
)
return
# Handle CDN package requests
if req_path.startswith("package/"):
cdn_path = "/" + req_path
# Try to get from pre-fetched cache
if cdn_path in CDN_RESOURCES:
url_hash = hashlib.md5(cdn_path.encode()).hexdigest()
cache_file = os.path.join(CDN_CACHE_DIR, url_hash)
@@ -320,11 +482,9 @@ class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
else:
cached_content = None
else:
# Try on-demand fetching for dynamic resources
cached_content = fetch_cdn_resource_on_demand(cdn_path)
if cached_content:
# Determine mime type
if cdn_path.endswith('.css'):
mime_type = "text/css"
elif cdn_path.endswith('.js'):
@@ -339,7 +499,7 @@ class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
self.send_response(200)
self.send_header("Content-type", mime_type)
self.send_header("Cache-Control", "public, max-age=86400")
self.send_header("Access-Control-Allow-Origin", "*") # CORS for CDN resources
self.send_header("Access-Control-Allow-Origin", "*")
self.end_headers()
self.wfile.write(cached_content)
return
@@ -350,7 +510,7 @@ class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
self.wfile.write(b"404 - CDN resource not available")
return
# CHECK PLUGIN ROUTES
# Check plugin routes
plugin_result = plugin_manager.handle_request("/" + req_path, {"path": self.path})
if plugin_result is not None:
status, headers, body = plugin_result
@@ -391,11 +551,7 @@ class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
with open(markdown_file_path, "rb") as f:
content = f.read()
self.send_response(200)
self.send_header("Content-type", "text/markdown")
self.send_header("Content-Disposition", f'attachment; filename="{markdown_filename}"')
self.end_headers()
self.wfile.write(content)
self._send_compressed_response(content, "text/markdown")
logger.log_info(f"Served markdown file: {markdown_filename}")
return
@@ -436,11 +592,7 @@ class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
with open(lua_file_path, "rb") as f:
content = f.read()
self.send_response(200)
self.send_header("Content-type", "text/x-lua")
self.send_header("Content-Disposition", f'attachment; filename="{lua_filename}"')
self.end_headers()
self.wfile.write(content)
self._send_compressed_response(content, "text/x-lua")
logger.log_info(f"Served Lua file: {lua_filename}")
return
@@ -461,18 +613,17 @@ class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
if os.path.isfile(file_path):
try:
# Use LRU cache for cacheable files
if should_cache_file(file_path):
is_js = file_path.endswith('.js')
content, mime_type = load_file_cached(file_path, is_js)
optimize_img = should_optimize_image(file_path)
content, mime_type = load_file_cached(file_path, is_js, optimize_img)
self.send_response(200)
self.send_header("Content-type", mime_type)
self.send_header("Cache-Control", "public, max-age=3600") # Cache for 1 hour
self.end_headers()
self.wfile.write(content)
self._send_compressed_response(
content,
mime_type,
"public, max-age=3600"
)
else:
# Non-cacheable files (e.g., HTML)
mime_type, _ = mimetypes.guess_type(file_path)
if mime_type is None:
mime_type = "application/octet-stream"
@@ -480,17 +631,13 @@ class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
with open(file_path, "rb") as f:
content = f.read()
# Minify JS even if not cached
if mime_type == "application/javascript" or file_path.endswith(".js"):
try:
content = jsmin(content.decode("utf-8")).encode("utf-8")
except Exception as err:
logger.log_error(f"Error minifying JS file {file_path}: {err}")
self.send_response(200)
self.send_header("Content-type", mime_type)
self.end_headers()
self.wfile.write(content)
self._send_compressed_response(content, mime_type)
return
@@ -513,20 +660,25 @@ def run_pypost():
if __name__ == "__main__":
try:
# Fetch CDN resources on startup
logger.log_info("Initializing CDN resource cache...")
logger.log_info("Initializing cache directories")
ensure_cache_dirs()
logger.log_info("Initializing CDN resource cache")
fetch_cdn_resources()
logger.log_info("CDN resources ready!")
# Start PyPost watcher
threading.Thread(target=run_pypost, daemon=True).start()
logger.log_debug("Started PyPost.py in background watcher thread.")
server_address = ("localhost", 8000)
httpd: HTTPServer = HTTPServer(server_address, WebServerHTTPRequestHandler)
logger.log_info(f"Serving on http://{server_address[0]}:{server_address[1]}")
logger.log_info(f"LRU cache enabled for static assets (max 512 files)")
logger.log_info(f"Icon max size: {ICON_MAX_SIZE} \n Image max size: {STANDARD_IMAGE_MAX_SIZE}")
prewarm_image_cache() # warm the image cache before serve_forever() blocks
httpd.serve_forever()
except (Exception, KeyboardInterrupt) as e:
logger.log_info(f"Shutting down server.\n Reason: {e}")
if isinstance(e, KeyboardInterrupt):
logger.log_info(f"Shutting down server.\n Reason: KeyboardInterrupt")
else:
logger.log_info(f"Shutting down server.\n Reason: {e}")
httpd.server_close()