# PyPost/webserver.py

import base64
import hashlib
import json
import mimetypes
import os
import random
import subprocess
import sys
import threading
import time
from functools import lru_cache
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
from typing import Optional, Tuple

import requests
from jsmin import jsmin

from log.Logger import *
from hashes.hashes import hash_list
from lua import plugin_manager
from PyPost import extract_summary
logger = Logger()
plugin_manager = plugin_manager.PluginManager()
plugin_manager.load_all()
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
HTML_DIR = os.path.join(PROJECT_ROOT, "html")
MARKDOWN_DIR = os.path.join(PROJECT_ROOT, "markdown")
BASE_FILE = os.path.join(HTML_DIR, "base", "index.html")
LUA_DIR = Path(PROJECT_ROOT) / "lua" / "plugins"
CACHE_DIR = os.path.join(PROJECT_ROOT, "cache")
CDN_CACHE_DIR = os.path.join(CACHE_DIR, "cdn")
# CDN Resources to fetch and cache
CDN_RESOURCES = {
    "/package/css/prism.min.css": "https://cdn.jsdelivr.net/npm/prismjs/themes/prism.min.css",
    "/package/js/prism.min.js": "https://cdn.jsdelivr.net/npm/prismjs/prism.min.js",
    "/package/js/prism-python.min.js": "https://cdn.jsdelivr.net/npm/prismjs/components/prism-python.min.js",
    "/package/js/prism-javascript.min.js": "https://cdn.jsdelivr.net/npm/prismjs/components/prism-javascript.min.js",
    "/package/js/mathjax.js": "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js",
}

# CDN base URLs for dynamic resource fetching
CDN_BASES = {
    "mathjax": "https://cdn.jsdelivr.net/npm/mathjax@3/es5"
}
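
# Note: paths under "/package/js/" that are not listed in CDN_RESOURCES (e.g. a
# hypothetical MathJax sub-module request like "/package/js/input/tex.js") are
# resolved against CDN_BASES["mathjax"] by fetch_cdn_resource_on_demand() below.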
# File extensions to cache aggressively
CACHEABLE_EXTENSIONS = {'.css', '.js', '.webp', '.jpg', '.jpeg', '.png', '.gif', '.svg', '.woff', '.woff2', '.ttf', '.eot', '.ico'}
def ensure_cache_dirs():
    """Ensure cache directories exist"""
    os.makedirs(CDN_CACHE_DIR, exist_ok=True)
    logger.log_info(f"Cache directory ready: {CDN_CACHE_DIR}")

def fetch_cdn_resources():
    """Fetch all CDN resources on startup and cache them"""
    ensure_cache_dirs()
    logger.log_info("Fetching CDN resources...")
    for local_path, cdn_url in CDN_RESOURCES.items():
        try:
            # Create filename from hash of the local path for safe storage
            url_hash = hashlib.md5(local_path.encode()).hexdigest()
            cache_file = os.path.join(CDN_CACHE_DIR, url_hash)
            # Check if already cached
            if os.path.exists(cache_file):
                logger.log_debug(f"CDN resource already cached: {local_path}")
                continue
            # Fetch resource
            logger.log_info(f"Fetching {cdn_url}...")
            response = requests.get(cdn_url, timeout=30)
            response.raise_for_status()
            # Save to cache
            with open(cache_file, 'wb') as f:
                f.write(response.content)
            logger.log_info(f"Cached CDN resource: {local_path} ({len(response.content)} bytes)")
        except Exception as e:
            logger.log_error(f"Failed to fetch CDN resource {cdn_url}: {e}")

def fetch_cdn_resource_on_demand(local_path: str) -> Optional[bytes]:
    """
    Fetch a CDN resource on-demand if not already cached.
    Used for dynamically loaded resources like MathJax dependencies.
    """
    # Try to map the request to a known CDN base
    if local_path.startswith("/package/js/"):
        relative_path = local_path[12:]  # Remove "/package/js/"
        # MathJax resources
        if any(x in relative_path for x in ["a11y/", "input/", "output/", "ui/", "sre"]):
            cdn_url = f"{CDN_BASES['mathjax']}/{relative_path}"
        else:
            return None
        try:
            url_hash = hashlib.md5(local_path.encode()).hexdigest()
            cache_file = os.path.join(CDN_CACHE_DIR, url_hash)
            # Check cache first
            if os.path.exists(cache_file):
                with open(cache_file, 'rb') as f:
                    return f.read()
            # Fetch from CDN
            logger.log_info(f"Fetching on-demand: {cdn_url}")
            response = requests.get(cdn_url, timeout=10)
            response.raise_for_status()
            # Cache it
            with open(cache_file, 'wb') as f:
                f.write(response.content)
            logger.log_info(f"Cached on-demand: {local_path}")
            return response.content
        except Exception as e:
            logger.log_error(f"Failed to fetch on-demand resource {cdn_url}: {e}")
            return None
    return None

@lru_cache(maxsize=1024)
def load_file_cached(file_path: str, is_js: bool = False) -> Tuple[bytes, str]:
    """
    LRU cached file loader for static assets.
    Returns (content, mime_type)
    """
    mime_type, _ = mimetypes.guess_type(file_path)
    if mime_type is None:
        mime_type = "application/octet-stream"
    with open(file_path, "rb") as f:
        content = f.read()
    # Minify JS files
    if is_js or mime_type == "application/javascript" or file_path.endswith(".js"):
        try:
            content = jsmin(content.decode("utf-8")).encode("utf-8")
        except Exception as err:
            logger.log_error(f"Error minifying JS file {file_path}: {err}")
    return content, mime_type

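# Note on load_file_cached above: the LRU cache is keyed only on (file_path, is_js),
# so a file edited on disk keeps being served from cache until the process restarts.
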
def should_cache_file(file_path: str) -> bool:
    """Determine if a file should be LRU cached based on extension"""
    ext = os.path.splitext(file_path)[1].lower()
    return ext in CACHEABLE_EXTENSIONS


def get_html_files(directory=HTML_DIR):
    html_files = []
    for entry in os.listdir(directory):
        full_path = os.path.join(directory, entry)
        if os.path.isfile(full_path) and entry.endswith(".html"):
            html_files.append(entry)
    return html_files

def build_index_page() -> str:
    with open(BASE_FILE, "r", encoding="utf-8") as f:
        base_html = f.read()
    articles = []
    for md_path in Path(MARKDOWN_DIR).rglob("*.md"):
        try:
            summary_data = extract_summary(md_path)
            if summary_data:
                html_name, summary = summary_data
            else:
                html_name = md_path.stem + ".html"
                summary = "No Summary for this Article!"
            text = md_path.read_text(encoding="utf-8")
            title = md_path.stem
            for line in text.splitlines():
                if line.startswith("# "):
                    title = line[2:].strip()
                    break
            article_html = f"""
            <article>
                <h3><a href="/html/{html_name}">{title}</a></h3>
                <p>{summary}</p>
            </article>
            """
            articles.append(article_html)
        except Exception as e:
            logger.log_warning(f"Exception with summary: {e} at {md_path}")
            continue
    full_content = "\n".join(articles) + "</main>" + index_footer()
    return base_html.replace("<!-- CONTENT -->", full_content)

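# build_index_page() is called on every request to "/" (see do_GET), so new
# markdown articles appear on the index without restarting the server.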

# Generate hashes only once at server start
H1 = random.choice(hash_list)
H2_CANDIDATES = [h for h in hash_list if h != H1]
H2 = random.choice(H2_CANDIDATES) if H2_CANDIDATES else H1

def index_footer() -> str:
    tor_link = "http://7uhuxits7qfmiagkmpazxvh3rtk6aijs6pbawge3fl77y4xqjixlhkqd.onion/"
    return f"""
    <!-- Footer styling doesn't need to work with
         flex, or anything else, because of pagination.
    -->
    <div class="footer">
        <footer>
            <p>
                <!-- Server Time -->
                <img src="../css/icons/date.webp" width="16" height="16" alt="date" loading="lazy" style="vertical-align: middle;" />
                Server-Time (CET ; GMT+2): <i>{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}</i><br />
                <!-- Hashes -->
                <img src="../css/icons/magnifier.webp" width="16" height="16" alt="Hash1" loading="lazy" style="display:inline; vertical-align:middle;" />
                Hash 1 (<b>UTF-8</b>)<i>:{base64.b64encode(H1.encode("utf-8")).decode("utf-8")}</i><br />
                <img src="../css/icons/magnifier.webp" width="16" height="16" alt="Hash2" loading="lazy" style="display:inline; vertical-align:middle;" />
                Hash 2 (<b>Windows-1252</b>)<i>:{base64.b64encode(H2.encode("windows-1252")).decode("windows-1252")}</i><br />
                <!-- Git Repository Link -->
                <img src="../css/icons/written.webp" width="16" height="16" alt="git" loading="lazy" style="display:inline; vertical-align:middle;" />
                <a style="text-decoration:none;color:#0066cc;font-style:italic;padding-top:5px;" href="https://rattatwinko.servecounterstrike.com/gitea/rattatwinko/PyPost">View Git-Repository</a><br />
                <img src="../css/icons/script.webp" width="16" height="16" alt="tor" loading="lazy" style="display:inline; vertical-align:middle;" />
                <a style="text-decoration:none;color:#0066cc;font-style:italic;padding-top:5px;" href="{tor_link}">View Tor Site</a>
            </p>
        </footer>
    </div>
    """

class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
    def _parse_post_data(self):
        """Parse POST request body"""
        content_length = int(self.headers.get('Content-Length', 0))
        if content_length == 0:
            return {}
        post_data = self.rfile.read(content_length)
        content_type = self.headers.get('Content-Type', '')
        try:
            if 'application/json' in content_type:
                return json.loads(post_data.decode('utf-8'))
            elif 'application/x-www-form-urlencoded' in content_type:
                from urllib.parse import parse_qs
                parsed = parse_qs(post_data.decode('utf-8'))
                return {k: v[0] if len(v) == 1 else v for k, v in parsed.items()}
            else:
                return {"raw": post_data}
        except Exception as e:
            logger.log_error(f"Error parsing POST data: {e}")
            return {"raw": post_data}

    def do_POST(self):
        """Handle POST requests - primarily for plugin routes"""
        req_path = self.path.lstrip("/")
        post_data = self._parse_post_data()
        request_data = {
            "path": self.path,
            "headers": dict(self.headers),
            "data": post_data,
            "method": "POST"
        }
        plugin_result = plugin_manager.handle_request("/" + req_path, request_data, method="POST")
        if plugin_result is not None:
            status, headers, body = plugin_result
            self.send_response(status)
            for key, value in headers.items():
                self.send_header(key, value)
            self.end_headers()
            if isinstance(body, str):
                self.wfile.write(body.encode("utf-8"))
            elif isinstance(body, bytes):
                self.wfile.write(body)
            else:
                self.wfile.write(str(body).encode("utf-8"))
            return
        self.send_response(404)
        self.send_header("Content-type", "application/json")
        self.end_headers()
        error_response = json.dumps({"error": "Route not found"})
        self.wfile.write(error_response.encode("utf-8"))

    def do_GET(self):
        req_path = self.path.lstrip("/")
        # Handle root/index
        if req_path == "" or req_path == "index.html":
            content = build_index_page()
            self.send_response(200)
            self.send_header("Content-type", "text/html")
            self.end_headers()
            self.wfile.write(content.encode("utf-8"))
            return
        # Handle CDN package requests
        if req_path.startswith("package/"):
            cdn_path = "/" + req_path
            # Try to get from pre-fetched cache
            if cdn_path in CDN_RESOURCES:
                url_hash = hashlib.md5(cdn_path.encode()).hexdigest()
                cache_file = os.path.join(CDN_CACHE_DIR, url_hash)
                if os.path.exists(cache_file):
                    with open(cache_file, 'rb') as f:
                        cached_content = f.read()
                else:
                    cached_content = None
            else:
                # Try on-demand fetching for dynamic resources
                cached_content = fetch_cdn_resource_on_demand(cdn_path)
            if cached_content:
                # Determine mime type
                if cdn_path.endswith('.css'):
                    mime_type = "text/css"
                elif cdn_path.endswith('.js'):
                    mime_type = "application/javascript"
                elif cdn_path.endswith('.wasm'):
                    mime_type = "application/wasm"
                elif cdn_path.endswith('.json'):
                    mime_type = "application/json"
                else:
                    mime_type = "application/octet-stream"
                self.send_response(200)
                self.send_header("Content-type", mime_type)
                self.send_header("Cache-Control", "public, max-age=86400")
                self.send_header("Access-Control-Allow-Origin", "*")  # CORS for CDN resources
                self.end_headers()
                self.wfile.write(cached_content)
                return
            else:
                logger.log_warning(f"CDN resource not found: {cdn_path}")
                self.send_response(404)
                self.end_headers()
                self.wfile.write(b"404 - CDN resource not available")
                return
        # CHECK PLUGIN ROUTES
        plugin_result = plugin_manager.handle_request("/" + req_path, {"path": self.path})
        if plugin_result is not None:
            status, headers, body = plugin_result
            self.send_response(status)
            for key, value in headers.items():
                self.send_header(key, value)
            self.end_headers()
            self.wfile.write(body.encode("utf-8") if isinstance(body, str) else body)
            return
        # Handle markdown file downloads
        if req_path.startswith("markdown/"):
            markdown_filename = req_path[9:]
            if not markdown_filename.endswith(".md") or ".." in markdown_filename or "/" in markdown_filename:
                self.send_response(403)
                self.end_headers()
                self.wfile.write(b"403 - Forbidden: Only .md files allowed")
                return
            markdown_file_path = os.path.join(MARKDOWN_DIR, markdown_filename)
            if not os.path.exists(markdown_file_path) or not os.path.isfile(markdown_file_path):
                self.send_response(404)
                self.end_headers()
                self.wfile.write(b"404 - Markdown file not found")
                return
            # Resolve symlinks and make sure the file really lives inside MARKDOWN_DIR
            resolved_path = os.path.realpath(markdown_file_path)
            resolved_markdown_dir = os.path.realpath(MARKDOWN_DIR)
            if not resolved_path.startswith(resolved_markdown_dir + os.sep):
                self.send_response(403)
                self.end_headers()
                self.wfile.write(b"403 - Forbidden")
                return
            try:
                with open(markdown_file_path, "rb") as f:
                    content = f.read()
                self.send_response(200)
                self.send_header("Content-type", "text/markdown")
                self.send_header("Content-Disposition", f'attachment; filename="{markdown_filename}"')
                self.end_headers()
                self.wfile.write(content)
                logger.log_info(f"Served markdown file: {markdown_filename}")
                return
            except Exception as err:
                logger.log_error(f"Error serving markdown file {markdown_filename}: {err}")
                self.send_response(500)
                self.end_headers()
                self.wfile.write(b"500 - Internal Server Error")
                return
        # Handle Lua file downloads
        if req_path.startswith("lua/"):
            lua_filename = req_path[4:]
            if not lua_filename.endswith(".lua") or ".." in lua_filename or "/" in lua_filename:
                self.send_response(403)
                self.end_headers()
                self.wfile.write(b"403 - Forbidden: Only .lua files allowed")
                return
            lua_file_path = os.path.join(LUA_DIR, lua_filename)
            if not os.path.exists(lua_file_path) or not os.path.isfile(lua_file_path):
                self.send_response(404)
                self.end_headers()
                self.wfile.write(b"404 - Lua file not found")
                return
            # Resolve symlinks and make sure the file really lives inside LUA_DIR
            resolved_path = os.path.realpath(lua_file_path)
            resolved_lua_dir = os.path.realpath(LUA_DIR)
            if not resolved_path.startswith(resolved_lua_dir + os.sep):
                self.send_response(403)
                self.end_headers()
                self.wfile.write(b"403 - Forbidden")
                return
            try:
                with open(lua_file_path, "rb") as f:
                    content = f.read()
                self.send_response(200)
                self.send_header("Content-type", "text/x-lua")
                self.send_header("Content-Disposition", f'attachment; filename="{lua_filename}"')
                self.end_headers()
                self.wfile.write(content)
                logger.log_info(f"Served Lua file: {lua_filename}")
                return
            except Exception as err:
                logger.log_error(f"Error serving Lua file {lua_filename}: {err}")
                self.send_response(500)
                self.end_headers()
                self.wfile.write(b"500 - Internal Server Error")
                return
        # Handle other files with LRU caching for static assets
        file_path = os.path.normpath(os.path.join(PROJECT_ROOT, req_path))
        if not file_path.startswith(PROJECT_ROOT + os.sep):
            self.send_response(403)
            self.end_headers()
            self.wfile.write(b"403 - Forbidden")
            return
        if os.path.isfile(file_path):
            try:
                # Use LRU cache for cacheable files
                if should_cache_file(file_path):
                    is_js = file_path.endswith('.js')
                    content, mime_type = load_file_cached(file_path, is_js)
                    self.send_response(200)
                    self.send_header("Content-type", mime_type)
                    self.send_header("Cache-Control", "public, max-age=3600")  # Cache for 1 hour
                    self.end_headers()
                    self.wfile.write(content)
                else:
                    # Non-cacheable files (e.g., HTML)
                    mime_type, _ = mimetypes.guess_type(file_path)
                    if mime_type is None:
                        mime_type = "application/octet-stream"
                    with open(file_path, "rb") as f:
                        content = f.read()
                    # Minify JS even if not cached
                    if mime_type == "application/javascript" or file_path.endswith(".js"):
                        try:
                            content = jsmin(content.decode("utf-8")).encode("utf-8")
                        except Exception as err:
                            logger.log_error(f"Error minifying JS file {file_path}: {err}")
                    self.send_response(200)
                    self.send_header("Content-type", mime_type)
                    self.end_headers()
                    self.wfile.write(content)
                return
            except Exception as err:
                logger.log_error(f"Error serving file {file_path}: {err}")
                self.send_response(500)
                self.end_headers()
                self.wfile.write(b"500 - Internal Server Error")
                return
        self.send_response(404)
        self.end_headers()
        self.wfile.write(b"404 - Not Found")

def run_pypost():
    """Run PyPost.py in a separate process."""
    script = os.path.join(PROJECT_ROOT, "PyPost.py")
    subprocess.run([sys.executable, script])


if __name__ == "__main__":
    try:
        # Fetch CDN resources on startup
        logger.log_info("Initializing CDN resource cache...")
        fetch_cdn_resources()
        logger.log_info("CDN resources ready!")
        # Start PyPost watcher
        threading.Thread(target=run_pypost, daemon=True).start()
        logger.log_debug("Started PyPost.py in background watcher thread.")
        server_address = ("localhost", 8000)
        httpd: HTTPServer = HTTPServer(server_address, WebServerHTTPRequestHandler)
        logger.log_info(f"Serving on http://{server_address[0]}:{server_address[1]}")
        logger.log_info("LRU cache enabled for static assets (max 1024 files)")
        httpd.serve_forever()
    except (Exception, KeyboardInterrupt) as e:
        logger.log_info(f"Shutting down server.\n Reason: {e}")
        httpd.server_close()
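
# Example usage (a sketch, assuming the defaults above and that this module is run
# directly from the project directory):
#   $ python webserver.py
#   $ curl http://localhost:8000/                          # rendered index page
#   $ curl http://localhost:8000/package/js/prism.min.js   # cached CDN asset
#   $ curl -O http://localhost:8000/markdown/<article>.md  # <article> is a placeholder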