Caching system with better performance and a local CDN package fetcher. Updated the template to use the local CDN (lcdn).

This commit is contained in:
2025-10-10 20:56:48 +02:00
parent 3494298330
commit a7847f6bff
7 changed files with 301 additions and 238 deletions

View File

@@ -5,8 +5,12 @@ import subprocess
from http.server import BaseHTTPRequestHandler, HTTPServer
import mimetypes
import json
from jsmin import jsmin # pip install jsmin
from jsmin import jsmin
from pathlib import Path
import requests
from functools import lru_cache
import hashlib
from typing import Optional, Tuple
from log.Logger import *
from lua import plugin_manager
@@ -14,13 +18,136 @@ from PyPost import extract_summary
logger = Logger()
plugin_manager = plugin_manager.PluginManager()
plugin_manager.load_all() # load all plugins
plugin_manager.load_all()
# Absolute directory containing this script; every other path below is derived from it.
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
HTML_DIR = os.path.join(PROJECT_ROOT, "html")
MARKDOWN_DIR = os.path.join(PROJECT_ROOT, "markdown")
BASE_FILE = os.path.join(HTML_DIR, "base", "index.html")
# Unlike the string paths above, LUA_DIR is a pathlib.Path.
LUA_DIR = Path(PROJECT_ROOT) / "lua" / "plugins"
# On-disk cache root; CDN downloads are stored in its "cdn" subdirectory.
CACHE_DIR = os.path.join(PROJECT_ROOT, "cache")
CDN_CACHE_DIR = os.path.join(CACHE_DIR, "cdn")
# CDN resources that fetch_cdn_resources() downloads and caches.
# Maps the local request path served by this server to the upstream URL it mirrors.
# The local path (the key, not the URL) is MD5-hashed to name the cache file.
CDN_RESOURCES = {
"/package/css/prism.min.css": "https://cdn.jsdelivr.net/npm/prismjs/themes/prism.min.css",
"/package/js/prism.min.js": "https://cdn.jsdelivr.net/npm/prismjs/prism.min.js",
"/package/js/prism-python.min.js": "https://cdn.jsdelivr.net/npm/prismjs/components/prism-python.min.js",
"/package/js/prism-javascript.min.js": "https://cdn.jsdelivr.net/npm/prismjs/components/prism-javascript.min.js",
"/package/js/mathjax.js": "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js",
}
# CDN base URLs for dynamic resource fetching: fetch_cdn_resource_on_demand()
# appends the requested relative path to the matching base.
CDN_BASES = {
"mathjax": "https://cdn.jsdelivr.net/npm/mathjax@3/es5"
}
# File extensions accepted by should_cache_file(); do_GET serves matching files
# through the LRU-cached loader and adds a Cache-Control header.
CACHEABLE_EXTENSIONS = {'.css', '.js', '.webp', '.jpg', '.jpeg', '.png', '.gif', '.svg', '.woff', '.woff2', '.ttf', '.eot', '.ico'}
def ensure_cache_dirs():
    """Create the CDN cache directory, including missing parents, if absent."""
    # parents/exist_ok make this safe to call repeatedly and on a fresh checkout.
    Path(CDN_CACHE_DIR).mkdir(parents=True, exist_ok=True)
    logger.log_info(f"Cache directory ready: {CDN_CACHE_DIR}")
def fetch_cdn_resources():
    """Download every entry of ``CDN_RESOURCES`` into the local CDN cache.

    Each resource is stored in ``CDN_CACHE_DIR`` under the MD5 hex digest of
    its *local* request path (the dictionary key), which is the same name the
    request handler looks up. Resources that already have a non-empty cache
    file are skipped.

    Failures are logged per resource and never raised, so one unreachable URL
    does not stop the remaining resources from being fetched.
    """
    ensure_cache_dirs()
    logger.log_info("Fetching CDN resources...")

    for local_path, cdn_url in CDN_RESOURCES.items():
        tmp_file = None
        try:
            # MD5 only derives a filesystem-safe name here; it is not a security measure.
            url_hash = hashlib.md5(local_path.encode()).hexdigest()
            cache_file = os.path.join(CDN_CACHE_DIR, url_hash)

            # An empty file is treated as missing: the request handler rejects
            # empty content anyway, so keeping it would pin a permanent 404.
            if os.path.isfile(cache_file) and os.path.getsize(cache_file) > 0:
                logger.log_debug(f"CDN resource already cached: {local_path}")
                continue

            logger.log_info(f"Fetching {cdn_url}...")
            response = requests.get(cdn_url, timeout=30)
            response.raise_for_status()

            # Write to a temporary name and rename into place so an interrupted
            # download can never leave a truncated file that looks cached.
            tmp_file = f"{cache_file}.{os.getpid()}.tmp"
            with open(tmp_file, 'wb') as f:
                f.write(response.content)
            os.replace(tmp_file, cache_file)
            tmp_file = None

            logger.log_info(f"Cached CDN resource: {local_path} ({len(response.content)} bytes)")
        except Exception as e:
            # Best effort by design: log and move on to the next resource.
            logger.log_error(f"Failed to fetch CDN resource {cdn_url}: {e}")
            if tmp_file is not None:
                try:
                    os.remove(tmp_file)
                except OSError:
                    # The temporary file may never have been created.
                    pass
def fetch_cdn_resource_on_demand(local_path: str) -> Optional[bytes]:
    """Return a dynamically requested CDN resource, fetching it if not cached.

    Used for files that are loaded at runtime rather than listed in
    ``CDN_RESOURCES`` (MathJax sub-resources). Only paths under
    ``/package/js/`` that contain one of the known MathJax directory markers
    are mapped to the MathJax CDN base.

    Args:
        local_path: Request path including the leading slash,
            e.g. ``/package/js/input/tex/extensions/ams.js``.

    Returns:
        The resource bytes (from the disk cache or freshly downloaded), or
        ``None`` when the path is not eligible, is rejected as unsafe, or the
        download fails.
    """
    # Function-scope import, matching how this file imports parse_qs elsewhere.
    from urllib.parse import unquote

    js_prefix = "/package/js/"
    if not local_path.startswith(js_prefix):
        return None
    relative_path = local_path[len(js_prefix):]

    # MathJax resources
    mathjax_markers = ("a11y/", "input/", "output/", "ui/", "sre")
    if not any(marker in relative_path for marker in mathjax_markers):
        return None

    # The path comes straight from the request. Dot segments (literal or
    # percent-encoded) could climb out of the MathJax base and make this
    # server fetch, cache and serve unrelated CDN content, so refuse them.
    decoded_path = unquote(relative_path)
    if "\\" in decoded_path or any(part in (".", "..") for part in decoded_path.split("/")):
        return None

    # NOTE(review): a query string in local_path becomes part of both the
    # upstream URL and the cache key - confirm whether callers should strip it.
    cdn_url = f"{CDN_BASES['mathjax']}/{relative_path}"
    tmp_file = None
    try:
        url_hash = hashlib.md5(local_path.encode()).hexdigest()
        cache_file = os.path.join(CDN_CACHE_DIR, url_hash)

        # Check cache first
        if os.path.exists(cache_file):
            with open(cache_file, 'rb') as f:
                return f.read()

        # Fetch from CDN
        logger.log_info(f"Fetching on-demand: {cdn_url}")
        response = requests.get(cdn_url, timeout=10)
        response.raise_for_status()

        # Write to a temporary name and rename into place so a failed write
        # never leaves a truncated file that later requests would serve.
        tmp_file = f"{cache_file}.{os.getpid()}.tmp"
        with open(tmp_file, 'wb') as f:
            f.write(response.content)
        os.replace(tmp_file, cache_file)
        tmp_file = None

        logger.log_info(f"Cached on-demand: {local_path}")
        return response.content
    except Exception as e:
        logger.log_error(f"Failed to fetch on-demand resource {cdn_url}: {e}")
        if tmp_file is not None:
            try:
                os.remove(tmp_file)
            except OSError:
                # The temporary file may never have been created.
                pass
        return None
@lru_cache(maxsize=1024)
def _load_file_for_version(file_path: str, is_js: bool, mtime_ns: int, size: int) -> Tuple[bytes, str]:
    """Read (and for JS, minify) one specific on-disk version of a file.

    ``mtime_ns`` and ``size`` are not used in the body; they are part of the
    cache key so that a modified file produces a new cache entry.
    """
    mime_type, _ = mimetypes.guess_type(file_path)
    if mime_type is None:
        mime_type = "application/octet-stream"

    with open(file_path, "rb") as f:
        content = f.read()

    # Minify JS files
    if is_js or mime_type == "application/javascript" or file_path.endswith(".js"):
        try:
            content = jsmin(content.decode("utf-8")).encode("utf-8")
        except Exception as err:
            # Fall back to the unminified bytes rather than failing the request.
            logger.log_error(f"Error minifying JS file {file_path}: {err}")

    return content, mime_type


def load_file_cached(file_path: str, is_js: bool = False) -> Tuple[bytes, str]:
    """LRU-cached loader for static assets.

    The cache entry is keyed on the file's modification time and size as well
    as its path, so editing a file on disk is picked up on the next request
    instead of serving the stale copy until the server restarts.

    Args:
        file_path: Path of the file to read.
        is_js: Force JavaScript minification regardless of the detected type.

    Returns:
        ``(content, mime_type)``; ``mime_type`` falls back to
        ``application/octet-stream`` when it cannot be guessed.

    Raises:
        OSError: If the file cannot be stat'ed or read (e.g. it was deleted).
    """
    stat_result = os.stat(file_path)
    return _load_file_for_version(file_path, is_js, stat_result.st_mtime_ns, stat_result.st_size)


# Keep the lru_cache introspection helpers available on the public name.
load_file_cached.cache_info = _load_file_for_version.cache_info
load_file_cached.cache_clear = _load_file_for_version.cache_clear
def should_cache_file(file_path: str) -> bool:
    """Return True when the file's extension is one of CACHEABLE_EXTENSIONS.

    The comparison is case-insensitive: the extension is lower-cased first.
    """
    _, extension = os.path.splitext(file_path)
    return extension.lower() in CACHEABLE_EXTENSIONS
def get_html_files(directory=HTML_DIR):
html_files = []
@@ -68,9 +195,9 @@ def build_index_page() -> str:
return base_html.replace("<!-- CONTENT -->", full_content)
import base64
import random
import time
from hashes.hashes import hash_list
@@ -79,7 +206,6 @@ H1 = random.choice(hash_list)
H2_CANDIDATES = [h for h in hash_list if h != H1]
H2 = random.choice(H2_CANDIDATES) if H2_CANDIDATES else H1
# Caching was a bad idea: the server time got stuck. It is now a variable ;)
def index_footer() -> str:
tor_link = "http://7uhuxits7qfmiagkmpazxvh3rtk6aijs6pbawge3fl77y4xqjixlhkqd.onion/"
return f"""
@@ -108,71 +234,66 @@ def index_footer() -> str:
"""
class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
# This is a Helper Function for the POST Endpoints
def _parse_post_data(self):
"""Parse POST request body"""
import json
content_length = int(self.headers.get('Content-Length', 0))
if content_length == 0:
return {}
post_data = self.rfile.read(content_length)
content_type = self.headers.get('Content-Type', '')
try:
if 'application/json' in content_type:
return json.loads(post_data.decode('utf-8'))
elif 'application/x-www-form-urlencoded' in content_type:
from urllib.parse import parse_qs
parsed = parse_qs(post_data.decode('utf-8'))
return {k: v[0] if len(v) == 1 else v for k, v in parsed.items()}
else:
return {"raw": post_data}
except Exception as e:
logger.log_error(f"Error parsing POST data: {e}")
"""Parse POST request body"""
import json
content_length = int(self.headers.get('Content-Length', 0))
if content_length == 0:
return {}
post_data = self.rfile.read(content_length)
content_type = self.headers.get('Content-Type', '')
try:
if 'application/json' in content_type:
return json.loads(post_data.decode('utf-8'))
elif 'application/x-www-form-urlencoded' in content_type:
from urllib.parse import parse_qs
parsed = parse_qs(post_data.decode('utf-8'))
return {k: v[0] if len(v) == 1 else v for k, v in parsed.items()}
else:
return {"raw": post_data}
except Exception as e:
logger.log_error(f"Error parsing POST data: {e}")
return {"raw": post_data}
def do_POST(self):
"""Handle POST requests - primarily for plugin routes"""
req_path = self.path.lstrip("/")
# Parse POST data
post_data = self._parse_post_data()
# Add additional request info
request_data = {
"path": self.path,
"headers": dict(self.headers),
"data": post_data,
"method": "POST"
}
# Check plugin routes
plugin_result = plugin_manager.handle_request("/" + req_path, request_data, method="POST")
if plugin_result is not None:
status, headers, body = plugin_result
self.send_response(status)
for key, value in headers.items():
self.send_header(key, value)
self.end_headers()
if isinstance(body, str):
self.wfile.write(body.encode("utf-8"))
elif isinstance(body, bytes):
self.wfile.write(body)
else:
self.wfile.write(str(body).encode("utf-8"))
return
# No plugin handled this POST request
self.send_response(404)
self.send_header("Content-type", "application/json")
"""Handle POST requests - primarily for plugin routes"""
req_path = self.path.lstrip("/")
post_data = self._parse_post_data()
request_data = {
"path": self.path,
"headers": dict(self.headers),
"data": post_data,
"method": "POST"
}
plugin_result = plugin_manager.handle_request("/" + req_path, request_data, method="POST")
if plugin_result is not None:
status, headers, body = plugin_result
self.send_response(status)
for key, value in headers.items():
self.send_header(key, value)
self.end_headers()
error_response = json.dumps({"error": "Route not found"})
self.wfile.write(error_response.encode("utf-8"))
if isinstance(body, str):
self.wfile.write(body.encode("utf-8"))
elif isinstance(body, bytes):
self.wfile.write(body)
else:
self.wfile.write(str(body).encode("utf-8"))
return
self.send_response(404)
self.send_header("Content-type", "application/json")
self.end_headers()
error_response = json.dumps({"error": "Route not found"})
self.wfile.write(error_response.encode("utf-8"))
def do_GET(self):
req_path = self.path.lstrip("/") # normalize leading /
req_path = self.path.lstrip("/")
# Handle root/index
if req_path == "" or req_path == "index.html":
@@ -183,7 +304,52 @@ class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
self.wfile.write(content.encode("utf-8"))
return
# CHECK PLUGIN ROUTES FIRST
# Handle CDN package requests
if req_path.startswith("package/"):
cdn_path = "/" + req_path
# Try to get from pre-fetched cache
if cdn_path in CDN_RESOURCES:
url_hash = hashlib.md5(cdn_path.encode()).hexdigest()
cache_file = os.path.join(CDN_CACHE_DIR, url_hash)
if os.path.exists(cache_file):
with open(cache_file, 'rb') as f:
cached_content = f.read()
else:
cached_content = None
else:
# Try on-demand fetching for dynamic resources
cached_content = fetch_cdn_resource_on_demand(cdn_path)
if cached_content:
# Determine mime type
if cdn_path.endswith('.css'):
mime_type = "text/css"
elif cdn_path.endswith('.js'):
mime_type = "application/javascript"
elif cdn_path.endswith('.wasm'):
mime_type = "application/wasm"
elif cdn_path.endswith('.json'):
mime_type = "application/json"
else:
mime_type = "application/octet-stream"
self.send_response(200)
self.send_header("Content-type", mime_type)
self.send_header("Cache-Control", "public, max-age=86400")
self.send_header("Access-Control-Allow-Origin", "*") # CORS for CDN resources
self.end_headers()
self.wfile.write(cached_content)
return
else:
logger.log_warning(f"CDN resource not found: {cdn_path}")
self.send_response(404)
self.end_headers()
self.wfile.write(b"404 - CDN resource not available")
return
# CHECK PLUGIN ROUTES
plugin_result = plugin_manager.handle_request("/" + req_path, {"path": self.path})
if plugin_result is not None:
status, headers, body = plugin_result
@@ -196,9 +362,8 @@ class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
# Handle markdown file downloads
if req_path.startswith("markdown/"):
markdown_filename = req_path[9:] # Remove "markdown/" prefix
markdown_filename = req_path[9:]
# Security check
if not markdown_filename.endswith(".md") or ".." in markdown_filename or "/" in markdown_filename:
self.send_response(403)
self.end_headers()
@@ -242,9 +407,8 @@ class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
# Handle Lua file downloads
if req_path.startswith("lua/"):
lua_filename = req_path[4:] # Remove "lua/" prefix
lua_filename = req_path[4:]
# Security check
if not lua_filename.endswith(".lua") or ".." in lua_filename or "/" in lua_filename:
self.send_response(403)
self.end_headers()
@@ -286,7 +450,7 @@ class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
self.wfile.write(b"500 - Internal Server Error")
return
# Handle other files (existing functionality)
# Handle other files with LRU caching for static assets
file_path = os.path.normpath(os.path.join(PROJECT_ROOT, req_path))
if not file_path.startswith(PROJECT_ROOT):
self.send_response(403)
@@ -295,25 +459,46 @@ class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
return
if os.path.isfile(file_path):
mime_type, _ = mimetypes.guess_type(file_path)
if mime_type is None:
mime_type = "application/octet-stream"
try:
# Use LRU cache for cacheable files
if should_cache_file(file_path):
is_js = file_path.endswith('.js')
content, mime_type = load_file_cached(file_path, is_js)
self.send_response(200)
self.send_header("Content-type", mime_type)
self.send_header("Cache-Control", "public, max-age=3600") # Cache for 1 hour
self.end_headers()
self.wfile.write(content)
else:
# Non-cacheable files (e.g., HTML)
mime_type, _ = mimetypes.guess_type(file_path)
if mime_type is None:
mime_type = "application/octet-stream"
with open(file_path, "rb") as f:
content = f.read()
with open(file_path, "rb") as f:
content = f.read()
# Obfuscate JS on the fly
if mime_type == "application/javascript" or file_path.endswith(".js"):
try:
content = jsmin(content.decode("utf-8")).encode("utf-8")
except Exception as err:
logger.log_error(f"Error minifying JS file {file_path}: {err}")
# Minify JS even if not cached
if mime_type == "application/javascript" or file_path.endswith(".js"):
try:
content = jsmin(content.decode("utf-8")).encode("utf-8")
except Exception as err:
logger.log_error(f"Error minifying JS file {file_path}: {err}")
self.send_response(200)
self.send_header("Content-type", mime_type)
self.end_headers()
self.wfile.write(content)
return
self.send_response(200)
self.send_header("Content-type", mime_type)
self.end_headers()
self.wfile.write(content)
return
except Exception as err:
logger.log_error(f"Error serving file {file_path}: {err}")
self.send_response(500)
self.end_headers()
self.wfile.write(b"500 - Internal Server Error")
return
self.send_response(404)
self.end_headers()
@@ -327,12 +512,19 @@ def run_pypost():
if __name__ == "__main__":
    # Script entry point: warm the CDN cache, start the PyPost watcher thread,
    # then serve HTTP on localhost:8000 until interrupted or an error occurs.
    try:
        # Fetch CDN resources on startup. Individual download failures are
        # logged inside fetch_cdn_resources() and do not abort startup.
        logger.log_info("Initializing CDN resource cache...")
        fetch_cdn_resources()
        logger.log_info("CDN resources ready!")

        # Start PyPost watcher as a daemon thread so it exits with the server.
        threading.Thread(target=run_pypost, daemon=True).start()
        logger.log_debug("Started PyPost.py in background watcher thread.")

        server_address = ("localhost", 8000)
        # Construct the server exactly once; a second HTTPServer on the same
        # address would fail to bind while the first one is still open.
        httpd: HTTPServer = HTTPServer(server_address, WebServerHTTPRequestHandler)
        logger.log_info(f"Serving on http://{server_address[0]}:{server_address[1]}")
        # Keep this number in sync with the maxsize of the static-asset LRU cache.
        logger.log_info("LRU cache enabled for static assets (max 1024 files)")
        httpd.serve_forever()
    except (Exception, KeyboardInterrupt) as e:
        logger.log_info(f"Shutting down server.\n Reason: {e}")