Caching system with better performance, plus a local CDN package fetcher; templates updated to use the local CDN.
This commit is contained in:
362
webserver.py
362
webserver.py
@@ -5,8 +5,12 @@ import subprocess
|
||||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||
import mimetypes
|
||||
import json
|
||||
from jsmin import jsmin # pip install jsmin
|
||||
from jsmin import jsmin
|
||||
from pathlib import Path
|
||||
import requests
|
||||
from functools import lru_cache
|
||||
import hashlib
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from log.Logger import *
|
||||
from lua import plugin_manager
|
||||
@@ -14,13 +18,136 @@ from PyPost import extract_summary
|
||||
|
||||
logger = Logger()
|
||||
plugin_manager = plugin_manager.PluginManager()
|
||||
plugin_manager.load_all() # load all plugins
|
||||
plugin_manager.load_all()
|
||||
|
||||
# --- Path configuration -------------------------------------------------
# All paths are resolved relative to this file so the server can be
# launched from any working directory.
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
HTML_DIR = os.path.join(PROJECT_ROOT, "html")
MARKDOWN_DIR = os.path.join(PROJECT_ROOT, "markdown")
BASE_FILE = os.path.join(HTML_DIR, "base", "index.html")
LUA_DIR = Path(PROJECT_ROOT) / "lua" / "plugins"
CACHE_DIR = os.path.join(PROJECT_ROOT, "cache")
CDN_CACHE_DIR = os.path.join(CACHE_DIR, "cdn")

# CDN resources fetched once at startup and served from the local cache.
# Keys are the local URL paths the HTML templates reference; values are
# the upstream CDN URLs they are downloaded from.
CDN_RESOURCES = {
    "/package/css/prism.min.css": "https://cdn.jsdelivr.net/npm/prismjs/themes/prism.min.css",
    "/package/js/prism.min.js": "https://cdn.jsdelivr.net/npm/prismjs/prism.min.js",
    "/package/js/prism-python.min.js": "https://cdn.jsdelivr.net/npm/prismjs/components/prism-python.min.js",
    "/package/js/prism-javascript.min.js": "https://cdn.jsdelivr.net/npm/prismjs/components/prism-javascript.min.js",
    "/package/js/mathjax.js": "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js",
}

# CDN base URLs for resources requested dynamically at runtime
# (MathJax lazily loads extra components that are not known up front).
CDN_BASES = {
    "mathjax": "https://cdn.jsdelivr.net/npm/mathjax@3/es5"
}

# Static-asset extensions that are served from the in-process LRU cache.
CACHEABLE_EXTENSIONS = {'.css', '.js', '.webp', '.jpg', '.jpeg', '.png', '.gif', '.svg', '.woff', '.woff2', '.ttf', '.eot', '.ico'}
|
||||
|
||||
def ensure_cache_dirs():
    """Create the CDN cache directory tree if it does not exist yet."""
    Path(CDN_CACHE_DIR).mkdir(parents=True, exist_ok=True)
    logger.log_info(f"Cache directory ready: {CDN_CACHE_DIR}")
|
||||
|
||||
def fetch_cdn_resources():
    """Download every entry in CDN_RESOURCES into the local cache.

    Called once at startup. Entries already present on disk are left
    untouched; individual fetch failures are logged and skipped so one
    unreachable CDN does not prevent the server from starting.
    """
    ensure_cache_dirs()
    logger.log_info("Fetching CDN resources...")

    for route, upstream in CDN_RESOURCES.items():
        # Cache files are named by the MD5 of the local route, which keeps
        # on-disk names filesystem-safe regardless of the URL contents.
        digest = hashlib.md5(route.encode()).hexdigest()
        target = os.path.join(CDN_CACHE_DIR, digest)

        if os.path.exists(target):
            logger.log_debug(f"CDN resource already cached: {route}")
            continue

        try:
            logger.log_info(f"Fetching {upstream}...")
            resp = requests.get(upstream, timeout=30)
            resp.raise_for_status()

            with open(target, 'wb') as fh:
                fh.write(resp.content)

            logger.log_info(f"Cached CDN resource: {route} ({len(resp.content)} bytes)")
        except Exception as e:
            logger.log_error(f"Failed to fetch CDN resource {upstream}: {e}")
|
||||
|
||||
def fetch_cdn_resource_on_demand(local_path: str) -> Optional[bytes]:
    """
    Fetch a CDN resource on-demand if not already cached.

    Used for dynamically loaded resources like MathJax dependencies,
    which request extra components at runtime that are not listed in
    CDN_RESOURCES.

    Returns the resource bytes, or None when the path does not map to a
    known CDN base or when the fetch fails.
    """
    # Guard clauses: only dynamic JS requests under known MathJax
    # component prefixes are supported. Bailing out early here fixes a
    # latent NameError — previously a non-matching path could reach the
    # fetch/except code below with `cdn_url` unbound.
    if not local_path.startswith("/package/js/"):
        return None

    relative_path = local_path[len("/package/js/"):]

    if not any(x in relative_path for x in ["a11y/", "input/", "output/", "ui/", "sre"]):
        return None

    cdn_url = f"{CDN_BASES['mathjax']}/{relative_path}"

    try:
        # Same MD5-of-route naming scheme as the startup prefetcher so
        # both caches share one on-disk layout.
        url_hash = hashlib.md5(local_path.encode()).hexdigest()
        cache_file = os.path.join(CDN_CACHE_DIR, url_hash)

        # Serve from cache when possible
        if os.path.exists(cache_file):
            with open(cache_file, 'rb') as f:
                return f.read()

        # Fetch from CDN and persist for subsequent requests
        logger.log_info(f"Fetching on-demand: {cdn_url}")
        response = requests.get(cdn_url, timeout=10)
        response.raise_for_status()

        with open(cache_file, 'wb') as f:
            f.write(response.content)

        logger.log_info(f"Cached on-demand: {local_path}")
        return response.content

    except Exception as e:
        logger.log_error(f"Failed to fetch on-demand resource {cdn_url}: {e}")
        return None
|
||||
|
||||
@lru_cache(maxsize=1024)
def load_file_cached(file_path: str, is_js: bool = False) -> Tuple[bytes, str]:
    """
    Read a static asset from disk, memoized with an LRU cache.

    JavaScript files are minified with jsmin before being returned; if
    minification fails, the raw file contents are served instead.
    Returns a (content, mime_type) tuple.

    NOTE(review): cached entries live for the process lifetime, so edits
    to an already-cached file are not picked up until restart.
    """
    guessed, _ = mimetypes.guess_type(file_path)
    mime_type = guessed if guessed is not None else "application/octet-stream"

    content = Path(file_path).read_bytes()

    looks_like_js = is_js or mime_type == "application/javascript" or file_path.endswith(".js")
    if looks_like_js:
        try:
            content = jsmin(content.decode("utf-8")).encode("utf-8")
        except Exception as err:
            logger.log_error(f"Error minifying JS file {file_path}: {err}")

    return content, mime_type
|
||||
|
||||
def should_cache_file(file_path: str) -> bool:
    """Return True when the file's extension marks it as LRU-cacheable."""
    return os.path.splitext(file_path)[1].lower() in CACHEABLE_EXTENSIONS
|
||||
|
||||
def get_html_files(directory=HTML_DIR):
|
||||
html_files = []
|
||||
@@ -68,9 +195,9 @@ def build_index_page() -> str:
|
||||
return base_html.replace("<!-- CONTENT -->", full_content)
|
||||
|
||||
|
||||
|
||||
import base64
|
||||
import random
|
||||
import time
|
||||
|
||||
from hashes.hashes import hash_list
|
||||
|
||||
@@ -79,7 +206,6 @@ H1 = random.choice(hash_list)
|
||||
H2_CANDIDATES = [h for h in hash_list if h != H1]
|
||||
H2 = random.choice(H2_CANDIDATES) if H2_CANDIDATES else H1
|
||||
|
||||
# Caching this was a bad idea: servertime got stuck, so it is now a variable ;)
|
||||
def index_footer() -> str:
|
||||
tor_link = "http://7uhuxits7qfmiagkmpazxvh3rtk6aijs6pbawge3fl77y4xqjixlhkqd.onion/"
|
||||
return f"""
|
||||
@@ -108,71 +234,66 @@ def index_footer() -> str:
|
||||
"""
|
||||
|
||||
class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||
# This is a Helper Function for the POST Endpoints
|
||||
def _parse_post_data(self):
|
||||
"""Parse POST request body"""
|
||||
import json
|
||||
content_length = int(self.headers.get('Content-Length', 0))
|
||||
if content_length == 0:
|
||||
return {}
|
||||
|
||||
post_data = self.rfile.read(content_length)
|
||||
content_type = self.headers.get('Content-Type', '')
|
||||
|
||||
try:
|
||||
if 'application/json' in content_type:
|
||||
return json.loads(post_data.decode('utf-8'))
|
||||
elif 'application/x-www-form-urlencoded' in content_type:
|
||||
from urllib.parse import parse_qs
|
||||
parsed = parse_qs(post_data.decode('utf-8'))
|
||||
return {k: v[0] if len(v) == 1 else v for k, v in parsed.items()}
|
||||
else:
|
||||
return {"raw": post_data}
|
||||
except Exception as e:
|
||||
logger.log_error(f"Error parsing POST data: {e}")
|
||||
"""Parse POST request body"""
|
||||
import json
|
||||
content_length = int(self.headers.get('Content-Length', 0))
|
||||
if content_length == 0:
|
||||
return {}
|
||||
|
||||
post_data = self.rfile.read(content_length)
|
||||
content_type = self.headers.get('Content-Type', '')
|
||||
|
||||
try:
|
||||
if 'application/json' in content_type:
|
||||
return json.loads(post_data.decode('utf-8'))
|
||||
elif 'application/x-www-form-urlencoded' in content_type:
|
||||
from urllib.parse import parse_qs
|
||||
parsed = parse_qs(post_data.decode('utf-8'))
|
||||
return {k: v[0] if len(v) == 1 else v for k, v in parsed.items()}
|
||||
else:
|
||||
return {"raw": post_data}
|
||||
except Exception as e:
|
||||
logger.log_error(f"Error parsing POST data: {e}")
|
||||
return {"raw": post_data}
|
||||
|
||||
def do_POST(self):
|
||||
"""Handle POST requests - primarily for plugin routes"""
|
||||
req_path = self.path.lstrip("/")
|
||||
|
||||
# Parse POST data
|
||||
post_data = self._parse_post_data()
|
||||
|
||||
# Add additional request info
|
||||
request_data = {
|
||||
"path": self.path,
|
||||
"headers": dict(self.headers),
|
||||
"data": post_data,
|
||||
"method": "POST"
|
||||
}
|
||||
|
||||
# Check plugin routes
|
||||
plugin_result = plugin_manager.handle_request("/" + req_path, request_data, method="POST")
|
||||
if plugin_result is not None:
|
||||
status, headers, body = plugin_result
|
||||
self.send_response(status)
|
||||
for key, value in headers.items():
|
||||
self.send_header(key, value)
|
||||
self.end_headers()
|
||||
|
||||
if isinstance(body, str):
|
||||
self.wfile.write(body.encode("utf-8"))
|
||||
elif isinstance(body, bytes):
|
||||
self.wfile.write(body)
|
||||
else:
|
||||
self.wfile.write(str(body).encode("utf-8"))
|
||||
return
|
||||
|
||||
# No plugin handled this POST request
|
||||
self.send_response(404)
|
||||
self.send_header("Content-type", "application/json")
|
||||
"""Handle POST requests - primarily for plugin routes"""
|
||||
req_path = self.path.lstrip("/")
|
||||
|
||||
post_data = self._parse_post_data()
|
||||
|
||||
request_data = {
|
||||
"path": self.path,
|
||||
"headers": dict(self.headers),
|
||||
"data": post_data,
|
||||
"method": "POST"
|
||||
}
|
||||
|
||||
plugin_result = plugin_manager.handle_request("/" + req_path, request_data, method="POST")
|
||||
if plugin_result is not None:
|
||||
status, headers, body = plugin_result
|
||||
self.send_response(status)
|
||||
for key, value in headers.items():
|
||||
self.send_header(key, value)
|
||||
self.end_headers()
|
||||
error_response = json.dumps({"error": "Route not found"})
|
||||
self.wfile.write(error_response.encode("utf-8"))
|
||||
|
||||
if isinstance(body, str):
|
||||
self.wfile.write(body.encode("utf-8"))
|
||||
elif isinstance(body, bytes):
|
||||
self.wfile.write(body)
|
||||
else:
|
||||
self.wfile.write(str(body).encode("utf-8"))
|
||||
return
|
||||
|
||||
self.send_response(404)
|
||||
self.send_header("Content-type", "application/json")
|
||||
self.end_headers()
|
||||
error_response = json.dumps({"error": "Route not found"})
|
||||
self.wfile.write(error_response.encode("utf-8"))
|
||||
|
||||
def do_GET(self):
|
||||
req_path = self.path.lstrip("/") # normalize leading /
|
||||
req_path = self.path.lstrip("/")
|
||||
|
||||
# Handle root/index
|
||||
if req_path == "" or req_path == "index.html":
|
||||
@@ -183,7 +304,52 @@ class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||
self.wfile.write(content.encode("utf-8"))
|
||||
return
|
||||
|
||||
# CHECK PLUGIN ROUTES FIRST
|
||||
# Handle CDN package requests
|
||||
if req_path.startswith("package/"):
|
||||
cdn_path = "/" + req_path
|
||||
|
||||
# Try to get from pre-fetched cache
|
||||
if cdn_path in CDN_RESOURCES:
|
||||
url_hash = hashlib.md5(cdn_path.encode()).hexdigest()
|
||||
cache_file = os.path.join(CDN_CACHE_DIR, url_hash)
|
||||
|
||||
if os.path.exists(cache_file):
|
||||
with open(cache_file, 'rb') as f:
|
||||
cached_content = f.read()
|
||||
else:
|
||||
cached_content = None
|
||||
else:
|
||||
# Try on-demand fetching for dynamic resources
|
||||
cached_content = fetch_cdn_resource_on_demand(cdn_path)
|
||||
|
||||
if cached_content:
|
||||
# Determine mime type
|
||||
if cdn_path.endswith('.css'):
|
||||
mime_type = "text/css"
|
||||
elif cdn_path.endswith('.js'):
|
||||
mime_type = "application/javascript"
|
||||
elif cdn_path.endswith('.wasm'):
|
||||
mime_type = "application/wasm"
|
||||
elif cdn_path.endswith('.json'):
|
||||
mime_type = "application/json"
|
||||
else:
|
||||
mime_type = "application/octet-stream"
|
||||
|
||||
self.send_response(200)
|
||||
self.send_header("Content-type", mime_type)
|
||||
self.send_header("Cache-Control", "public, max-age=86400")
|
||||
self.send_header("Access-Control-Allow-Origin", "*") # CORS for CDN resources
|
||||
self.end_headers()
|
||||
self.wfile.write(cached_content)
|
||||
return
|
||||
else:
|
||||
logger.log_warning(f"CDN resource not found: {cdn_path}")
|
||||
self.send_response(404)
|
||||
self.end_headers()
|
||||
self.wfile.write(b"404 - CDN resource not available")
|
||||
return
|
||||
|
||||
# CHECK PLUGIN ROUTES
|
||||
plugin_result = plugin_manager.handle_request("/" + req_path, {"path": self.path})
|
||||
if plugin_result is not None:
|
||||
status, headers, body = plugin_result
|
||||
@@ -196,9 +362,8 @@ class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||
|
||||
# Handle markdown file downloads
|
||||
if req_path.startswith("markdown/"):
|
||||
markdown_filename = req_path[9:] # Remove "markdown/" prefix
|
||||
markdown_filename = req_path[9:]
|
||||
|
||||
# Security check
|
||||
if not markdown_filename.endswith(".md") or ".." in markdown_filename or "/" in markdown_filename:
|
||||
self.send_response(403)
|
||||
self.end_headers()
|
||||
@@ -242,9 +407,8 @@ class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||
|
||||
# Handle Lua file downloads
|
||||
if req_path.startswith("lua/"):
|
||||
lua_filename = req_path[4:] # Remove "lua/" prefix
|
||||
lua_filename = req_path[4:]
|
||||
|
||||
# Security check
|
||||
if not lua_filename.endswith(".lua") or ".." in lua_filename or "/" in lua_filename:
|
||||
self.send_response(403)
|
||||
self.end_headers()
|
||||
@@ -286,7 +450,7 @@ class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||
self.wfile.write(b"500 - Internal Server Error")
|
||||
return
|
||||
|
||||
# Handle other files (existing functionality)
|
||||
# Handle other files with LRU caching for static assets
|
||||
file_path = os.path.normpath(os.path.join(PROJECT_ROOT, req_path))
|
||||
if not file_path.startswith(PROJECT_ROOT):
|
||||
self.send_response(403)
|
||||
@@ -295,25 +459,46 @@ class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||
return
|
||||
|
||||
if os.path.isfile(file_path):
|
||||
mime_type, _ = mimetypes.guess_type(file_path)
|
||||
if mime_type is None:
|
||||
mime_type = "application/octet-stream"
|
||||
try:
|
||||
# Use LRU cache for cacheable files
|
||||
if should_cache_file(file_path):
|
||||
is_js = file_path.endswith('.js')
|
||||
content, mime_type = load_file_cached(file_path, is_js)
|
||||
|
||||
self.send_response(200)
|
||||
self.send_header("Content-type", mime_type)
|
||||
self.send_header("Cache-Control", "public, max-age=3600") # Cache for 1 hour
|
||||
self.end_headers()
|
||||
self.wfile.write(content)
|
||||
else:
|
||||
# Non-cacheable files (e.g., HTML)
|
||||
mime_type, _ = mimetypes.guess_type(file_path)
|
||||
if mime_type is None:
|
||||
mime_type = "application/octet-stream"
|
||||
|
||||
with open(file_path, "rb") as f:
|
||||
content = f.read()
|
||||
with open(file_path, "rb") as f:
|
||||
content = f.read()
|
||||
|
||||
# Obfuscate JS on the fly
|
||||
if mime_type == "application/javascript" or file_path.endswith(".js"):
|
||||
try:
|
||||
content = jsmin(content.decode("utf-8")).encode("utf-8")
|
||||
except Exception as err:
|
||||
logger.log_error(f"Error minifying JS file {file_path}: {err}")
|
||||
# Minify JS even if not cached
|
||||
if mime_type == "application/javascript" or file_path.endswith(".js"):
|
||||
try:
|
||||
content = jsmin(content.decode("utf-8")).encode("utf-8")
|
||||
except Exception as err:
|
||||
logger.log_error(f"Error minifying JS file {file_path}: {err}")
|
||||
|
||||
self.send_response(200)
|
||||
self.send_header("Content-type", mime_type)
|
||||
self.end_headers()
|
||||
self.wfile.write(content)
|
||||
return
|
||||
self.send_response(200)
|
||||
self.send_header("Content-type", mime_type)
|
||||
self.end_headers()
|
||||
self.wfile.write(content)
|
||||
|
||||
return
|
||||
|
||||
except Exception as err:
|
||||
logger.log_error(f"Error serving file {file_path}: {err}")
|
||||
self.send_response(500)
|
||||
self.end_headers()
|
||||
self.wfile.write(b"500 - Internal Server Error")
|
||||
return
|
||||
|
||||
self.send_response(404)
|
||||
self.end_headers()
|
||||
@@ -327,12 +512,19 @@ def run_pypost():
|
||||
|
||||
if __name__ == "__main__":
    try:
        # Warm the local CDN cache before accepting requests so the
        # templates can reference /package/* immediately.
        logger.log_info("Initializing CDN resource cache...")
        fetch_cdn_resources()
        logger.log_info("CDN resources ready!")

        # Start the PyPost watcher in a background daemon thread.
        threading.Thread(target=run_pypost, daemon=True).start()
        logger.log_debug("Started PyPost.py in background watcher thread.")

        server_address = ("localhost", 8000)
        httpd: HTTPServer = HTTPServer(server_address, WebServerHTTPRequestHandler)
        logger.log_info(f"Serving on http://{server_address[0]}:{server_address[1]}")
        # Keep this message in sync with lru_cache(maxsize=1024) on
        # load_file_cached (it previously claimed 512).
        logger.log_info("LRU cache enabled for static assets (max 1024 files)")
        httpd.serve_forever()
    except (Exception, KeyboardInterrupt) as e:
        logger.log_info(f"Shutting down server.\n Reason: {e}")
||||
|
||||
Reference in New Issue
Block a user