# PyPost/webserver.py

import base64
import hashlib
import json
import mimetypes
import os
import random
import subprocess
import sys
import threading
import time
from functools import lru_cache
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
from typing import Optional, Tuple

import requests
from jsmin import jsmin

from log.Logger import *
from hashes.hashes import hash_list
from lua import plugin_manager
from PyPost import extract_summary
logger = Logger()
plugin_manager = plugin_manager.PluginManager()
plugin_manager.load_all()
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
HTML_DIR = os.path.join(PROJECT_ROOT, "html")
MARKDOWN_DIR = os.path.join(PROJECT_ROOT, "markdown")
BASE_FILE = os.path.join(HTML_DIR, "base", "index.html")
LUA_DIR = Path(PROJECT_ROOT) / "lua" / "plugins"
CACHE_DIR = os.path.join(PROJECT_ROOT, "cache")
CDN_CACHE_DIR = os.path.join(CACHE_DIR, "cdn")
# CDN Resources to fetch and cache
CDN_RESOURCES = {
    "/package/css/prism.min.css": "https://cdn.jsdelivr.net/npm/prismjs/themes/prism.min.css",
    "/package/js/prism.min.js": "https://cdn.jsdelivr.net/npm/prismjs/prism.min.js",
    "/package/js/prism-python.min.js": "https://cdn.jsdelivr.net/npm/prismjs/components/prism-python.min.js",
    "/package/js/prism-javascript.min.js": "https://cdn.jsdelivr.net/npm/prismjs/components/prism-javascript.min.js",
    "/package/js/mathjax.js": "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js",
}

# CDN base URLs for dynamic resource fetching
CDN_BASES = {
    "mathjax": "https://cdn.jsdelivr.net/npm/mathjax@3/es5"
}
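
# Note: paths under "/package/js/" that are not listed in CDN_RESOURCES (e.g. a
# hypothetical MathJax sub-module request like "/package/js/input/tex.js") are
# resolved against CDN_BASES["mathjax"] by fetch_cdn_resource_on_demand() below.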
# File extensions to cache aggressively
CACHEABLE_EXTENSIONS = {'.css', '.js', '.webp', '.jpg', '.jpeg', '.png', '.gif', '.svg', '.woff', '.woff2', '.ttf', '.eot', '.ico'}
def ensure_cache_dirs():
    """Ensure cache directories exist"""
    os.makedirs(CDN_CACHE_DIR, exist_ok=True)
    logger.log_info(f"Cache directory ready: {CDN_CACHE_DIR}")

def fetch_cdn_resources():
    """Fetch all CDN resources on startup and cache them"""
    ensure_cache_dirs()
    logger.log_info("Fetching CDN resources...")
    for local_path, cdn_url in CDN_RESOURCES.items():
        try:
            # Create filename from hash of the local path for safe storage
            url_hash = hashlib.md5(local_path.encode()).hexdigest()
            cache_file = os.path.join(CDN_CACHE_DIR, url_hash)
            # Check if already cached
            if os.path.exists(cache_file):
                logger.log_debug(f"CDN resource already cached: {local_path}")
                continue
            # Fetch resource
            logger.log_info(f"Fetching {cdn_url}...")
            response = requests.get(cdn_url, timeout=30)
            response.raise_for_status()
            # Save to cache
            with open(cache_file, 'wb') as f:
                f.write(response.content)
            logger.log_info(f"Cached CDN resource: {local_path} ({len(response.content)} bytes)")
        except Exception as e:
            logger.log_error(f"Failed to fetch CDN resource {cdn_url}: {e}")

def fetch_cdn_resource_on_demand(local_path: str) -> Optional[bytes]:
    """
    Fetch a CDN resource on-demand if not already cached.
    Used for dynamically loaded resources like MathJax dependencies.
    """
    # Try to map the request to a known CDN base
    if local_path.startswith("/package/js/"):
        relative_path = local_path[12:]  # Remove "/package/js/"
        # MathJax resources
        if any(x in relative_path for x in ["a11y/", "input/", "output/", "ui/", "sre"]):
            cdn_url = f"{CDN_BASES['mathjax']}/{relative_path}"
        else:
            return None
        try:
            url_hash = hashlib.md5(local_path.encode()).hexdigest()
            cache_file = os.path.join(CDN_CACHE_DIR, url_hash)
            # Check cache first
            if os.path.exists(cache_file):
                with open(cache_file, 'rb') as f:
                    return f.read()
            # Fetch from CDN
            logger.log_info(f"Fetching on-demand: {cdn_url}")
            response = requests.get(cdn_url, timeout=10)
            response.raise_for_status()
            # Cache it
            with open(cache_file, 'wb') as f:
                f.write(response.content)
            logger.log_info(f"Cached on-demand: {local_path}")
            return response.content
        except Exception as e:
            logger.log_error(f"Failed to fetch on-demand resource {cdn_url}: {e}")
            return None
    return None

@lru_cache(maxsize=1024)
def load_file_cached(file_path: str, is_js: bool = False) -> Tuple[bytes, str]:
    """
    LRU cached file loader for static assets.
    Returns (content, mime_type)
    """
    mime_type, _ = mimetypes.guess_type(file_path)
    if mime_type is None:
        mime_type = "application/octet-stream"
    with open(file_path, "rb") as f:
        content = f.read()
    # Minify JS files
    if is_js or mime_type == "application/javascript" or file_path.endswith(".js"):
        try:
            content = jsmin(content.decode("utf-8")).encode("utf-8")
        except Exception as err:
            logger.log_error(f"Error minifying JS file {file_path}: {err}")
    return content, mime_type

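# Note on load_file_cached above: the LRU cache is keyed only on (file_path, is_js),
# so a file edited on disk keeps being served from cache until the process restarts.
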
def should_cache_file(file_path: str) -> bool:
    """Determine if a file should be LRU cached based on extension"""
    ext = os.path.splitext(file_path)[1].lower()
    return ext in CACHEABLE_EXTENSIONS


def get_html_files(directory=HTML_DIR):
    html_files = []
    for entry in os.listdir(directory):
        full_path = os.path.join(directory, entry)
        if os.path.isfile(full_path) and entry.endswith(".html"):
            html_files.append(entry)
    return html_files

def build_index_page() -> str:
    with open(BASE_FILE, "r", encoding="utf-8") as f:
        base_html = f.read()
    articles = []
    for md_path in Path(MARKDOWN_DIR).rglob("*.md"):
        try:
            summary_data = extract_summary(md_path)
            if summary_data:
                html_name, summary = summary_data
            else:
                html_name = md_path.stem + ".html"
                summary = "No Summary for this Article!"
            text = md_path.read_text(encoding="utf-8")
            title = md_path.stem
            for line in text.splitlines():
                if line.startswith("# "):
                    title = line[2:].strip()
                    break
            article_html = f"""
            <article>
                <h3><a href="/html/{html_name}">{title}</a></h3>
                <p>{summary}</p>
            </article>
            """
            articles.append(article_html)
        except Exception as e:
            logger.log_warning(f"Exception with summary: {e} at {md_path}")
            continue
    full_content = "\n".join(articles) + "</main>" + index_footer()
    return base_html.replace("<!-- CONTENT -->", full_content)

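# build_index_page() is called on every request to "/" (see do_GET), so new
# markdown articles appear on the index without restarting the server.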

# Generate hashes only once at server start
H1 = random.choice(hash_list)
H2_CANDIDATES = [h for h in hash_list if h != H1]
H2 = random.choice(H2_CANDIDATES) if H2_CANDIDATES else H1

def index_footer() -> str:
    tor_link = "http://7uhuxits7qfmiagkmpazxvh3rtk6aijs6pbawge3fl77y4xqjixlhkqd.onion/"
    return f"""
    <!-- Footer styling doesn't need to work with
         flex, or anything else, because of pagination.
    -->
    <div class="footer">
        <footer>
            <p>
                <!-- Server Time -->
                <img src="../css/icons/date.webp" width="16" height="16" alt="date" loading="lazy" style="vertical-align: middle;" />
                Server-Time (CET ; GMT+2): <i>{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}</i><br />
                <!-- Hashes -->
                <img src="../css/icons/magnifier.webp" width="16" height="16" alt="Hash1" loading="lazy" style="display:inline; vertical-align:middle;" />
                Hash 1 (<b>UTF-8</b>)<i>:{base64.b64encode(H1.encode("utf-8")).decode("utf-8")}</i><br />
                <img src="../css/icons/magnifier.webp" width="16" height="16" alt="Hash2" loading="lazy" style="display:inline; vertical-align:middle;" />
                Hash 2 (<b>Windows-1252</b>)<i>:{base64.b64encode(H2.encode("windows-1252")).decode("windows-1252")}</i><br />
                <!-- Git Repository Link -->
                <img src="../css/icons/written.webp" width="16" height="16" alt="git" loading="lazy" style="display:inline; vertical-align:middle;" />
                <a style="text-decoration:none;color:#0066cc;font-style:italic;padding-top:5px;" href="https://rattatwinko.servecounterstrike.com/gitea/rattatwinko/PyPost">View Git-Repository</a><br />
                <img src="../css/icons/script.webp" width="16" height="16" alt="tor" loading="lazy" style="display:inline; vertical-align:middle;" />
                <a style="text-decoration:none;color:#0066cc;font-style:italic;padding-top:5px;" href="{tor_link}">View Tor Site</a>
            </p>
        </footer>
    </div>
    """

class WebServerHTTPRequestHandler(BaseHTTPRequestHandler):
    def _parse_post_data(self):
        """Parse POST request body"""
        content_length = int(self.headers.get('Content-Length', 0))
        if content_length == 0:
            return {}
        post_data = self.rfile.read(content_length)
        content_type = self.headers.get('Content-Type', '')
        try:
            if 'application/json' in content_type:
                return json.loads(post_data.decode('utf-8'))
            elif 'application/x-www-form-urlencoded' in content_type:
                from urllib.parse import parse_qs
                parsed = parse_qs(post_data.decode('utf-8'))
                return {k: v[0] if len(v) == 1 else v for k, v in parsed.items()}
            else:
                return {"raw": post_data}
        except Exception as e:
            logger.log_error(f"Error parsing POST data: {e}")
            return {"raw": post_data}

    def do_POST(self):
        """Handle POST requests - primarily for plugin routes"""
        req_path = self.path.lstrip("/")
        post_data = self._parse_post_data()
        request_data = {
            "path": self.path,
            "headers": dict(self.headers),
            "data": post_data,
            "method": "POST"
        }
        plugin_result = plugin_manager.handle_request("/" + req_path, request_data, method="POST")
        if plugin_result is not None:
            status, headers, body = plugin_result
            self.send_response(status)
            for key, value in headers.items():
                self.send_header(key, value)
            self.end_headers()
            if isinstance(body, str):
                self.wfile.write(body.encode("utf-8"))
            elif isinstance(body, bytes):
                self.wfile.write(body)
            else:
                self.wfile.write(str(body).encode("utf-8"))
            return
        self.send_response(404)
        self.send_header("Content-type", "application/json")
        self.end_headers()
        error_response = json.dumps({"error": "Route not found"})
        self.wfile.write(error_response.encode("utf-8"))

    def do_GET(self):
        req_path = self.path.lstrip("/")
        # Handle root/index
        if req_path == "" or req_path == "index.html":
            content = build_index_page()
            self.send_response(200)
            self.send_header("Content-type", "text/html")
            self.end_headers()
            self.wfile.write(content.encode("utf-8"))
            return
        # Handle CDN package requests
        if req_path.startswith("package/"):
            cdn_path = "/" + req_path
            # Try to get from pre-fetched cache
            if cdn_path in CDN_RESOURCES:
                url_hash = hashlib.md5(cdn_path.encode()).hexdigest()
                cache_file = os.path.join(CDN_CACHE_DIR, url_hash)
                if os.path.exists(cache_file):
                    with open(cache_file, 'rb') as f:
                        cached_content = f.read()
                else:
                    cached_content = None
            else:
                # Try on-demand fetching for dynamic resources
                cached_content = fetch_cdn_resource_on_demand(cdn_path)
            if cached_content:
                # Determine mime type
                if cdn_path.endswith('.css'):
                    mime_type = "text/css"
                elif cdn_path.endswith('.js'):
                    mime_type = "application/javascript"
                elif cdn_path.endswith('.wasm'):
                    mime_type = "application/wasm"
                elif cdn_path.endswith('.json'):
                    mime_type = "application/json"
                else:
                    mime_type = "application/octet-stream"
                self.send_response(200)
                self.send_header("Content-type", mime_type)
                self.send_header("Cache-Control", "public, max-age=86400")
                self.send_header("Access-Control-Allow-Origin", "*")  # CORS for CDN resources
                self.end_headers()
                self.wfile.write(cached_content)
                return
            else:
                logger.log_warning(f"CDN resource not found: {cdn_path}")
                self.send_response(404)
                self.end_headers()
                self.wfile.write(b"404 - CDN resource not available")
                return
        # CHECK PLUGIN ROUTES
        plugin_result = plugin_manager.handle_request("/" + req_path, {"path": self.path})
        if plugin_result is not None:
            status, headers, body = plugin_result
            self.send_response(status)
            for key, value in headers.items():
                self.send_header(key, value)
            self.end_headers()
            self.wfile.write(body.encode("utf-8") if isinstance(body, str) else body)
            return
        # Handle markdown file downloads
        if req_path.startswith("markdown/"):
            markdown_filename = req_path[9:]
            if not markdown_filename.endswith(".md") or ".." in markdown_filename or "/" in markdown_filename:
                self.send_response(403)
                self.end_headers()
                self.wfile.write(b"403 - Forbidden: Only .md files allowed")
                return
            markdown_file_path = os.path.join(MARKDOWN_DIR, markdown_filename)
            if not os.path.exists(markdown_file_path) or not os.path.isfile(markdown_file_path):
                self.send_response(404)
                self.end_headers()
                self.wfile.write(b"404 - Markdown file not found")
                return
            # Resolve symlinks and make sure the file really lives inside MARKDOWN_DIR
            resolved_path = os.path.realpath(markdown_file_path)
            resolved_markdown_dir = os.path.realpath(MARKDOWN_DIR)
            if not resolved_path.startswith(resolved_markdown_dir + os.sep):
                self.send_response(403)
                self.end_headers()
                self.wfile.write(b"403 - Forbidden")
                return
            try:
                with open(markdown_file_path, "rb") as f:
                    content = f.read()
                self.send_response(200)
                self.send_header("Content-type", "text/markdown")
                self.send_header("Content-Disposition", f'attachment; filename="{markdown_filename}"')
                self.end_headers()
                self.wfile.write(content)
                logger.log_info(f"Served markdown file: {markdown_filename}")
                return
            except Exception as err:
                logger.log_error(f"Error serving markdown file {markdown_filename}: {err}")
                self.send_response(500)
                self.end_headers()
                self.wfile.write(b"500 - Internal Server Error")
                return
        # Handle Lua file downloads
        if req_path.startswith("lua/"):
            lua_filename = req_path[4:]
            if not lua_filename.endswith(".lua") or ".." in lua_filename or "/" in lua_filename:
                self.send_response(403)
                self.end_headers()
                self.wfile.write(b"403 - Forbidden: Only .lua files allowed")
                return
            lua_file_path = os.path.join(LUA_DIR, lua_filename)
            if not os.path.exists(lua_file_path) or not os.path.isfile(lua_file_path):
                self.send_response(404)
                self.end_headers()
                self.wfile.write(b"404 - Lua file not found")
                return
            # Resolve symlinks and make sure the file really lives inside LUA_DIR
            resolved_path = os.path.realpath(lua_file_path)
            resolved_lua_dir = os.path.realpath(LUA_DIR)
            if not resolved_path.startswith(resolved_lua_dir + os.sep):
                self.send_response(403)
                self.end_headers()
                self.wfile.write(b"403 - Forbidden")
                return
            try:
                with open(lua_file_path, "rb") as f:
                    content = f.read()
                self.send_response(200)
                self.send_header("Content-type", "text/x-lua")
                self.send_header("Content-Disposition", f'attachment; filename="{lua_filename}"')
                self.end_headers()
                self.wfile.write(content)
                logger.log_info(f"Served Lua file: {lua_filename}")
                return
            except Exception as err:
                logger.log_error(f"Error serving Lua file {lua_filename}: {err}")
                self.send_response(500)
                self.end_headers()
                self.wfile.write(b"500 - Internal Server Error")
                return
        # Handle other files with LRU caching for static assets
        file_path = os.path.normpath(os.path.join(PROJECT_ROOT, req_path))
        if not file_path.startswith(PROJECT_ROOT + os.sep):
            self.send_response(403)
            self.end_headers()
            self.wfile.write(b"403 - Forbidden")
            return
        if os.path.isfile(file_path):
            try:
                # Use LRU cache for cacheable files
                if should_cache_file(file_path):
                    is_js = file_path.endswith('.js')
                    content, mime_type = load_file_cached(file_path, is_js)
                    self.send_response(200)
                    self.send_header("Content-type", mime_type)
                    self.send_header("Cache-Control", "public, max-age=3600")  # Cache for 1 hour
                    self.end_headers()
                    self.wfile.write(content)
                else:
                    # Non-cacheable files (e.g., HTML)
                    mime_type, _ = mimetypes.guess_type(file_path)
                    if mime_type is None:
                        mime_type = "application/octet-stream"
                    with open(file_path, "rb") as f:
                        content = f.read()
                    # Minify JS even if not cached
                    if mime_type == "application/javascript" or file_path.endswith(".js"):
                        try:
                            content = jsmin(content.decode("utf-8")).encode("utf-8")
                        except Exception as err:
                            logger.log_error(f"Error minifying JS file {file_path}: {err}")
                    self.send_response(200)
                    self.send_header("Content-type", mime_type)
                    self.end_headers()
                    self.wfile.write(content)
                return
            except Exception as err:
                logger.log_error(f"Error serving file {file_path}: {err}")
                self.send_response(500)
                self.end_headers()
                self.wfile.write(b"500 - Internal Server Error")
                return
        self.send_response(404)
        self.end_headers()
        self.wfile.write(b"404 - Not Found")

def run_pypost():
    """Run PyPost.py in a separate process."""
    script = os.path.join(PROJECT_ROOT, "PyPost.py")
    subprocess.run([sys.executable, script])


if __name__ == "__main__":
    try:
        # Fetch CDN resources on startup
        logger.log_info("Initializing CDN resource cache...")
        fetch_cdn_resources()
        logger.log_info("CDN resources ready!")
        # Start PyPost watcher
        threading.Thread(target=run_pypost, daemon=True).start()
        logger.log_debug("Started PyPost.py in background watcher thread.")
        server_address = ("localhost", 8000)
        httpd: HTTPServer = HTTPServer(server_address, WebServerHTTPRequestHandler)
        logger.log_info(f"Serving on http://{server_address[0]}:{server_address[1]}")
        logger.log_info("LRU cache enabled for static assets (max 1024 files)")
        httpd.serve_forever()
    except (Exception, KeyboardInterrupt) as e:
        logger.log_info(f"Shutting down server.\n Reason: {e}")
        httpd.server_close()
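
# Example usage (a sketch, assuming the defaults above and that this module is run
# directly from the project directory):
#   $ python webserver.py
#   $ curl http://localhost:8000/                          # rendered index page
#   $ curl http://localhost:8000/package/js/prism.min.js   # cached CDN asset
#   $ curl -O http://localhost:8000/markdown/<article>.md  # <article> is a placeholder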