rust parser ; yaaayy i can now claim this project as fast.

This commit is contained in:
2025-09-23 17:47:30 +02:00
parent c7f0fae19b
commit a149009559
7 changed files with 211 additions and 9 deletions

4
.gitignore vendored
View File

@@ -1,2 +1,4 @@
__pycache__
env
env
target
Cargo.lock

156
PyPost.py
View File

@@ -2,6 +2,8 @@
import os
import sys
import time
import subprocess
import platform
from pathlib import Path
import marko
@@ -18,14 +20,69 @@ ROOT = Path(os.path.abspath("."))
MARKDOWN_DIR = ROOT / "markdown"
HTML_DIR = ROOT / "html"
# Determine executable extension based on OS
exe_ext = ".exe" if platform.system() == "Windows" else ""
RUST_PARSER_PATH = ROOT / "fastmd" / "target" / "release" / f"fastmd{exe_ext}"
# Create markdown parser with table support
# Fallback to debug build if release not found
if not RUST_PARSER_PATH.exists():
RUST_PARSER_PATH = ROOT / "fastmd" / "target" / "debug" / f"fastmd{exe_ext}"
# Create Python markdown parser with table support (fallback for small files)
markdown_parser = marko.Markdown(extensions=[GFM])
# Threshold for switching to Rust parser (number of lines)
RUST_PARSER_THRESHOLD = 500
Logger = Logger()
# Global obfuscate flag, default True
obfuscate = True
def count_lines_in_file(file_path: Path) -> int:
    """Return the number of lines contained in *file_path*.

    The file is opened as UTF-8 text and iterated line by line. If anything
    goes wrong while opening or reading it, the problem is reported through
    ``Logger.log_error`` and ``0`` is returned instead of raising.
    """
    total = 0
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            for _line in handle:
                total += 1
    except Exception as e:
        # A partially counted file is reported as 0, never as a partial total.
        Logger.log_error(f"Could not count lines in {file_path}: {e}")
        return 0
    return total
def should_use_rust_parser(md_path: Path) -> bool:
    """Tell whether *md_path* should be rendered by the Rust parser.

    Returns ``False`` immediately when the binary at ``RUST_PARSER_PATH`` does
    not exist. Otherwise the file's line count is compared against
    ``RUST_PARSER_THRESHOLD``: strictly more lines selects the Rust parser,
    anything else selects the Python parser. The decision is logged either way.
    """
    if not RUST_PARSER_PATH.exists():
        return False

    n_lines = count_lines_in_file(md_path)
    if n_lines > RUST_PARSER_THRESHOLD:
        Logger.log_rust_usage(f"Using Rust parser for {md_path} ({n_lines} lines)")
        return True

    Logger.log_debug(f"Using Python parser for {md_path} ({n_lines} lines)")
    return False
def parse_markdown_with_rust(md_path: Path) -> str:
    """Convert *md_path* to HTML by invoking the external Rust parser.

    The binary at ``RUST_PARSER_PATH`` is run with the markdown path as its
    only argument, and whatever it writes to stdout (decoded as UTF-8) is
    returned.

    Raises:
        subprocess.CalledProcessError: the parser exited with a non-zero
            status; its stderr is logged before the error is re-raised.
        Exception: any other failure while launching or running the parser is
            logged and re-raised unchanged.
    """
    try:
        command = [str(RUST_PARSER_PATH), str(md_path)]
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            encoding='utf-8',
            check=True,  # turn a non-zero exit status into CalledProcessError
        )
    except subprocess.CalledProcessError as e:
        Logger.log_error(f"Rust parser failed for {md_path}: {e}")
        Logger.log_error(f"stderr: {e.stderr}")
        raise
    except Exception as e:
        Logger.log_error(f"Error running Rust parser for {md_path}: {e}")
        raise
    return completed.stdout
def render_markdown(md_path: Path):
"""Render a single markdown file to an obfuscated HTML file."""
try:
@@ -34,7 +91,15 @@ def render_markdown(md_path: Path):
Logger.log_error(f"Could not read {md_path}: {e}")
return
html_body = markdown_parser.convert(text)
# Decide which parser to use based on file size
if should_use_rust_parser(md_path):
try:
html_body = parse_markdown_with_rust(md_path)
except Exception as e:
Logger.log_warning(f"Rust parser failed for {md_path}, falling back to Python parser: {e}")
html_body = markdown_parser.convert(text)
else:
html_body = markdown_parser.convert(text)
# Extract title from filename or first H1
title = md_path.stem
@@ -97,7 +162,6 @@ def render_markdown(md_path: Path):
# Create parent directories if needed
out_path.parent.mkdir(parents=True, exist_ok=True)
if obfuscate:
out_path.write_text(obfuscated_html, encoding="utf-8")
else:
@@ -119,7 +183,48 @@ def initial_scan(markdown_dir: Path):
render_markdown(md)
def build_rust_parser() -> bool:
    """Try to compile the Rust parser with ``cargo build --release``.

    The build is run inside ``ROOT / "fastmd"``. Before invoking cargo, both
    that directory and its ``Cargo.toml`` must exist; a missing one is logged
    and the build is not attempted.

    Returns:
        ``True`` when cargo exits successfully, ``False`` for every failure
        (missing directory or manifest, cargo not installed, non-zero exit
        status, or any other error). Failures are logged, never raised.
    """
    crate_dir = ROOT / "fastmd"
    manifest = crate_dir / "Cargo.toml"

    # Checked in order: the directory first, then the manifest inside it.
    preconditions = (
        (crate_dir, f"fastmd directory not found at {crate_dir}"),
        (manifest, f"Cargo.toml not found at {manifest}"),
    )
    for required_path, complaint in preconditions:
        if not required_path.exists():
            Logger.log_error(complaint)
            return False

    Logger.log_info("Attempting to build Rust parser with 'cargo build --release'...")

    try:
        completed = subprocess.run(
            ["cargo", "build", "--release"],
            cwd=str(crate_dir),
            capture_output=True,
            text=True,
            check=True,
        )
        Logger.log_info("Rust parser built successfully!")
        Logger.log_debug(f"Build output: {completed.stdout}")
        return True
    except subprocess.CalledProcessError as e:
        # cargo ran but reported a failure; surface its stderr for diagnosis.
        Logger.log_error(f"Failed to build Rust parser: {e}")
        Logger.log_error(f"Build stderr: {e.stderr}")
        return False
    except FileNotFoundError:
        # The cargo executable itself could not be launched.
        Logger.log_error("cargo command not found. Please install Rust and Cargo.")
        return False
    except Exception as e:
        Logger.log_error(f"Unexpected error building Rust parser: {e}")
        return False
if __name__ == "__main__":
# Check for markdown directory
if not MARKDOWN_DIR.exists():
alt_root = ROOT / "PyPost"
if alt_root.exists() and alt_root.is_dir():
@@ -127,13 +232,46 @@ if __name__ == "__main__":
ROOT = alt_root
MARKDOWN_DIR = ROOT / "markdown"
HTML_DIR = ROOT / "html"
# Update Rust parser path for new root
RUST_PARSER_PATH = ROOT / "fastmd" / "target" / "release" / f"fastmd{exe_ext}"
if not RUST_PARSER_PATH.exists():
RUST_PARSER_PATH = ROOT / "fastmd" / "target" / "debug" / f"fastmd{exe_ext}"
else:
Logger.log_error(f"Markdown directory not found: {MARKDOWN_DIR}")
Logger.log_warning("Please create a 'markdown' directory or use a 'PyPost' directory with one inside it.")
sys.exit(1)
# Check if Rust parser exists, if not try to build it
if not RUST_PARSER_PATH.exists():
Logger.log_warning(f"Rust parser not found at {RUST_PARSER_PATH}")
# Try to build the Rust parser
if build_rust_parser():
# Update path after successful build
RUST_PARSER_PATH = ROOT / "fastmd" / "target" / "release" / f"fastmd{exe_ext}"
if not RUST_PARSER_PATH.exists():
RUST_PARSER_PATH = ROOT / "fastmd" / "target" / "debug" / f"fastmd{exe_ext}"
if RUST_PARSER_PATH.exists():
Logger.log_info(f"Rust parser built and found at: {RUST_PARSER_PATH}")
else:
Logger.log_error("Build succeeded but parser binary not found")
Logger.log_warning("Will use Python parser for all files")
else:
Logger.log_error("Failed to build Rust parser")
Logger.log_warning("Will use Python parser for all files")
else:
Logger.log_info(f"Rust parser found at: {RUST_PARSER_PATH}")
# Log parser strategy
if RUST_PARSER_PATH.exists():
Logger.log_info(f"Will use Rust parser for files with more than {RUST_PARSER_THRESHOLD} lines")
else:
Logger.log_warning("Using Python parser for all files")
import argparse
parser = argparse.ArgumentParser(description="Monitor markdown directory and convert to HTML.")
parser = argparse.ArgumentParser(description="Monitor markdown directory and convert to HTML with dynamic parser selection.")
# This stores True when passed, but means "no obfuscation"
parser.add_argument(
@@ -141,11 +279,21 @@ if __name__ == "__main__":
action="store_false",
help="Disable HTML obfuscation."
)
parser.add_argument(
"--rust-threshold",
type=int,
default=500,
help=f"Line count threshold for using Rust parser (default: {RUST_PARSER_THRESHOLD})"
)
args = parser.parse_args()
# Invert it to get the obfuscate flag
obfuscate = not args.no_obfuscate
# Update threshold if specified
RUST_PARSER_THRESHOLD = args.rust_threshold
Logger.log_obfuscation_info(f"Obfuscation is {'enabled' if obfuscate else 'disabled'}", obfuscate)

BIN
css/icons/script.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 176 KiB

8
fastmd/Cargo.toml Normal file
View File

@@ -0,0 +1,8 @@
[package]
name = "fastmd"
version = "0.1.0"
edition = "2024"
# Cargo's manifest key is the plural `authors` and takes an array of strings;
# a singular `author` key is not recognised and is reported as unused.
authors = ["rattatwinko"]

[dependencies]
pulldown-cmark = "0.13.0"

37
fastmd/src/main.rs Normal file
View File

@@ -0,0 +1,37 @@
use std::{env, fs, process};
use pulldown_cmark::{Parser, Options, html};
/// Command-line entry point: render one markdown file to an HTML fragment.
///
/// Expects exactly one argument, the path of the markdown file. The rendered
/// HTML is written to stdout followed by a marker comment. A wrong argument
/// count or an unreadable file prints a message to stderr and exits with
/// status 1.
fn main() {
    // Exactly one positional argument (plus the program name) is accepted.
    let argv: Vec<String> = env::args().collect();
    if argv.len() != 2 {
        eprintln!("Usage: {} <markdown_file>", argv[0]);
        process::exit(1);
    }
    let path = &argv[1];

    // Load the whole file; bail out with a diagnostic if that fails.
    let markdown = fs::read_to_string(path).unwrap_or_else(|err| {
        eprintln!("Error reading {}: {}", path, err);
        process::exit(1);
    });

    // Extensions enabled on top of the base parser.
    let options = Options::ENABLE_TABLES
        | Options::ENABLE_FOOTNOTES
        | Options::ENABLE_STRIKETHROUGH
        | Options::ENABLE_TASKLISTS;

    // Render straight into a string buffer.
    let mut html_output = String::new();
    html::push_html(&mut html_output, Parser::new_ext(&markdown, options));

    // Emit only the body content (no <html>/<head>), tagged with its origin.
    println!("{} <!-- this was generated from rust with pulldown_cmark ; REASON: Large File -->", html_output);
}

View File

@@ -19,13 +19,15 @@
</head>
<body>
<noscript>
<h1 id="nojs">Please enable Javascript!</h1>
<div style="display: inline-flex; align-items: center;">
<img src="../../css/icons/script.png" width="45" height="45" style="vertical-align: middle; margin-right: 8px;" />
<h1 id="nojs" style="margin: 0;">Please enable Javascript!</h1>
</div>
<p>
<i><strong> If you might be wondering, what does the Script do?</strong></i><br/>
<ul id="nonenormalul">
<li>It strips the Links you see below from any .html extension</li>
<li>It is essential for themeswitching to work</li>
<li>it will definetly hak u >:3</li>
<li>It strips the .HTML ending from each file you see in the list below</li>
<li>It isn't necessary, but visually tweaks the page.</li>
</ul>
</p>
</noscript>

View File

@@ -32,3 +32,8 @@ class Logger:
print(f"[ INFO@{now} ]: {colorama.Fore.GREEN}{message}{colorama.Style.RESET_ALL}")
else:
print(f"[ INFO@{now} ]: {colorama.Fore.RED}{message}{colorama.Style.RESET_ALL}")
@staticmethod
def log_rust_usage(message: str) -> None:
    """Print *message* as a green ``RUST``-tagged line stamped with the local time.

    The whole line, including the ``[ RUST@<timestamp> ]`` prefix, is wrapped
    in the green colour code and followed by a style reset.
    """
    # strftime uses the current local time when no time tuple is supplied.
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    line = f"{colorama.Fore.GREEN}[ RUST@{timestamp} ]: {message}{colorama.Style.RESET_ALL}"
    print(line)