rust parser ; yaaayy i can now claim this project as fast.
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,2 +1,4 @@
|
||||
__pycache__
|
||||
env
|
||||
target
|
||||
Cargo.lock
|
||||
154
PyPost.py
154
PyPost.py
@@ -2,6 +2,8 @@
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import subprocess
|
||||
import platform
|
||||
from pathlib import Path
|
||||
|
||||
import marko
|
||||
@@ -18,14 +20,69 @@ ROOT = Path(os.path.abspath("."))
|
||||
MARKDOWN_DIR = ROOT / "markdown"
|
||||
HTML_DIR = ROOT / "html"
|
||||
|
||||
# Determine executable extension based on OS
|
||||
exe_ext = ".exe" if platform.system() == "Windows" else ""
|
||||
RUST_PARSER_PATH = ROOT / "fastmd" / "target" / "release" / f"fastmd{exe_ext}"
|
||||
|
||||
# Create markdown parser with table support
|
||||
# Fallback to debug build if release not found
|
||||
if not RUST_PARSER_PATH.exists():
|
||||
RUST_PARSER_PATH = ROOT / "fastmd" / "target" / "debug" / f"fastmd{exe_ext}"
|
||||
|
||||
# Create Python markdown parser with table support (fallback for small files)
|
||||
markdown_parser = marko.Markdown(extensions=[GFM])
|
||||
|
||||
# Threshold for switching to Rust parser (number of lines)
|
||||
RUST_PARSER_THRESHOLD = 500
|
||||
|
||||
Logger = Logger()
|
||||
|
||||
# Global obfuscate flag, default True
|
||||
obfuscate = True
|
||||
|
||||
def count_lines_in_file(file_path: Path) -> int:
    """Return how many lines *file_path* contains.

    The file is opened as UTF-8 text and iterated line by line. If opening
    or reading raises, the error is reported through ``Logger.log_error``
    and 0 is returned instead of propagating the exception.
    """
    total = 0
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            for _line in handle:
                total += 1
    except Exception as e:
        Logger.log_error(f"Could not count lines in {file_path}: {e}")
        return 0
    return total
|
||||
|
||||
def should_use_rust_parser(md_path: Path) -> bool:
    """Decide whether *md_path* should be rendered by the Rust parser.

    Returns False straight away (without logging) when no binary exists at
    ``RUST_PARSER_PATH``. Otherwise the file's line count is compared with
    ``RUST_PARSER_THRESHOLD``: strictly more lines selects the Rust parser.
    The decision is logged either way.
    """
    if RUST_PARSER_PATH.exists():
        line_count = count_lines_in_file(md_path)
        if line_count > RUST_PARSER_THRESHOLD:
            Logger.log_rust_usage(f"Using Rust parser for {md_path} ({line_count} lines)")
            return True
        Logger.log_debug(f"Using Python parser for {md_path} ({line_count} lines)")
    return False
|
||||
|
||||
def parse_markdown_with_rust(md_path: Path) -> str:
    """Convert *md_path* to HTML by invoking the external Rust binary.

    The executable at ``RUST_PARSER_PATH`` is run with the markdown path as
    its only argument, and whatever it writes to stdout is returned.

    Raises:
        subprocess.CalledProcessError: the binary exited with a non-zero
            status (``check=True``); its stderr is logged before re-raising.
        Exception: any other failure while launching or reading from the
            process is logged and re-raised unchanged.
    """
    try:
        command = [str(RUST_PARSER_PATH), str(md_path)]
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            encoding='utf-8',
            check=True,
        )
        return completed.stdout
    except subprocess.CalledProcessError as e:
        # Non-zero exit: surface the parser's own diagnostics, then let the
        # caller decide how to recover.
        Logger.log_error(f"Rust parser failed for {md_path}: {e}")
        Logger.log_error(f"stderr: {e.stderr}")
        raise
    except Exception as e:
        Logger.log_error(f"Error running Rust parser for {md_path}: {e}")
        raise
|
||||
|
||||
def render_markdown(md_path: Path):
|
||||
"""Render a single markdown file to an obfuscated HTML file."""
|
||||
try:
|
||||
@@ -34,6 +91,14 @@ def render_markdown(md_path: Path):
|
||||
Logger.log_error(f"Could not read {md_path}: {e}")
|
||||
return
|
||||
|
||||
# Decide which parser to use based on file size
|
||||
if should_use_rust_parser(md_path):
|
||||
try:
|
||||
html_body = parse_markdown_with_rust(md_path)
|
||||
except Exception as e:
|
||||
Logger.log_warning(f"Rust parser failed for {md_path}, falling back to Python parser: {e}")
|
||||
html_body = markdown_parser.convert(text)
|
||||
else:
|
||||
html_body = markdown_parser.convert(text)
|
||||
|
||||
# Extract title from filename or first H1
|
||||
@@ -97,7 +162,6 @@ def render_markdown(md_path: Path):
|
||||
# Create parent directories if needed
|
||||
out_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
if obfuscate:
|
||||
out_path.write_text(obfuscated_html, encoding="utf-8")
|
||||
else:
|
||||
@@ -119,7 +183,48 @@ def initial_scan(markdown_dir: Path):
|
||||
render_markdown(md)
|
||||
|
||||
|
||||
def build_rust_parser() -> bool:
    """Try to compile the Rust parser with ``cargo build --release``.

    The crate is expected in ``ROOT / "fastmd"`` and must contain a
    ``Cargo.toml``. Every failure mode (missing directory, missing manifest,
    cargo not installed, non-zero cargo exit, any other exception) is logged
    and reported as False; nothing is raised to the caller.

    Returns:
        True when cargo exited successfully, False otherwise.
    """
    fastmd_dir = ROOT / "fastmd"
    cargo_toml = fastmd_dir / "Cargo.toml"

    # Pre-flight checks: bail out before spawning cargo if the crate is absent.
    if not fastmd_dir.exists():
        Logger.log_error(f"fastmd directory not found at {fastmd_dir}")
        return False
    if not cargo_toml.exists():
        Logger.log_error(f"Cargo.toml not found at {cargo_toml}")
        return False

    Logger.log_info("Attempting to build Rust parser with 'cargo build --release'...")

    built = False
    try:
        # Build inside the crate directory; check=True turns a non-zero exit
        # status into CalledProcessError.
        completed = subprocess.run(
            ["cargo", "build", "--release"],
            cwd=str(fastmd_dir),
            capture_output=True,
            text=True,
            check=True,
        )
        Logger.log_info("Rust parser built successfully!")
        Logger.log_debug(f"Build output: {completed.stdout}")
        built = True
    except subprocess.CalledProcessError as e:
        Logger.log_error(f"Failed to build Rust parser: {e}")
        Logger.log_error(f"Build stderr: {e.stderr}")
    except FileNotFoundError:
        # Raised when the "cargo" executable itself cannot be found.
        Logger.log_error("cargo command not found. Please install Rust and Cargo.")
    except Exception as e:
        Logger.log_error(f"Unexpected error building Rust parser: {e}")
    return built
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Check for markdown directory
|
||||
if not MARKDOWN_DIR.exists():
|
||||
alt_root = ROOT / "PyPost"
|
||||
if alt_root.exists() and alt_root.is_dir():
|
||||
@@ -127,13 +232,46 @@ if __name__ == "__main__":
|
||||
ROOT = alt_root
|
||||
MARKDOWN_DIR = ROOT / "markdown"
|
||||
HTML_DIR = ROOT / "html"
|
||||
# Update Rust parser path for new root
|
||||
RUST_PARSER_PATH = ROOT / "fastmd" / "target" / "release" / f"fastmd{exe_ext}"
|
||||
if not RUST_PARSER_PATH.exists():
|
||||
RUST_PARSER_PATH = ROOT / "fastmd" / "target" / "debug" / f"fastmd{exe_ext}"
|
||||
else:
|
||||
Logger.log_error(f"Markdown directory not found: {MARKDOWN_DIR}")
|
||||
Logger.log_warning("Please create a 'markdown' directory or use a 'PyPost' directory with one inside it.")
|
||||
sys.exit(1)
|
||||
|
||||
# Check if Rust parser exists, if not try to build it
|
||||
if not RUST_PARSER_PATH.exists():
|
||||
Logger.log_warning(f"Rust parser not found at {RUST_PARSER_PATH}")
|
||||
|
||||
# Try to build the Rust parser
|
||||
if build_rust_parser():
|
||||
# Update path after successful build
|
||||
RUST_PARSER_PATH = ROOT / "fastmd" / "target" / "release" / f"fastmd{exe_ext}"
|
||||
if not RUST_PARSER_PATH.exists():
|
||||
RUST_PARSER_PATH = ROOT / "fastmd" / "target" / "debug" / f"fastmd{exe_ext}"
|
||||
|
||||
if RUST_PARSER_PATH.exists():
|
||||
Logger.log_info(f"Rust parser built and found at: {RUST_PARSER_PATH}")
|
||||
else:
|
||||
Logger.log_error("Build succeeded but parser binary not found")
|
||||
Logger.log_warning("Will use Python parser for all files")
|
||||
else:
|
||||
Logger.log_error("Failed to build Rust parser")
|
||||
Logger.log_warning("Will use Python parser for all files")
|
||||
else:
|
||||
Logger.log_info(f"Rust parser found at: {RUST_PARSER_PATH}")
|
||||
|
||||
# Log parser strategy
|
||||
if RUST_PARSER_PATH.exists():
|
||||
Logger.log_info(f"Will use Rust parser for files with more than {RUST_PARSER_THRESHOLD} lines")
|
||||
else:
|
||||
Logger.log_warning("Using Python parser for all files")
|
||||
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser(description="Monitor markdown directory and convert to HTML.")
|
||||
parser = argparse.ArgumentParser(description="Monitor markdown directory and convert to HTML with dynamic parser selection.")
|
||||
|
||||
# This stores True when passed, but means "no obfuscation"
|
||||
parser.add_argument(
|
||||
@@ -142,11 +280,21 @@ if __name__ == "__main__":
|
||||
help="Disable HTML obfuscation."
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--rust-threshold",
|
||||
type=int,
|
||||
default=500,
|
||||
help=f"Line count threshold for using Rust parser (default: {RUST_PARSER_THRESHOLD})"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Invert it to get the obfuscate flag
|
||||
obfuscate = not args.no_obfuscate
|
||||
|
||||
# Update threshold if specified
|
||||
RUST_PARSER_THRESHOLD = args.rust_threshold
|
||||
|
||||
Logger.log_obfuscation_info(f"Obfuscation is {'enabled' if obfuscate else 'disabled'}", obfuscate)
|
||||
|
||||
initial_scan(MARKDOWN_DIR)
|
||||
|
||||
BIN
css/icons/script.png
Normal file
BIN
css/icons/script.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 176 KiB |
8
fastmd/Cargo.toml
Normal file
8
fastmd/Cargo.toml
Normal file
@@ -0,0 +1,8 @@
|
||||
# Manifest for the `fastmd` command-line markdown-to-HTML converter.
[package]
name = "fastmd"
version = "0.1.0"
edition = "2024"
# Cargo's manifest field is `authors` (an array of strings); a singular
# `author` key is not part of the manifest format and is not picked up.
authors = ["rattatwinko"]

[dependencies]
# CommonMark parser/renderer used by src/main.rs.
pulldown-cmark = "0.13.0"
|
||||
37
fastmd/src/main.rs
Normal file
37
fastmd/src/main.rs
Normal file
@@ -0,0 +1,37 @@
|
||||
use std::{env, fs, process};
|
||||
use pulldown_cmark::{Parser, Options, html};
|
||||
|
||||
fn main() {
|
||||
// Get the file path from CLI args
|
||||
let args: Vec<String> = env::args().collect();
|
||||
if args.len() != 2 {
|
||||
eprintln!("Usage: {} <markdown_file>", args[0]);
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
let path = &args[1];
|
||||
|
||||
// Read file contents
|
||||
let markdown = match fs::read_to_string(path) {
|
||||
Ok(content) => content,
|
||||
Err(e) => {
|
||||
eprintln!("Error reading {}: {}", path, e);
|
||||
process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
// Configure parser with GitHub-flavored options
|
||||
let mut options = Options::empty();
|
||||
options.insert(Options::ENABLE_TABLES);
|
||||
options.insert(Options::ENABLE_FOOTNOTES);
|
||||
options.insert(Options::ENABLE_STRIKETHROUGH);
|
||||
options.insert(Options::ENABLE_TASKLISTS);
|
||||
|
||||
// Parse and render to HTML
|
||||
let parser = Parser::new_ext(&markdown, options);
|
||||
let mut html_output = String::new();
|
||||
html::push_html(&mut html_output, parser);
|
||||
|
||||
// Print only the body content (no <html>/<head>)
|
||||
println!("{} <!-- this was generated from rust with pulldown_cmark ; REASON: Large File -->", html_output);
|
||||
}
|
||||
@@ -19,13 +19,15 @@
|
||||
</head>
|
||||
<body>
|
||||
<noscript>
|
||||
<h1 id="nojs">Please enable Javascript!</h1>
|
||||
<div style="display: inline-flex; align-items: center;">
|
||||
<img src="../../css/icons/script.png" width="45" height="45" style="vertical-align: middle; margin-right: 8px;" />
|
||||
<h1 id="nojs" style="margin: 0;">Please enable Javascript!</h1>
|
||||
</div>
|
||||
<p>
|
||||
<i><strong> If you might be wondering, what does the Script do?</strong></i><br/>
|
||||
<ul id="nonenormalul">
|
||||
<li>It strips the Links you see below from any .html extension</li>
|
||||
<li>It is essential for themeswitching to work</li>
|
||||
<li>it will definetly hak u >:3</li>
|
||||
<li>It strips the .HTML ending from each file you see in the list below</li>
|
||||
<li>It isn't necessary, but it visually tweaks the page.</li>
|
||||
</ul>
|
||||
</p>
|
||||
</noscript>
|
||||
|
||||
@@ -32,3 +32,8 @@ class Logger:
|
||||
print(f"[ INFO@{now} ]: {colorama.Fore.GREEN}{message}{colorama.Style.RESET_ALL}")
|
||||
else:
|
||||
print(f"[ INFO@{now} ]: {colorama.Fore.RED}{message}{colorama.Style.RESET_ALL}")
|
||||
|
||||
@staticmethod
def log_rust_usage(message: str) -> None:
    """Print *message* in green, prefixed with ``[ RUST@<local timestamp> ]``."""
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    green = colorama.Fore.GREEN
    reset = colorama.Style.RESET_ALL
    print(f"{green}[ RUST@{timestamp} ]: {message}{reset}")
|
||||
|
||||
Reference in New Issue
Block a user