Merge pull request 'rustparser' (#8) from rustparser into main
Some checks failed
Deploy / build-and-deploy (push) Failing after 1s

Reviewed-on: http://10.0.0.13:3002/rattatwinko/markdownblog/pulls/8
This commit is contained in:
2025-06-25 18:01:28 +00:00
17 changed files with 1034 additions and 36 deletions

6
.gitignore vendored
View File

@@ -8,3 +8,9 @@ posts/pinned.json
posts/Aquaworld/tag-1.md
posts/pinned.json
posts/pinned.json
# Rust
target/
Cargo.lock
**/*.rs.bk
*.pdb

View File

@@ -1,3 +1,13 @@
# syntax=docker/dockerfile:1
FROM rust:latest as rust-build
WORKDIR /build
COPY ./markdown_backend ./markdown_backend
WORKDIR /build/markdown_backend
RUN rustup target add x86_64-unknown-linux-musl
RUN apt-get update && apt-get install -y musl-tools
# Build with musl target for static linking
RUN cargo build --release --target x86_64-unknown-linux-musl
FROM node:20
WORKDIR /app
@@ -7,9 +17,20 @@ COPY package*.json ./
RUN npm install
COPY . .
# Ensure posts directory exists and has correct permissions
RUN mkdir -p /app/posts
COPY posts/* /app/posts/
RUN chmod -R 755 /app/posts
# Copy the statically linked Rust binary from the build stage
COPY --from=rust-build /build/markdown_backend/target/x86_64-unknown-linux-musl/release/markdown_backend ./markdown_backend/target/release/markdown_backend
RUN chmod +x ./markdown_backend/target/release/markdown_backend
RUN npm run build
# Create and set permissions for the docker volume mount point
RUN mkdir -p /app/docker && chmod 777 /app/docker
VOLUME ["/app/docker"]
EXPOSE 3000

View File

@@ -2,6 +2,12 @@
set -e
# Check if Docker daemon is running
if ! docker info >/dev/null 2>&1; then
echo "Error: Docker daemon is not running"
exit 1
fi
IMAGE_NAME="markdownblog"
CONTAINER_NAME="markdownblog"
VOLUME_NAME="markdownblog-posts"
@@ -32,5 +38,11 @@ docker run -d \
# Verify the container is running BEFORE exec'ing into it. With `set -e`, a
# failed `docker exec` against a dead container would abort the script before
# the helpful error message below could be printed. `docker inspect` matches
# the exact container name, unlike grepping the `docker ps` table.
if [ "$(docker inspect -f '{{.State.Running}}' "$CONTAINER_NAME" 2>/dev/null)" != "true" ]; then
    echo "Error: Container failed to start. Check logs with: docker logs $CONTAINER_NAME"
    exit 1
fi
# Seed the volume with the posts baked into the image, but only if it is empty.
echo "Copying built-in posts to Docker volume if empty..."
docker exec "$CONTAINER_NAME" sh -c 'if [ -d /app/posts ] && [ -d /app/docker ] && [ "$(ls -A /app/docker)" = "" ]; then cp -r /app/posts/* /app/docker/; fi'
echo "Deployment complete!"
echo "App should be available at http://localhost:$PORT"

View File

@@ -0,0 +1,21 @@
[package]
name = "markdown_backend"
version = "0.1.0"
edition = "2021"
[dependencies]
chrono = { version = "0.4", features = ["serde"] }
serde = { version = "1.0", features = ["derive"] }
serde_yaml = "0.9"
pulldown-cmark = "0.9"
gray_matter = "0.2.8"
ammonia = "3.1"
slug = "0.1"
notify = "6.1"
syntect = { version = "5.1", features = ["default"] }
regex = "1.10"
clap = { version = "4.4", features = ["derive"] }
serde_json = "1.0"
html-escape = "0.2.13"
once_cell = "1.18"
sysinfo = "0.30.7"

View File

@@ -0,0 +1,77 @@
use clap::{Parser, Subcommand};
mod markdown;
use markdown::{get_all_posts, get_post_by_slug, get_posts_by_tag, watch_posts};
use serde_json;
use std::fs;
// Top-level command-line interface of the markdown backend.
// NOTE: plain `//` comments are used on purpose; `///` doc comments on clap
// derive items are picked up as help text and would alter the CLI output.
#[derive(Parser)]
#[command(name = "Markdown Backend")]
#[command(about = "A CLI for managing markdown blog posts", long_about = None)]
struct Cli {
// The subcommand selected on the command line (see `Commands`).
#[command(subcommand)]
command: Commands,
}
// Subcommands understood by the backend. `main` prints the result of `List`,
// `Show` and `Tags` as JSON on stdout. The existing `///` lines below are
// clap help text and therefore part of the program's output.
#[derive(Subcommand)]
enum Commands {
/// List all posts
List,
/// Show a post by slug
Show {
// Slug of the post; resolved to `<posts dir>/<slug>.md` by the markdown module.
slug: String,
},
/// List posts by tag
Tags {
// Tag to filter by; compared for exact equality against each post's tags.
tag: String,
},
/// Watch for changes in the posts directory
Watch,
/// Show Rust parser statistics
Rsparseinfo,
}
/// Entry point: restores the on-disk caches, parses the command line and runs
/// the selected subcommand.
///
/// `List`, `Show` and `Tags` print JSON on stdout; failures are printed on
/// stderr and the process exits with status 1. Only `Show` writes the caches
/// back to disk.
fn main() {
    markdown::load_post_cache_from_disk();
    let cli = Cli::parse();
    match &cli.command {
        Commands::List => {
            let posts = get_all_posts().unwrap_or_else(|e| {
                eprintln!("{}", e);
                std::process::exit(1);
            });
            println!("{}", serde_json::to_string(&posts).unwrap());
        }
        Commands::Show { slug } => match get_post_by_slug(slug) {
            Ok(post) => {
                println!("{}", serde_json::to_string(&post).unwrap());
                markdown::save_post_cache_to_disk();
            }
            Err(e) => {
                eprintln!("{}", e);
                std::process::exit(1);
            }
        },
        Commands::Tags { tag } => {
            let posts = get_posts_by_tag(tag).unwrap_or_else(|e| {
                eprintln!("{}", e);
                std::process::exit(1);
            });
            println!("{}", serde_json::to_string(&posts).unwrap());
        }
        Commands::Watch => {
            println!("Watching for changes in posts directory. Press Ctrl+C to exit.");
            // The watcher must stay bound to a named variable: `let _ = ...`
            // drops the value at the end of the statement, which stops the
            // watcher and closes its event channel before any event arrives.
            let _watcher = watch_posts(|| {
                println!("Posts directory changed!");
            })
            .unwrap_or_else(|e| {
                // Previously a start-up failure was discarded and the process
                // slept forever without watching anything.
                eprintln!("{}", e);
                std::process::exit(1);
            });
            // Keep the main thread alive
            loop {
                std::thread::sleep(std::time::Duration::from_secs(60));
            }
        }
        Commands::Rsparseinfo => {
            println!("{}", markdown::rsparseinfo());
        }
    }
}

View File

@@ -0,0 +1,374 @@
// src/markdown.rs
/*
This is the Rust Markdown Parser.
It supports caching of posts and is
BLAZINGLY FAST!
*/
use std::fs;
use std::path::{Path, PathBuf};
use chrono::{DateTime, Utc};
use serde::Deserialize;
use pulldown_cmark::{Parser, Options, html, Event, Tag, CowStr};
use gray_matter::engine::YAML;
use gray_matter::Matter;
use ammonia::clean;
use slug::slugify;
use notify::{RecursiveMode, RecommendedWatcher, Watcher, Config};
use std::sync::mpsc::channel;
use std::time::{Duration, Instant};
use syntect::highlighting::{ThemeSet, Style};
use syntect::parsing::SyntaxSet;
use syntect::html::{highlighted_html_for_string, IncludeBackground};
use once_cell::sync::Lazy;
use std::collections::HashMap;
use std::sync::RwLock;
use serde_json;
use sysinfo::{System, Pid, RefreshKind, CpuRefreshKind, ProcessRefreshKind};
/// File the rendered-post cache (`POST_CACHE`) is saved to and loaded from.
/// The path is relative, so it resolves against the process working directory.
const POSTS_CACHE_PATH: &str = "./cache/posts_cache.json";
/// File the per-post statistics (`POST_STATS`) are saved to and loaded from.
const POST_STATS_PATH: &str = "./cache/post_stats.json";
/// YAML front matter expected at the top of every post file.
///
/// `title` and `date` are mandatory: `get_post_by_slug` returns an error when
/// the front matter cannot be deserialized into this struct.
#[derive(Debug, Deserialize, Clone, serde::Serialize)]
pub struct PostFrontmatter {
/// Post title.
pub title: String,
/// Post date, kept as the raw string from the front matter (not parsed here).
pub date: String,
/// Optional list of tags; a missing list becomes an empty `Vec` in `Post`.
pub tags: Option<Vec<String>>,
/// Optional short summary.
pub summary: Option<String>,
}
/// A fully rendered post as produced by `get_post_by_slug`; this is the value
/// that is cached and serialized to JSON.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Post {
/// Slug the post was requested with (file path below the posts directory, without `.md`).
pub slug: String,
/// Title taken from the front matter.
pub title: String,
/// Date string taken verbatim from the front matter.
pub date: String,
/// Tags from the front matter (empty when none were given).
pub tags: Vec<String>,
/// Optional summary from the front matter.
pub summary: Option<String>,
/// Rendered HTML after sanitization with the `AMMONIA` builder.
pub content: String,
/// File creation time (modification time as fallback) as an RFC 3339 UTC string.
pub created_at: String,
/// Value of the `BLOG_OWNER` environment variable, or "Anonymous" when unset.
pub author: String,
}
/// Per-post parser statistics maintained by `get_post_by_slug` and reported by
/// `rsparseinfo`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, Default)]
pub struct PostStats {
/// Slug these statistics belong to.
pub slug: String,
/// Number of requests served from `POST_CACHE`.
pub cache_hits: u64,
/// Number of requests that had to read and render the file.
pub cache_misses: u64,
/// Milliseconds from the start of the last uncached request until the HTML
/// was sanitized; reset to 0 on a cache hit.
pub last_interpret_time_ms: u128,
/// Milliseconds spent assembling the `Post` value on the last uncached
/// request; reset to 0 on a cache hit.
pub last_compile_time_ms: u128,
/// Difference between the process CPU usage readings taken after and before
/// the last request. `f32` because that is what sysinfo's `cpu_usage()` yields.
pub last_cpu_usage_percent: f32, // Not f64
/// Outcome of the last request.
pub last_cache_status: String, // "hit" or "miss"
}
/// Rendered posts keyed by slug. Filled by `get_post_by_slug`, cleared by the
/// watcher thread, and persisted through the load/save functions below.
static POST_CACHE: Lazy<RwLock<HashMap<String, Post>>> = Lazy::new(|| RwLock::new(HashMap::new()));
/// Result of the last `get_all_posts` run; `None` until computed or after the
/// watcher thread invalidated it. Not persisted to disk.
static ALL_POSTS_CACHE: Lazy<RwLock<Option<Vec<Post>>>> = Lazy::new(|| RwLock::new(None));
/// Per-post statistics keyed by slug; persisted alongside `POST_CACHE`.
static POST_STATS: Lazy<RwLock<HashMap<String, PostStats>>> = Lazy::new(|| RwLock::new(HashMap::new()));
/// Locates the directory that holds the markdown posts.
///
/// The candidate locations are probed in a fixed order and the first one that
/// is an existing directory wins. When none qualifies, `./posts` is returned
/// regardless of whether it exists.
fn get_posts_directory() -> PathBuf {
    const SEARCH_ORDER: [&str; 4] = ["./posts", "../posts", "/posts", "/docker"];

    SEARCH_ORDER
        .iter()
        .map(|location| PathBuf::from(location))
        // `is_dir()` is already false for paths that do not exist.
        .find(|dir| dir.is_dir())
        .unwrap_or_else(|| PathBuf::from("./posts"))
}
/// Returns the creation timestamp of `path` in UTC.
///
/// If the creation time cannot be obtained, the last-modification time is
/// used instead.
///
/// # Errors
/// Fails when the metadata cannot be read, or when neither timestamp is
/// available.
fn get_file_creation_date(path: &Path) -> std::io::Result<DateTime<Utc>> {
    let file_info = fs::metadata(path)?;
    let timestamp = file_info.created().or_else(|_| file_info.modified())?;
    Ok(DateTime::<Utc>::from(timestamp))
}
fn process_anchor_links(content: &str) -> String {
// Replace [text](#anchor) with slugified anchor
let re = regex::Regex::new(r"\[([^\]]+)\]\(#([^)]+)\)").unwrap();
re.replace_all(content, |caps: &regex::Captures| {
let link_text = &caps[1];
let anchor = &caps[2];
let slugified = slugify(anchor);
format!("[{}](#{})", link_text, slugified)
}).to_string()
}
/// Removes emoji characters from `s`.
///
/// Headings are passed through this before slugification so that the
/// generated ids line up with the anchors the frontend expects. Only the
/// code-point ranges listed below are removed; every other character is kept
/// in its original order.
fn strip_emojis(s: &str) -> String {
    fn is_emoji(ch: char) -> bool {
        matches!(
            ch,
            '\u{1F600}'..='\u{1F64F}'   // Emoticons
            | '\u{1F300}'..='\u{1F5FF}' // Misc Symbols and Pictographs
            | '\u{1F680}'..='\u{1F6FF}' // Transport and Map
            | '\u{2600}'..='\u{26FF}'   // Misc symbols
            | '\u{2700}'..='\u{27BF}'   // Dingbats
            | '\u{1F900}'..='\u{1F9FF}' // Supplemental Symbols and Pictographs
            | '\u{1FA70}'..='\u{1FAFF}' // Symbols and Pictographs Extended-A
            | '\u{1F1E6}'..='\u{1F1FF}' // Regional Indicator Symbols
        )
    }

    let mut kept = String::with_capacity(s.len());
    for ch in s.chars() {
        if !is_emoji(ch) {
            kept.push(ch);
        }
    }
    kept
}
/// HTML sanitizer shared by all renders: ammonia's default policy, extended
/// so that the `id` attribute survives on `h1`..`h6` (the renderer emits
/// heading ids for in-page anchors).
static AMMONIA: Lazy<ammonia::Builder<'static>> = Lazy::new(|| {
    let mut sanitizer = ammonia::Builder::default();
    for heading_tag in ["h1", "h2", "h3", "h4", "h5", "h6"] {
        sanitizer.add_tag_attributes(heading_tag, &["id"]);
    }
    sanitizer
});
/// Returns the statistics of all known posts as a JSON array.
///
/// All posts are loaded first so that every post has a statistics entry; the
/// outcome of that load is ignored. If serialization fails, the empty array
/// `[]` is returned.
pub fn rsparseinfo() -> String {
    // Eagerly load all posts to populate stats
    let _ = get_all_posts();

    let stats_by_slug = POST_STATS.read().unwrap();
    let entries: Vec<&PostStats> = stats_by_slug.values().collect();
    // An empty list serializes to "[]" as well, so no special case is needed.
    serde_json::to_string(&entries).unwrap_or_else(|_| "[]".to_string())
}
pub fn get_post_by_slug(slug: &str) -> Result<Post, Box<dyn std::error::Error>> {
let mut sys = System::new_with_specifics(RefreshKind::new().with_processes(ProcessRefreshKind::everything()).with_cpu(CpuRefreshKind::everything()));
sys.refresh_processes();
let pid = sysinfo::get_current_pid()?;
let before_cpu = sys.process(pid).map(|p| p.cpu_usage()).unwrap_or(0.0);
let start = Instant::now();
let mut stats = POST_STATS.write().unwrap();
let entry = stats.entry(slug.to_string()).or_insert_with(|| PostStats {
slug: slug.to_string(),
..Default::default()
});
// Try cache first
if let Some(post) = POST_CACHE.read().unwrap().get(slug).cloned() {
entry.cache_hits += 1;
entry.last_interpret_time_ms = 0;
entry.last_compile_time_ms = 0;
entry.last_cache_status = "hit".to_string();
sys.refresh_process(pid);
entry.last_cpu_usage_percent = sys.process(pid).map(|p| p.cpu_usage()).unwrap_or(0.0) - before_cpu;
return Ok(post);
}
entry.cache_misses += 1;
entry.last_cache_status = "miss".to_string();
drop(stats); // Release lock before heavy work
let posts_dir = get_posts_directory();
let file_path = posts_dir.join(format!("{}.md", slug));
let file_content = fs::read_to_string(&file_path)?;
let matter = Matter::<YAML>::new();
let result = matter.parse(&file_content);
let front: PostFrontmatter = if let Some(data) = result.data {
match data.deserialize() {
Ok(front) => front,
Err(e) => {
eprintln!("Failed to deserialize frontmatter for post {}: {}", slug, e);
return Err("Failed to deserialize frontmatter".into());
}
}
} else {
eprintln!("No frontmatter found for post: {}", slug);
return Err("No frontmatter found".into());
};
let created_at = get_file_creation_date(&file_path)?;
let processed_markdown = process_anchor_links(&result.content);
let parser = Parser::new_ext(&processed_markdown, Options::all());
let mut html_output = String::new();
let mut heading_text = String::new();
let mut in_heading = false;
let mut heading_level = 0;
let mut in_code_block = false;
let mut code_block_lang = String::new();
let mut code_block_content = String::new();
let mut events = Vec::new();
let ss = SyntaxSet::load_defaults_newlines(); // SS 卐
let ts = ThemeSet::load_defaults();
let theme = &ts.themes["base16-ocean.dark"];
for event in parser {
match &event {
Event::Start(Tag::Heading(level, _, _)) => {
in_heading = true;
heading_level = *level as usize;
heading_text.clear();
},
Event::End(Tag::Heading(_, _, _)) => {
in_heading = false;
// Strip emojis before slugifying for the id
let heading_no_emoji = strip_emojis(&heading_text);
let id = slugify(&heading_no_emoji);
events.push(Event::Html(CowStr::Boxed(format!("<h{lvl} id=\"{id}\">", lvl=heading_level, id=id).into_boxed_str())));
events.push(Event::Text(CowStr::Boxed(heading_text.clone().into_boxed_str())));
events.push(Event::Html(CowStr::Boxed(format!("</h{lvl}>", lvl=heading_level).into_boxed_str())));
},
Event::Text(text) if in_heading => {
heading_text.push_str(text);
},
Event::Start(Tag::CodeBlock(kind)) => {
in_code_block = true;
code_block_content.clear();
code_block_lang = match kind {
pulldown_cmark::CodeBlockKind::Fenced(lang) => lang.to_string(),
pulldown_cmark::CodeBlockKind::Indented => String::new(),
};
},
Event::End(Tag::CodeBlock(_)) => {
in_code_block = false;
// Highlight code block
let highlighted = if !code_block_lang.is_empty() {
if let Some(syntax) = ss.find_syntax_by_token(&code_block_lang) {
highlighted_html_for_string(&code_block_content, &ss, syntax, theme).unwrap_or_else(|_| format!("<pre><code>{}</code></pre>", html_escape::encode_text(&code_block_content)))
} else {
format!("<pre><code>{}</code></pre>", html_escape::encode_text(&code_block_content))
}
} else {
// No language specified
format!("<pre><code>{}</code></pre>", html_escape::encode_text(&code_block_content))
};
events.push(Event::Html(CowStr::Boxed(highlighted.into_boxed_str())));
},
Event::Text(text) if in_code_block => {
code_block_content.push_str(text);
},
_ if !in_heading && !in_code_block => {
events.push(event);
},
_ => {},
}
}
html::push_html(&mut html_output, events.into_iter());
let sanitized_html = AMMONIA.clean(&html_output).to_string();
let interpret_time = start.elapsed();
let compile_start = Instant::now();
let post = Post {
slug: slug.to_string(),
title: front.title,
date: front.date,
tags: front.tags.unwrap_or_default(),
summary: front.summary,
content: sanitized_html,
created_at: created_at.to_rfc3339(),
author: std::env::var("BLOG_OWNER").unwrap_or_else(|_| "Anonymous".to_string()),
};
let compile_time = compile_start.elapsed();
// Insert into cache
POST_CACHE.write().unwrap().insert(slug.to_string(), post.clone());
// Update stats
let mut stats = POST_STATS.write().unwrap();
let entry = stats.entry(slug.to_string()).or_insert_with(|| PostStats {
slug: slug.to_string(),
..Default::default()
});
entry.last_interpret_time_ms = interpret_time.as_millis();
entry.last_compile_time_ms = compile_time.as_millis();
sys.refresh_process(pid);
entry.last_cpu_usage_percent = sys.process(pid).map(|p| p.cpu_usage()).unwrap_or(0.0) - before_cpu;
Ok(post)
}
pub fn get_all_posts() -> Result<Vec<Post>, Box<dyn std::error::Error>> {
// Try cache first
if let Some(posts) = ALL_POSTS_CACHE.read().unwrap().clone() {
return Ok(posts);
}
let posts_dir = get_posts_directory();
let mut posts = Vec::new();
for entry in fs::read_dir(posts_dir)? {
let entry = entry?;
let path = entry.path();
if path.extension().map(|e| e == "md").unwrap_or(false) {
let file_stem = path.file_stem().unwrap().to_string_lossy();
if let Ok(post) = get_post_by_slug(&file_stem) {
// Insert each post into the individual post cache as well
POST_CACHE.write().unwrap().insert(file_stem.to_string(), post.clone());
posts.push(post);
}
}
}
posts.sort_by(|a, b| b.created_at.cmp(&a.created_at));
// Cache the result
*ALL_POSTS_CACHE.write().unwrap() = Some(posts.clone());
Ok(posts)
}
/// Returns all posts carrying exactly the tag `tag` (case-sensitive), in the
/// order produced by `get_all_posts`.
///
/// # Errors
/// Propagates any error from `get_all_posts`.
pub fn get_posts_by_tag(tag: &str) -> Result<Vec<Post>, Box<dyn std::error::Error>> {
    let tagged = get_all_posts()?
        .into_iter()
        .filter(|post| post.tags.iter().any(|candidate| candidate == tag))
        .collect();
    Ok(tagged)
}
/// Starts watching the posts directory (recursively) and invokes `on_change`
/// from a background thread after every notification.
///
/// Each notification clears `POST_CACHE` and `ALL_POSTS_CACHE` before
/// `on_change` runs. Only the in-memory caches of this process are affected.
///
/// The returned watcher MUST be kept alive by the caller: dropping it stops
/// the watch and closes the event channel, which ends the background thread.
///
/// # Errors
/// Fails when the watcher cannot be created or the directory cannot be
/// watched.
pub fn watch_posts<F: Fn() + Send + 'static>(on_change: F) -> notify::Result<RecommendedWatcher> {
    let (tx, rx) = channel();
    let mut watcher = RecommendedWatcher::new(tx, Config::default())?;
    watcher.watch(get_posts_directory().as_path(), RecursiveMode::Recursive)?;

    std::thread::spawn(move || loop {
        match rx.recv() {
            Ok(notification) => {
                // The backend can deliver errors through the same channel.
                // Report them, but still invalidate: an error may mean that
                // change events were missed.
                if let Err(e) = &notification {
                    eprintln!("watch error: {:?}", e);
                }
                // Invalidate caches on any change
                POST_CACHE.write().unwrap().clear();
                *ALL_POSTS_CACHE.write().unwrap() = None;
                on_change();
            }
            Err(e) => {
                // The sending side is gone, i.e. the watcher was dropped.
                eprintln!("watch error: {:?}", e);
                break;
            }
        }
    });

    Ok(watcher)
}
/// Restores `POST_CACHE` and `POST_STATS` from their JSON files.
///
/// Best effort: a file that is missing, unreadable or not valid JSON leaves
/// the corresponding in-memory map untouched.
pub fn load_post_cache_from_disk() {
    let cached_posts = fs::read_to_string(POSTS_CACHE_PATH)
        .ok()
        .and_then(|raw| serde_json::from_str::<HashMap<String, Post>>(&raw).ok());
    if let Some(posts) = cached_posts {
        *POST_CACHE.write().unwrap() = posts;
    }

    let cached_stats = fs::read_to_string(POST_STATS_PATH)
        .ok()
        .and_then(|raw| serde_json::from_str::<HashMap<String, PostStats>>(&raw).ok());
    if let Some(stats) = cached_stats {
        *POST_STATS.write().unwrap() = stats;
    }
}
/// Writes `POST_CACHE` and `POST_STATS` to their JSON files, creating the
/// `./cache` directory if necessary.
///
/// Best effort: serialization and I/O failures are ignored.
pub fn save_post_cache_to_disk() {
    let posts_json = serde_json::to_string(&*POST_CACHE.read().unwrap());
    if let Ok(json) = posts_json {
        let _ = fs::create_dir_all("./cache");
        let _ = fs::write(POSTS_CACHE_PATH, json);
    }

    let stats_json = serde_json::to_string(&*POST_STATS.read().unwrap());
    if let Ok(json) = stats_json {
        let _ = fs::create_dir_all("./cache");
        let _ = fs::write(POST_STATS_PATH, json);
    }
}

30
package-lock.json generated
View File

@@ -15,6 +15,7 @@
"autoprefixer": "^10.4.17",
"bcrypt": "^5.0.2",
"bcryptjs": "^2.4.3",
"chart.js": "^4.5.0",
"chokidar": "^3.6.0",
"date-fns": "^3.6.0",
"dompurify": "^3.0.9",
@@ -28,6 +29,7 @@
"pm2": "^6.0.8",
"postcss": "^8.4.35",
"react": "^18.2.0",
"react-chartjs-2": "^5.3.0",
"react-dom": "^18.2.0",
"tailwindcss": "^3.4.1",
"typescript": "^5.3.3"
@@ -502,6 +504,12 @@
"@jridgewell/sourcemap-codec": "^1.4.14"
}
},
"node_modules/@kurkle/color": {
"version": "0.3.4",
"resolved": "https://registry.npmjs.org/@kurkle/color/-/color-0.3.4.tgz",
"integrity": "sha512-M5UknZPHRu3DEDWoipU6sE8PdkZ6Z/S+v4dD+Ke8IaNlpdSQah50lz1KtcFBa2vsdOnwbbnxJwVM4wty6udA5w==",
"license": "MIT"
},
"node_modules/@mapbox/node-pre-gyp": {
"version": "1.0.11",
"resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz",
@@ -2214,6 +2222,18 @@
"integrity": "sha512-syedaZ9cPe7r3hoQA9twWYKu5AIyCswN5+szkmPBe9ccdLrj4bYaCnLVPTLd2kgVRc7+zoX4tyPgRnFKCj5YjQ==",
"license": "MIT/X11"
},
"node_modules/chart.js": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/chart.js/-/chart.js-4.5.0.tgz",
"integrity": "sha512-aYeC/jDgSEx8SHWZvANYMioYMZ2KX02W6f6uVfyteuCGcadDLcYVHdfdygsTQkQ4TKn5lghoojAsPj5pu0SnvQ==",
"license": "MIT",
"dependencies": {
"@kurkle/color": "^0.3.0"
},
"engines": {
"pnpm": ">=8"
}
},
"node_modules/chokidar": {
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz",
@@ -6805,6 +6825,16 @@
"node": ">=0.10.0"
}
},
"node_modules/react-chartjs-2": {
"version": "5.3.0",
"resolved": "https://registry.npmjs.org/react-chartjs-2/-/react-chartjs-2-5.3.0.tgz",
"integrity": "sha512-UfZZFnDsERI3c3CZGxzvNJd02SHjaSJ8kgW1djn65H1KK8rehwTjyrRKOG3VTMG8wtHZ5rgAO5oTHtHi9GCCmw==",
"license": "MIT",
"peerDependencies": {
"chart.js": "^4.1.1",
"react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
}
},
"node_modules/react-dom": {
"version": "18.3.1",
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz",

View File

@@ -18,6 +18,7 @@
"autoprefixer": "^10.4.17",
"bcrypt": "^5.0.2",
"bcryptjs": "^2.4.3",
"chart.js": "^4.5.0",
"chokidar": "^3.6.0",
"date-fns": "^3.6.0",
"dompurify": "^3.0.9",
@@ -31,6 +32,7 @@
"pm2": "^6.0.8",
"postcss": "^8.4.35",
"react": "^18.2.0",
"react-chartjs-2": "^5.3.0",
"react-dom": "^18.2.0",
"tailwindcss": "^3.4.1",
"typescript": "^5.3.3"

View File

@@ -29,6 +29,8 @@ author: Rattatwinko
- [Support the Project ❤️](#support-the-project)
- [Acknowledgments 🙏](#acknowledgments)
- [Folder Emojis 🇦🇹](#folder-emoji-technical-note)
- [API 🏗️](#api)
- [ToT, and Todo](#train-of-thought-for-this-project-and-todo)
---
@@ -387,6 +389,35 @@ Thanks for choosing MarkdownBlog! If you find it useful, please:
---
## API
MarkdownBlog provides a built-in RESTful API to serve post data, handle live updates, and support integrations. The API is used internally by the frontend to fetch posts, stream updates (for live reloads), and manage features like emoji and pin assignments. You can also interact with these endpoints to build custom tools or integrations.
Key API endpoints include:
- `/api/posts`: Fetch all blog posts as JSON.
- `/api/posts/[slug]`: Fetch a single post by its slug.
- `/api/posts/stream`: Server-Sent Events (SSE) endpoint for real-time updates when posts change.
- `/api/posts/webhook`: Webhook endpoint to notify the app of external changes (e.g., from CI/CD or scripts).
All API routes are implemented using Next.js API routes and are available out of the box. For more details, check the code in the `src/app/api/posts/` directory.
---
## Train of Thought for this Project and Todo
Ok, so when I originally started this (about a week ago, speaking from 24.6.25), I really had no thought of this becoming a huge thing. But realistically speaking, this repository is 2 MiB large, and it's bloated. That aside, it's a really cool little thing you can deploy anywhere Docker runs.
As you may have noticed, it is not very mindful of browser resources, though.
|<span style="color:pink;">IS DONE</span>|Task|
|-------|----|
|<span style="color:red;">partly</span> / <span style="color:orange;">working on it</span>|_Rewrite_ the Markdown Parser in **Rust** ; This works for local Builds but in Docker does not work due to permission error|
---
<!--Markdown Image :heart:-->
<img src="https://blog.cyon.ch/wp-content/uploads/2016/05/i-love-markdown.png" alt="I looooooove Markdown" style="display:block;margin:0 auto;">

View File

@@ -256,6 +256,16 @@ export default function ManagePage() {
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15" />
</svg>
</button>
<Link
href="/admin/manage/rust-status"
className="px-4 py-3 sm:py-2 bg-teal-600 text-white rounded hover:bg-teal-700 transition-colors text-base font-medium flex items-center"
title="Rust Parser Status"
>
<svg className="h-5 w-5 mr-2" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 16h-1v-4h-1m1-4h.01M12 20a8 8 0 100-16 8 8 0 000 16z" />
</svg>
Rust Parser Status
</Link>
<button
onClick={handleLogout}
className="px-4 py-3 sm:py-2 bg-red-600 text-white rounded hover:bg-red-700 text-base font-medium"

View File

@@ -0,0 +1,74 @@
import React, { useEffect, useState } from 'react';
// Shape of one entry returned by `/api/admin/posts?rsparseinfo=1`, which
// forwards the JSON printed by the Rust backend's `rsparseinfo` command.
// The backend's PostStats struct also has `last_cpu_usage_percent` and
// `last_cache_status` fields; they are not declared or used here.
interface PostStats {
// Post slug; used as the React row key.
slug: string;
// Number of requests served from the backend's post cache.
cache_hits: number;
// Number of requests that required rendering the post.
cache_misses: number;
// Duration of the last uncached render up to HTML sanitization, in milliseconds.
last_interpret_time_ms: number;
// Duration of assembling the post object on the last uncached render, in milliseconds.
last_compile_time_ms: number;
}
/**
 * Admin page listing the Rust parser statistics per post.
 *
 * The statistics are fetched once on mount and then polled every 5 seconds.
 */
export default function RustStatusPage() {
  const [stats, setStats] = useState<PostStats[]>([]);
  // `loading` covers the initial fetch only. Flipping it on every poll would
  // replace the table with "Loading..." every 5 seconds.
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  const fetchStats = async () => {
    try {
      const res = await fetch('/api/admin/posts?rsparseinfo=1');
      if (!res.ok) throw new Error('Failed to fetch stats');
      const data = await res.json();
      // Guard against non-array payloads; `stats.map` below would throw.
      if (!Array.isArray(data)) throw new Error('Unexpected stats response');
      setStats(data);
      setError(null);
    } catch (e: any) {
      setError(e.message || 'Unknown error');
    } finally {
      setLoading(false);
    }
  };

  useEffect(() => {
    fetchStats();
    const interval = setInterval(fetchStats, 5000);
    return () => clearInterval(interval);
  }, []);

  return (
    <div className="p-8 max-w-4xl mx-auto">
      <h1 className="text-2xl font-bold mb-6">Rust Parser Status</h1>
      {loading && <div>Loading...</div>}
      {error && <div className="text-red-500">{error}</div>}
      {!loading && !error && (
        <div className="overflow-x-auto">
          <table className="min-w-full border border-gray-300 bg-white shadow-md rounded">
            <thead>
              <tr className="bg-gray-100">
                <th className="px-4 py-2 text-left">Slug</th>
                <th className="px-4 py-2 text-right">Cache Hits</th>
                <th className="px-4 py-2 text-right">Cache Misses</th>
                <th className="px-4 py-2 text-right">Last Interpret Time (ms)</th>
                <th className="px-4 py-2 text-right">Last Compile Time (ms)</th>
              </tr>
            </thead>
            <tbody>
              {stats.length === 0 ? (
                <tr><td colSpan={5} className="text-center py-4">No stats available.</td></tr>
              ) : (
                stats.map(stat => (
                  <tr key={stat.slug} className="border-t">
                    <td className="px-4 py-2 font-mono">{stat.slug}</td>
                    <td className="px-4 py-2 text-right">{stat.cache_hits}</td>
                    <td className="px-4 py-2 text-right">{stat.cache_misses}</td>
                    <td className="px-4 py-2 text-right">{stat.last_interpret_time_ms}</td>
                    <td className="px-4 py-2 text-right">{stat.last_compile_time_ms}</td>
                  </tr>
                ))
              )}
            </tbody>
          </table>
        </div>
      )}
    </div>
  );
}

View File

@@ -0,0 +1,201 @@
'use client';
import React, { useEffect, useState } from 'react';
import { Bar } from 'react-chartjs-2';
import {
Chart as ChartJS,
CategoryScale,
LinearScale,
BarElement,
Title,
Tooltip,
Legend,
ChartOptions,
} from 'chart.js';
ChartJS.register(CategoryScale, LinearScale, BarElement, Title, Tooltip, Legend);
// Shape of one entry returned by `/api/admin/posts?rsparseinfo=1`, which
// forwards the JSON printed by the Rust backend's `rsparseinfo` command.
// The backend's PostStats struct also has `last_cpu_usage_percent` and
// `last_cache_status` fields; they are not declared or used here.
interface PostStats {
// Post slug; used as chart label and React row key.
slug: string;
// Number of requests served from the backend's post cache.
cache_hits: number;
// Number of requests that required rendering the post.
cache_misses: number;
// Duration of the last uncached render up to HTML sanitization, in milliseconds.
last_interpret_time_ms: number;
// Duration of assembling the post object on the last uncached render, in milliseconds.
last_compile_time_ms: number;
}
/**
 * Admin dashboard for the Rust markdown parser: summary cards, a stacked bar
 * chart of cache hits/misses per post, and the raw statistics table.
 *
 * Data is fetched on mount, on the "Refresh" button, whenever a `changed`
 * message arrives on the `posts-changed` BroadcastChannel, and every
 * 2 seconds while auto-refresh is enabled.
 */
export default function RustStatusPage() {
  const [stats, setStats] = useState<PostStats[]>([]);
  // `loading` covers the initial fetch only. Flipping it on every refresh
  // would swap the whole dashboard for the spinner every 2 seconds.
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const [autoRefresh, setAutoRefresh] = useState(false);

  const fetchStats = async () => {
    try {
      const res = await fetch('/api/admin/posts?rsparseinfo=1');
      if (!res.ok) throw new Error('Failed to fetch stats');
      const data = await res.json();
      // Guard against non-array payloads; the reducers and `map` calls below
      // would throw on anything else.
      if (!Array.isArray(data)) throw new Error('Unexpected stats response');
      setStats(data);
      setError(null);
    } catch (e: any) {
      setError(e.message || 'Unknown error');
    } finally {
      setLoading(false);
    }
  };

  React.useEffect(() => {
    fetchStats();
    // Listen for post changes via BroadcastChannel
    let bc: BroadcastChannel | null = null;
    if (typeof window !== 'undefined' && 'BroadcastChannel' in window) {
      bc = new BroadcastChannel('posts-changed');
      bc.onmessage = (event) => {
        if (event.data === 'changed') {
          fetchStats();
        }
      };
    }
    return () => {
      if (bc) bc.close();
    };
  }, []);

  // Handle auto-refresh toggle. The interval lives entirely inside this
  // effect, so its cleanup is the single place where it gets cleared.
  React.useEffect(() => {
    if (!autoRefresh) return;
    const timer = setInterval(fetchStats, 2000);
    return () => clearInterval(timer);
  }, [autoRefresh]);

  // Dashboard summary calculations
  const totalHits = stats.reduce((sum, s) => sum + s.cache_hits, 0);
  const totalMisses = stats.reduce((sum, s) => sum + s.cache_misses, 0);
  const avgInterpret = stats.length ? (stats.reduce((sum, s) => sum + s.last_interpret_time_ms, 0) / stats.length).toFixed(1) : 0;
  const avgCompile = stats.length ? (stats.reduce((sum, s) => sum + s.last_compile_time_ms, 0) / stats.length).toFixed(1) : 0;

  // Chart data
  const chartData = {
    labels: stats.map(s => s.slug),
    datasets: [
      {
        label: 'Cache Hits',
        data: stats.map(s => s.cache_hits),
        backgroundColor: 'rgba(34,197,94,0.7)',
      },
      {
        label: 'Cache Misses',
        data: stats.map(s => s.cache_misses),
        backgroundColor: 'rgba(239,68,68,0.7)',
      },
    ],
  };

  const chartOptions: ChartOptions<'bar'> = {
    responsive: true,
    plugins: {
      legend: { position: 'top' },
      title: { display: true, text: 'Cache Hits & Misses per Post' },
    },
    scales: {
      x: { stacked: true },
      y: { stacked: true, beginAtZero: true },
    },
  };

  return (
    <div className="p-8 max-w-6xl mx-auto">
      <h1 className="text-3xl font-bold mb-8 text-center">Rust Parser Dashboard</h1>
      <div className="flex justify-end gap-4 mb-4">
        <button
          onClick={fetchStats}
          className="px-4 py-2 bg-blue-600 text-white rounded shadow hover:bg-blue-700"
        >
          Refresh
        </button>
        <label className="flex items-center gap-2 cursor-pointer">
          <input
            type="checkbox"
            checked={autoRefresh}
            onChange={e => setAutoRefresh(e.target.checked)}
            className="form-checkbox"
          />
          <span className="text-sm">Auto-refresh every 2s</span>
        </label>
      </div>
      {loading && (
        <div className="flex flex-col items-center justify-center h-64">
          <div className="animate-spin rounded-full h-12 w-12 border-b-2 border-gray-900 mb-4"></div>
          <div className="text-lg">Loading stats...</div>
        </div>
      )}
      {error && (
        <div className="text-red-500 text-center text-lg">{error}</div>
      )}
      {!loading && !error && (
        <>
          {/* Summary Cards */}
          <div className="grid grid-cols-1 md:grid-cols-4 gap-6 mb-8">
            <div className="bg-green-100 rounded-lg p-6 flex flex-col items-center shadow">
              <span className="text-2xl font-bold text-green-700">{totalHits}</span>
              <span className="text-gray-700 mt-2">Total Cache Hits</span>
            </div>
            <div className="bg-red-100 rounded-lg p-6 flex flex-col items-center shadow">
              <span className="text-2xl font-bold text-red-700">{totalMisses}</span>
              <span className="text-gray-700 mt-2">Total Cache Misses</span>
            </div>
            <div className="bg-blue-100 rounded-lg p-6 flex flex-col items-center shadow">
              <span className="text-2xl font-bold text-blue-700">{avgInterpret} ms</span>
              <span className="text-gray-700 mt-2">Avg Interpret Time</span>
            </div>
            <div className="bg-purple-100 rounded-lg p-6 flex flex-col items-center shadow">
              <span className="text-2xl font-bold text-purple-700">{avgCompile} ms</span>
              <span className="text-gray-700 mt-2">Avg Compile Time</span>
            </div>
          </div>
          {/* Bar Chart */}
          <div className="bg-white rounded-lg shadow p-6 mb-10">
            <Bar data={chartData} options={chartOptions} height={120} />
          </div>
          {/* Raw Data Table */}
          <div className="overflow-x-auto">
            <table className="min-w-full border border-gray-300 bg-white shadow-md rounded">
              <thead>
                <tr className="bg-gray-100">
                  <th className="px-4 py-2 text-left">Slug</th>
                  <th className="px-4 py-2 text-right">Cache Hits</th>
                  <th className="px-4 py-2 text-right">Cache Misses</th>
                  <th className="px-4 py-2 text-right">Last Interpret Time (ms)</th>
                  <th className="px-4 py-2 text-right">Last Compile Time (ms)</th>
                </tr>
              </thead>
              <tbody>
                {stats.length === 0 ? (
                  <tr><td colSpan={5} className="text-center py-4">No stats available.</td></tr>
                ) : (
                  stats.map(stat => (
                    <tr key={stat.slug} className="border-t">
                      <td className="px-4 py-2 font-mono">{stat.slug}</td>
                      <td className="px-4 py-2 text-right">{stat.cache_hits}</td>
                      <td className="px-4 py-2 text-right">{stat.cache_misses}</td>
                      <td className="px-4 py-2 text-right">{stat.last_interpret_time_ms}</td>
                      <td className="px-4 py-2 text-right">{stat.last_compile_time_ms}</td>
                    </tr>
                  ))
                )}
              </tbody>
            </table>
          </div>
        </>
      )}
    </div>
  );
}

View File

@@ -3,6 +3,7 @@ import fs from 'fs';
import path from 'path';
import matter from 'gray-matter';
import { getPostsDirectory } from '@/lib/postsDirectory';
import { spawnSync } from 'child_process';
const postsDirectory = getPostsDirectory();
@@ -48,6 +49,27 @@ export async function POST(request: Request) {
}
export async function GET(request: Request) {
const { searchParams } = new URL(request.url);
const info = searchParams.get('rsparseinfo');
if (info === '1') {
// Call the Rust backend for parser stats
const rustResult = spawnSync(
process.cwd() + '/markdown_backend/target/release/markdown_backend',
['rsparseinfo'],
{ encoding: 'utf-8' }
);
if (rustResult.status === 0 && rustResult.stdout) {
return new Response(rustResult.stdout, {
status: 200,
headers: { 'Content-Type': 'application/json' },
});
} else {
return new Response(JSON.stringify({ error: rustResult.stderr || rustResult.error }), {
status: 500,
headers: { 'Content-Type': 'application/json' },
});
}
}
// Return the current pinned.json object
try {
const pinnedPath = path.join(process.cwd(), 'posts', 'pinned.json');

View File

@@ -9,6 +9,7 @@ import DOMPurify from 'dompurify';
import { JSDOM } from 'jsdom';
import hljs from 'highlight.js';
import { getPostsDirectory } from '@/lib/postsDirectory';
import { spawnSync } from 'child_process';
const postsDirectory = getPostsDirectory();
@@ -52,6 +53,29 @@ marked.setOptions({
async function getPostBySlug(slug: string) {
const realSlug = slug.replace(/\.md$/, '');
const fullPath = path.join(postsDirectory, `${realSlug}.md`);
let rustResult;
try {
// Try Rust backend first
rustResult = spawnSync(
path.resolve(process.cwd(), 'markdown_backend/target/release/markdown_backend'),
['show', realSlug],
{ encoding: 'utf-8' }
);
if (rustResult.status === 0 && rustResult.stdout) {
// Expect Rust to output a JSON object matching the post shape
const post = JSON.parse(rustResult.stdout);
// Map snake_case to camelCase for frontend compatibility
post.createdAt = post.created_at;
delete post.created_at;
return post;
} else {
console.error('[Rust parser error]', rustResult.stderr || rustResult.error);
}
} catch (e) {
console.error('[Rust parser exception]', e);
}
// Fallback to TypeScript parser
const fileContents = fs.readFileSync(fullPath, 'utf8');
const { data, content } = matter(fileContents);
const createdAt = getFileCreationDate(fullPath);
@@ -60,12 +84,8 @@ async function getPostBySlug(slug: string) {
try {
// Convert markdown to HTML
const rawHtml = marked.parse(content);
// Create a DOM window for DOMPurify
const window = new JSDOM('').window;
const purify = DOMPurify(window);
// Sanitize the HTML
processedContent = purify.sanitize(rawHtml as string, {
ALLOWED_TAGS: [
'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
@@ -80,11 +100,10 @@ async function getPostBySlug(slug: string) {
'src', 'alt', 'title', 'width', 'height',
'frameborder', 'allowfullscreen'
],
ALLOWED_URI_REGEXP: /^(?:(?:(?:f|ht)tps?|mailto|tel|callto|cid|xmpp):|[^a-z]|[a-z+.\-]+(?:[^a-z+.\-:]|$))/i
ALLOWED_URI_REGEXP: /^(?:(?:(?:f|ht)tps?|mailto|tel|callto|cid|xmpp):|[^a-z]|[a-z+.-]+(?:[^a-z+.-:]|$))/i
});
} catch (err) {
console.error(`Error processing markdown for slug "${realSlug}":`, err);
// Return a more informative error message in the content
processedContent = `<div class="error-message">
<p>Error processing markdown content. Please check the console for details.</p>
<pre>${err instanceof Error ? err.message : 'Unknown error'}</pre>
@@ -107,11 +126,40 @@ export async function GET(
request: Request,
{ params }: { params: { slug: string[] | string } }
) {
let parser = 'typescript';
let rustError = '';
try {
const slugArr = Array.isArray(params.slug) ? params.slug : [params.slug];
const slugPath = slugArr.join('/');
const post = await getPostBySlug(slugPath);
return NextResponse.json(post);
let post;
try {
const rustResult = spawnSync(
path.resolve(process.cwd(), 'markdown_backend/target/release/markdown_backend'),
['show', slugPath],
{ encoding: 'utf-8' }
);
if (rustResult.status === 0 && rustResult.stdout) {
post = JSON.parse(rustResult.stdout);
post.createdAt = post.created_at;
delete post.created_at;
parser = 'rust';
} else {
rustError = rustResult.stderr || rustResult.error?.toString() || 'Unknown error';
console.error('[Rust parser error]', rustError);
}
} catch (e) {
rustError = e instanceof Error ? e.message : String(e);
console.error('[Rust parser exception]', rustError);
}
if (!post) {
post = await getPostBySlug(slugPath);
}
const response = NextResponse.json(post);
response.headers.set('X-Parser', parser);
if (parser !== 'rust' && rustError) {
response.headers.set('X-Rust-Parser-Error', rustError);
}
return response;
} catch (error) {
console.error('Error loading post:', error);
return NextResponse.json(

View File

@@ -33,6 +33,7 @@ export default function Home() {
const [search, setSearch] = useState('');
const [isLoading, setIsLoading] = useState(false);
const [lastUpdate, setLastUpdate] = useState<Date | null>(null);
const [error, setError] = useState<string | null>(null);
// Get blog owner from env
const blogOwner = process.env.NEXT_PUBLIC_BLOG_OWNER || 'Anonymous';
@@ -99,12 +100,17 @@ export default function Home() {
const loadTree = async () => {
try {
setIsLoading(true);
setError(null);
const response = await fetch('/api/posts');
if (!response.ok) {
throw new Error(`API error: ${response.status}`);
}
const data = await response.json();
setTree(data);
setLastUpdate(new Date());
} catch (error) {
console.error('Fehler beim Laden der Beiträge:', error);
setError(error instanceof Error ? error.message : String(error));
} finally {
setIsLoading(false);
}
@@ -168,6 +174,12 @@ export default function Home() {
return (
<main className="container mx-auto px-3 sm:px-4 py-4 sm:py-8">
{/* Error display */}
{error && (
<div className="mb-4 p-4 bg-red-100 text-red-800 rounded">
<strong>Fehler:</strong> {error}
</div>
)}
{/* Mobile-first header section */}
<div className="mb-6 sm:mb-8 space-y-4 sm:space-y-0 sm:flex sm:flex-row sm:gap-4 sm:items-center sm:justify-between">
<h1 className="text-2xl sm:text-3xl md:text-4xl font-bold text-center sm:text-left">{blogOwner}&apos;s Blog</h1>

View File

@@ -14,6 +14,23 @@ interface Post {
createdAt: string;
}
// Module-level tally of which markdown parser produced each loaded post.
// `rust` / `typescript` count posts by the parser reported in the X-Parser
// response header; `lastRustError` holds the most recent X-Rust-Parser-Error value.
const parserStats: { rust: number; typescript: number; lastRustError: string } = {
  rust: 0,
  typescript: 0,
  lastRustError: '',
};
// Lowercase Latin letters that NFKD normalization leaves untouched. Rust's
// slug::slugify transliterates these to ASCII, so they are mapped explicitly;
// otherwise e.g. "Straße" would become "stra-e" instead of "strasse".
const SLUG_TRANSLITERATIONS: Record<string, string> = {
  'ß': 'ss',
  'æ': 'ae',
  'œ': 'oe',
  'ø': 'o',
  'đ': 'd',
  'ð': 'd',
  'ł': 'l',
  'þ': 'th',
};

/**
 * Convert arbitrary text into a lowercase, dash-separated ASCII slug.
 *
 * Intended to mirror Rust's `slug::slugify` for Latin-script input so that
 * anchor ids computed here match heading ids produced by the Rust backend.
 * Non-Latin scripts are NOT transliterated here and collapse to dashes.
 *
 * @param text - Raw heading text or anchor id.
 * @returns The slug; an empty string if `text` has no ASCII-representable
 *          letters or digits.
 */
function slugify(text: string): string {
  return text
    .toLowerCase()
    .normalize('NFKD')
    // Drop combining marks left behind by the decomposition (ä -> a, é -> e).
    .replace(/[\u0300-\u036F]/g, '')
    // Transliterate letters that have no canonical decomposition.
    .replace(/[ßæœøđðłþ]/g, (ch) => SLUG_TRANSLITERATIONS[ch] || ch)
    // Every remaining run of non-alphanumerics becomes a single dash.
    .replace(/[^a-z0-9]+/g, '-')
    // No leading or trailing dashes.
    .replace(/^-+|-+$/g, '');
}
export default function PostPage({ params }: { params: { slug: string[] } }) {
const [post, setPost] = useState<Post | null>(null);
// Modal state for zoomed image
@@ -308,35 +325,49 @@ export default function PostPage({ params }: { params: { slug: string[] } }) {
}
}
// Find the element, but only consider visible ones
const allElements = document.querySelectorAll(`#${id}`);
// Try to find the element by the raw ID first
let allElements = document.querySelectorAll(`#${id}`);
let element: HTMLElement | null = null;
// Check if we're on desktop or mobile
const isDesktop = window.innerWidth >= 640;
for (const el of Array.from(allElements)) {
const htmlEl = el as HTMLElement;
// Check if the element is visible (not hidden by CSS)
const rect = htmlEl.getBoundingClientRect();
const isVisible = rect.width > 0 && rect.height > 0;
if (isVisible) {
element = htmlEl;
break;
}
}
if (element) {
console.log('Found target element:', element.textContent?.substring(0, 50));
console.log('Found target element (raw id):', element.textContent?.substring(0, 50));
scrollToElement(element);
} else if (retryCount < 5) {
return;
}
// If not found, try slugified version
const slugId = slugify(id);
if (slugId !== id) {
allElements = document.querySelectorAll(`#${slugId}`);
for (const el of Array.from(allElements)) {
const htmlEl = el as HTMLElement;
const rect = htmlEl.getBoundingClientRect();
const isVisible = rect.width > 0 && rect.height > 0;
if (isVisible) {
element = htmlEl;
break;
}
}
if (element) {
console.log('Found target element (slugified id):', element.textContent?.substring(0, 50));
scrollToElement(element);
return;
}
}
if (retryCount < 5) {
console.log(`Element not found for anchor: ${id}, retrying... (${retryCount + 1}/5)`);
setTimeout(() => {
findAndScrollToElement(id, retryCount + 1);
}, 100);
} else {
console.warn(`Element with id "${id}" not found after retries`);
console.warn(`Element with id "${id}" (or slugified "${slugId}") not found after retries`);
}
};
@@ -648,6 +679,20 @@ export default function PostPage({ params }: { params: { slug: string[] } }) {
const loadPost = async () => {
try {
const response = await fetch(`/api/posts/${encodeURIComponent(slugPath)}`);
const parser = response.headers.get('X-Parser');
const rustError = response.headers.get('X-Rust-Parser-Error');
if (parser === 'rust') {
parserStats.rust++;
console.log('%c[Rust Parser] Used for this post.', 'color: green; font-weight: bold');
} else {
parserStats.typescript++;
console.log('%c[TypeScript Parser] Used for this post.', 'color: orange; font-weight: bold');
if (rustError) {
parserStats.lastRustError = rustError;
console.warn('[Rust Parser Error]', rustError);
}
}
console.info('[Parser Stats]', parserStats);
const data = await response.json();
setPost(data);
} catch (error) {

View File

@@ -1,3 +1,12 @@
// This is the frontend Markdown parser.
// It is written in TypeScript
// While I was writing this, only I and God knew how it works.
// Now, only God knows.
//
// If you are trying to understand how it works and optimize it, please increase the counter.
//
// Hours wasted here: 12
import fs from 'fs';
import path from 'path';
import matter from 'gray-matter';
@@ -221,8 +230,25 @@ export function watchPosts(callback: () => void) {
onChangeCallback = callback;
watcher = chokidar.watch(postsDirectory, {
ignored: /(^|[\/\\])\../, // ignore dotfiles
persistent: true
ignored: [
/(^|[\/\\])\../, // ignore dotfiles
/node_modules/,
/\.git/,
/\.next/,
/\.cache/,
/\.DS_Store/,
/Thumbs\.db/,
/\.tmp$/,
/\.temp$/
],
persistent: true,
ignoreInitial: true, // Don't trigger on initial scan
awaitWriteFinish: {
stabilityThreshold: 1000, // Wait 1 second after file changes
pollInterval: 100 // Check every 100ms
},
usePolling: false, // Use native file system events when possible
interval: 1000 // Fallback polling interval (only used if native events fail)
});
watcher
@@ -235,20 +261,6 @@ function handleFileChange() {
if (onChangeCallback) {
onChangeCallback();
}
// Also notify via webhook if available
try {
fetch('/api/posts/webhook', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ type: 'update', timestamp: new Date().toISOString() })
}).catch(error => {
// Webhook is optional, so we don't need to handle this as a critical error
console.debug('Webhook notification failed:', error);
});
} catch (error) {
// Ignore webhook errors
}
}
export function stopWatching() {