From 5ad73485ce144d4ddbc734c22dba0dc382ec0d92 Mon Sep 17 00:00:00 2001 From: ZockerKatze Date: Tue, 24 Jun 2025 10:23:34 +0200 Subject: [PATCH] rust parser working on local build. fix docker build --- .gitignore | 6 + Dockerfile | 12 ++ markdown_backend/Cargo.toml | 18 +++ markdown_backend/src/main.rs | 69 ++++++++++++ markdown_backend/src/markdown.rs | 181 ++++++++++++++++++++++++++++++ posts/welcome.md | 31 +++++ src/app/api/posts/[slug]/route.ts | 64 +++++++++-- src/app/page.tsx | 12 ++ src/app/posts/[...slug]/page.tsx | 21 ++++ src/lib/markdown.ts | 35 +++--- 10 files changed, 425 insertions(+), 24 deletions(-) create mode 100644 markdown_backend/Cargo.toml create mode 100644 markdown_backend/src/main.rs create mode 100644 markdown_backend/src/markdown.rs diff --git a/.gitignore b/.gitignore index da35784..68ce977 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,9 @@ posts/pinned.json posts/Aquaworld/tag-1.md posts/pinned.json posts/pinned.json + +# Rust +target/ +Cargo.lock +**/*.rs.bk +*.pdb diff --git a/Dockerfile b/Dockerfile index 895faeb..cda0298 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,12 @@ +# syntax=docker/dockerfile:1 +FROM rust:latest as rust-build +WORKDIR /build +COPY ./markdown_backend ./markdown_backend +WORKDIR /build/markdown_backend +RUN rustup target add x86_64-unknown-linux-musl +RUN apt-get update && apt-get install -y musl-tools +RUN cargo build --release --target x86_64-unknown-linux-musl + FROM node:20 WORKDIR /app @@ -8,6 +17,9 @@ RUN npm install COPY . . +# Copy the Rust binary from the build stage +COPY --from=rust-build /build/markdown_backend/target/release/markdown_backend ./markdown_backend/target/release/markdown_backend + RUN npm run build VOLUME ["/app/docker"] diff --git a/markdown_backend/Cargo.toml b/markdown_backend/Cargo.toml new file mode 100644 index 0000000..31e5e68 --- /dev/null +++ b/markdown_backend/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "markdown_backend" +version = "0.1.0" +edition = "2021" + +[dependencies] +chrono = { version = "0.4", features = ["serde"] } +serde = { version = "1.0", features = ["derive"] } +serde_yaml = "0.9" +pulldown-cmark = "0.9" +gray_matter = "0.2.8" +ammonia = "3.1" +slug = "0.1" +notify = "6.1" +syntect = { version = "5.1", features = ["default"] } +regex = "1.10" +clap = { version = "4.4", features = ["derive"] } +serde_json = "1.0" \ No newline at end of file diff --git a/markdown_backend/src/main.rs b/markdown_backend/src/main.rs new file mode 100644 index 0000000..6e7b80c --- /dev/null +++ b/markdown_backend/src/main.rs @@ -0,0 +1,69 @@ +use clap::{Parser, Subcommand}; +mod markdown; +use markdown::{get_all_posts, get_post_by_slug, get_posts_by_tag, watch_posts}; +use serde_json; + +#[derive(Parser)] +#[command(name = "Markdown Backend")] +#[command(about = "A CLI for managing markdown blog posts", long_about = None)] +struct Cli { + #[command(subcommand)] + command: Commands, +} + +#[derive(Subcommand)] +enum Commands { + /// List all posts + List, + /// Show a post by slug + Show { + slug: String, + }, + /// List posts by tag + Tags { + tag: String, + }, + /// Watch for changes in the posts directory + Watch, +} + +fn main() { + let cli = Cli::parse(); + match &cli.command { + Commands::List => { + let posts = get_all_posts().unwrap_or_else(|e| { + eprintln!("{}", e); + std::process::exit(1); + }); + println!("{}", serde_json::to_string(&posts).unwrap()); + } + Commands::Show { slug } => { + match get_post_by_slug(slug) { + Ok(post) => { + println!("{}", serde_json::to_string(&post).unwrap()); + } + Err(e) => { + eprintln!("{}", e); + std::process::exit(1); + } + } + } + Commands::Tags { tag } => { + let posts = get_posts_by_tag(tag).unwrap_or_else(|e| { + eprintln!("{}", e); + std::process::exit(1); + }); + println!("{}", serde_json::to_string(&posts).unwrap()); + } + Commands::Watch => { + println!("Watching for changes in posts directory. Press Ctrl+C to exit."); + let _ = watch_posts(|| { + println!("Posts directory changed!"); + }); + // Keep the main thread alive + loop { + std::thread::sleep(std::time::Duration::from_secs(60)); + } + } + } +} \ No newline at end of file diff --git a/markdown_backend/src/markdown.rs b/markdown_backend/src/markdown.rs new file mode 100644 index 0000000..f5c2540 --- /dev/null +++ b/markdown_backend/src/markdown.rs @@ -0,0 +1,181 @@ +// src/markdown.rs + +use std::fs; +use std::path::{Path, PathBuf}; +use chrono::{DateTime, Utc}; +use serde::Deserialize; +use pulldown_cmark::{Parser, Options, html}; +use gray_matter::engine::YAML; +use gray_matter::Matter; +use ammonia::clean; +use slug::slugify; +use notify::{RecursiveMode, RecommendedWatcher, Watcher, Config}; +use std::sync::mpsc::channel; +use std::time::Duration; +use syntect::highlighting::{ThemeSet, Style}; +use syntect::parsing::SyntaxSet; +use syntect::html::{highlighted_html_for_string, IncludeBackground}; + +#[derive(Debug, Deserialize, Clone, serde::Serialize)] +pub struct PostFrontmatter { + pub title: String, + pub date: String, + pub tags: Option>, + pub summary: Option, +} + +#[derive(Debug, Clone, serde::Serialize)] +pub struct Post { + pub slug: String, + pub title: String, + pub date: String, + pub tags: Vec, + pub summary: Option, + pub content: String, + pub created_at: String, + pub author: String, +} + +fn get_posts_directory() -> PathBuf { + let candidates = [ + "./posts", + "../posts", + "/posts", + "/docker" + ]; + for candidate in candidates.iter() { + let path = PathBuf::from(candidate); + if path.exists() && path.is_dir() { + return path; + } + } + // Fallback: default to ./posts + PathBuf::from("./posts") +} + +fn get_file_creation_date(path: &Path) -> std::io::Result> { + let metadata = fs::metadata(path)?; + let created = metadata.created()?; + Ok(DateTime::::from(created)) +} + +fn process_anchor_links(content: &str) -> String { + // Replace [text](#anchor) with slugified anchor + let re = regex::Regex::new(r"\[([^\]]+)\]\(#([^)]+)\)").unwrap(); + re.replace_all(content, |caps: ®ex::Captures| { + let link_text = &caps[1]; + let anchor = &caps[2]; + let slugified = slugify(anchor); + format!("[{}](#{})", link_text, slugified) + }).to_string() +} + +fn highlight_code_blocks(html: &str) -> String { + let ss = SyntaxSet::load_defaults_newlines(); + let ts = ThemeSet::load_defaults(); + let theme = &ts.themes["base16-ocean.dark"]; + + // Simple code block detection and highlighting + // In a real implementation, you'd want to parse the HTML and handle code blocks properly + let re = regex::Regex::new(r#"
([^<]+)
"#).unwrap(); + re.replace_all(html, |caps: ®ex::Captures| { + let lang = &caps[1]; + let code = &caps[2]; + + if let Some(syntax) = ss.find_syntax_by_token(lang) { + match highlighted_html_for_string(code, &ss, syntax, theme) { + Ok(highlighted) => highlighted, + Err(_) => caps[0].to_string(), + } + } else { + caps[0].to_string() + } + }).to_string() +} + +pub fn get_post_by_slug(slug: &str) -> Result> { + let posts_dir = get_posts_directory(); + let file_path = posts_dir.join(format!("{}.md", slug)); + let file_content = fs::read_to_string(&file_path)?; + + let matter = Matter::::new(); + let result = matter.parse(&file_content); + + let front: PostFrontmatter = if let Some(data) = result.data { + match data.deserialize() { + Ok(front) => front, + Err(e) => { + eprintln!("Failed to deserialize frontmatter for post {}: {}", slug, e); + return Err("Failed to deserialize frontmatter".into()); + } + } + } else { + eprintln!("No frontmatter found for post: {}", slug); + return Err("No frontmatter found".into()); + }; + + let created_at = get_file_creation_date(&file_path)?; + + let processed_markdown = process_anchor_links(&result.content); + let mut html_output = String::new(); + let parser = Parser::new_ext(&processed_markdown, Options::all()); + html::push_html(&mut html_output, parser); + + let highlighted_html = highlight_code_blocks(&html_output); + let sanitized_html = clean(&highlighted_html); + + Ok(Post { + slug: slug.to_string(), + title: front.title, + date: front.date, + tags: front.tags.unwrap_or_default(), + summary: front.summary, + content: sanitized_html, + created_at: created_at.to_rfc3339(), + author: std::env::var("BLOG_OWNER").unwrap_or_else(|_| "Anonymous".to_string()), + }) +} + +pub fn get_all_posts() -> Result, Box> { + let posts_dir = get_posts_directory(); + let mut posts = Vec::new(); + for entry in fs::read_dir(posts_dir)? { + let entry = entry?; + let path = entry.path(); + if path.extension().map(|e| e == "md").unwrap_or(false) { + let file_stem = path.file_stem().unwrap().to_string_lossy(); + if let Ok(post) = get_post_by_slug(&file_stem) { + posts.push(post); + } + } + } + posts.sort_by(|a, b| b.created_at.cmp(&a.created_at)); + Ok(posts) +} + +pub fn get_posts_by_tag(tag: &str) -> Result, Box> { + let all_posts = get_all_posts()?; + Ok(all_posts.into_iter().filter(|p| p.tags.contains(&tag.to_string())).collect()) +} + +pub fn watch_posts(on_change: F) -> notify::Result { + let (tx, rx) = channel(); + let mut watcher = RecommendedWatcher::new(tx, Config::default())?; + watcher.watch(get_posts_directory().as_path(), RecursiveMode::Recursive)?; + + std::thread::spawn(move || { + loop { + match rx.recv() { + Ok(_event) => { + on_change(); + }, + Err(e) => { + eprintln!("watch error: {:?}", e); + break; + } + } + } + }); + + Ok(watcher) +} \ No newline at end of file diff --git a/posts/welcome.md b/posts/welcome.md index 22f837e..7ed70e3 100644 --- a/posts/welcome.md +++ b/posts/welcome.md @@ -29,6 +29,8 @@ author: Rattatwinko - [Support the Project ❤️](#support-the-project) - [Acknowledgments 🙏](#acknowledgments) - [Folder Emojis 🇦🇹](#folder-emoji-technical-note) +- [API 🏗️](#api) +- [ToT, and Todo](#train-of-thought-for-this-project-and-todo) --- @@ -387,6 +389,35 @@ Thanks for choosing MarkdownBlog! If you find it useful, please: --- +## API + +MarkdownBlog provides a built-in RESTful API to serve post data, handle live updates, and support integrations. The API is used internally by the frontend to fetch posts, stream updates (for live reloads), and manage features like emoji and pin assignments. You can also interact with these endpoints to build custom tools or integrations. + +Key API endpoints include: + +- `/api/posts`: Fetch all blog posts as JSON. +- `/api/posts/[slug]`: Fetch a single post by its slug. +- `/api/posts/stream`: Server-Sent Events (SSE) endpoint for real-time updates when posts change. +- `/api/posts/webhook`: Webhook endpoint to notify the app of external changes (e.g., from CI/CD or scripts). + +All API routes are implemented using Next.js API routes and are available out of the box. For more details, check the code in the `src/app/api/posts/` directory. + + +-- + +## Train of Thought for this Project and Todo + +Ok, so when I originally did this (about a week ago speaking from 24.6.25), I really had no thought of this being a huge thing. But reallistically speaking, this Repository is 2MiB large. And its bloated. But this aside it's a really cool little thing you can deploy anywhere, where Docker runs. + +If you have seen this is not very mindfull of browser resources tho. + +|IS DONE|Task| +|-------|----| +|partly / working on it|_Rewrite_ the Markdown Parser in **Rust** ; This works for local Builds but in Docker does not work due to permission error| + +--- + + I looooooove Markdown diff --git a/src/app/api/posts/[slug]/route.ts b/src/app/api/posts/[slug]/route.ts index c6e2273..16c7f21 100644 --- a/src/app/api/posts/[slug]/route.ts +++ b/src/app/api/posts/[slug]/route.ts @@ -9,6 +9,7 @@ import DOMPurify from 'dompurify'; import { JSDOM } from 'jsdom'; import hljs from 'highlight.js'; import { getPostsDirectory } from '@/lib/postsDirectory'; +import { spawnSync } from 'child_process'; const postsDirectory = getPostsDirectory(); @@ -52,6 +53,29 @@ marked.setOptions({ async function getPostBySlug(slug: string) { const realSlug = slug.replace(/\.md$/, ''); const fullPath = path.join(postsDirectory, `${realSlug}.md`); + let rustResult; + try { + // Try Rust backend first + rustResult = spawnSync( + path.resolve(process.cwd(), 'markdown_backend/target/release/markdown_backend'), + ['show', realSlug], + { encoding: 'utf-8' } + ); + if (rustResult.status === 0 && rustResult.stdout) { + // Expect Rust to output a JSON object matching the post shape + const post = JSON.parse(rustResult.stdout); + // Map snake_case to camelCase for frontend compatibility + post.createdAt = post.created_at; + delete post.created_at; + return post; + } else { + console.error('[Rust parser error]', rustResult.stderr || rustResult.error); + } + } catch (e) { + console.error('[Rust parser exception]', e); + } + + // Fallback to TypeScript parser const fileContents = fs.readFileSync(fullPath, 'utf8'); const { data, content } = matter(fileContents); const createdAt = getFileCreationDate(fullPath); @@ -60,12 +84,8 @@ async function getPostBySlug(slug: string) { try { // Convert markdown to HTML const rawHtml = marked.parse(content); - - // Create a DOM window for DOMPurify const window = new JSDOM('').window; const purify = DOMPurify(window); - - // Sanitize the HTML processedContent = purify.sanitize(rawHtml as string, { ALLOWED_TAGS: [ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', @@ -80,11 +100,10 @@ async function getPostBySlug(slug: string) { 'src', 'alt', 'title', 'width', 'height', 'frameborder', 'allowfullscreen' ], - ALLOWED_URI_REGEXP: /^(?:(?:(?:f|ht)tps?|mailto|tel|callto|cid|xmpp):|[^a-z]|[a-z+.\-]+(?:[^a-z+.\-:]|$))/i + ALLOWED_URI_REGEXP: /^(?:(?:(?:f|ht)tps?|mailto|tel|callto|cid|xmpp):|[^a-z]|[a-z+.-]+(?:[^a-z+.-:]|$))/i }); } catch (err) { console.error(`Error processing markdown for slug "${realSlug}":`, err); - // Return a more informative error message in the content processedContent = `

Error processing markdown content. Please check the console for details.

${err instanceof Error ? err.message : 'Unknown error'}
@@ -107,11 +126,40 @@ export async function GET( request: Request, { params }: { params: { slug: string[] | string } } ) { + let parser = 'typescript'; + let rustError = ''; try { const slugArr = Array.isArray(params.slug) ? params.slug : [params.slug]; const slugPath = slugArr.join('/'); - const post = await getPostBySlug(slugPath); - return NextResponse.json(post); + let post; + try { + const rustResult = spawnSync( + path.resolve(process.cwd(), 'markdown_backend/target/release/markdown_backend'), + ['show', slugPath], + { encoding: 'utf-8' } + ); + if (rustResult.status === 0 && rustResult.stdout) { + post = JSON.parse(rustResult.stdout); + post.createdAt = post.created_at; + delete post.created_at; + parser = 'rust'; + } else { + rustError = rustResult.stderr || rustResult.error?.toString() || 'Unknown error'; + console.error('[Rust parser error]', rustError); + } + } catch (e) { + rustError = e instanceof Error ? e.message : String(e); + console.error('[Rust parser exception]', rustError); + } + if (!post) { + post = await getPostBySlug(slugPath); + } + const response = NextResponse.json(post); + response.headers.set('X-Parser', parser); + if (parser !== 'rust' && rustError) { + response.headers.set('X-Rust-Parser-Error', rustError); + } + return response; } catch (error) { console.error('Error loading post:', error); return NextResponse.json( diff --git a/src/app/page.tsx b/src/app/page.tsx index 4f32407..fc5a5e3 100644 --- a/src/app/page.tsx +++ b/src/app/page.tsx @@ -33,6 +33,7 @@ export default function Home() { const [search, setSearch] = useState(''); const [isLoading, setIsLoading] = useState(false); const [lastUpdate, setLastUpdate] = useState(null); + const [error, setError] = useState(null); // Get blog owner from env const blogOwner = process.env.NEXT_PUBLIC_BLOG_OWNER || 'Anonymous'; @@ -99,12 +100,17 @@ export default function Home() { const loadTree = async () => { try { setIsLoading(true); + setError(null); const response = await fetch('/api/posts'); + if (!response.ok) { + throw new Error(`API error: ${response.status}`); + } const data = await response.json(); setTree(data); setLastUpdate(new Date()); } catch (error) { console.error('Fehler beim Laden der Beiträge:', error); + setError(error instanceof Error ? error.message : String(error)); } finally { setIsLoading(false); } @@ -168,6 +174,12 @@ export default function Home() { return (
+ {/* Error display */} + {error && ( +
+ Fehler: {error} +
+ )} {/* Mobile-first header section */}

{blogOwner}'s Blog

diff --git a/src/app/posts/[...slug]/page.tsx b/src/app/posts/[...slug]/page.tsx index 1bc16f0..5f52973 100644 --- a/src/app/posts/[...slug]/page.tsx +++ b/src/app/posts/[...slug]/page.tsx @@ -14,6 +14,13 @@ interface Post { createdAt: string; } +// Runtime statistics for parser usage +const parserStats = { + rust: 0, + typescript: 0, + lastRustError: '', +}; + export default function PostPage({ params }: { params: { slug: string[] } }) { const [post, setPost] = useState(null); // Modal state for zoomed image @@ -648,6 +655,20 @@ export default function PostPage({ params }: { params: { slug: string[] } }) { const loadPost = async () => { try { const response = await fetch(`/api/posts/${encodeURIComponent(slugPath)}`); + const parser = response.headers.get('X-Parser'); + const rustError = response.headers.get('X-Rust-Parser-Error'); + if (parser === 'rust') { + parserStats.rust++; + console.log('%c[Rust Parser] Used for this post.', 'color: green; font-weight: bold'); + } else { + parserStats.typescript++; + console.log('%c[TypeScript Parser] Used for this post.', 'color: orange; font-weight: bold'); + if (rustError) { + parserStats.lastRustError = rustError; + console.warn('[Rust Parser Error]', rustError); + } + } + console.info('[Parser Stats]', parserStats); const data = await response.json(); setPost(data); } catch (error) { diff --git a/src/lib/markdown.ts b/src/lib/markdown.ts index 3827190..4a4a8aa 100644 --- a/src/lib/markdown.ts +++ b/src/lib/markdown.ts @@ -221,8 +221,25 @@ export function watchPosts(callback: () => void) { onChangeCallback = callback; watcher = chokidar.watch(postsDirectory, { - ignored: /(^|[\/\\])\../, // ignore dotfiles - persistent: true + ignored: [ + /(^|[\/\\])\../, // ignore dotfiles + /node_modules/, + /\.git/, + /\.next/, + /\.cache/, + /\.DS_Store/, + /Thumbs\.db/, + /\.tmp$/, + /\.temp$/ + ], + persistent: true, + ignoreInitial: true, // Don't trigger on initial scan + awaitWriteFinish: { + stabilityThreshold: 1000, // Wait 1 second after file changes + pollInterval: 100 // Check every 100ms + }, + usePolling: false, // Use native file system events when possible + interval: 1000 // Fallback polling interval (only used if native events fail) }); watcher @@ -235,20 +252,6 @@ function handleFileChange() { if (onChangeCallback) { onChangeCallback(); } - - // Also notify via webhook if available - try { - fetch('/api/posts/webhook', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ type: 'update', timestamp: new Date().toISOString() }) - }).catch(error => { - // Webhook is optional, so we don't need to handle this as a critical error - console.debug('Webhook notification failed:', error); - }); - } catch (error) { - // Ignore webhook errors - } } export function stopWatching() {