From 5ad73485ce144d4ddbc734c22dba0dc382ec0d92 Mon Sep 17 00:00:00 2001 From: ZockerKatze Date: Tue, 24 Jun 2025 10:23:34 +0200 Subject: [PATCH 1/7] rust parser working on local build. fix docker build --- .gitignore | 6 + Dockerfile | 12 ++ markdown_backend/Cargo.toml | 18 +++ markdown_backend/src/main.rs | 69 ++++++++++++ markdown_backend/src/markdown.rs | 181 ++++++++++++++++++++++++++++++ posts/welcome.md | 31 +++++ src/app/api/posts/[slug]/route.ts | 64 +++++++++-- src/app/page.tsx | 12 ++ src/app/posts/[...slug]/page.tsx | 21 ++++ src/lib/markdown.ts | 35 +++--- 10 files changed, 425 insertions(+), 24 deletions(-) create mode 100644 markdown_backend/Cargo.toml create mode 100644 markdown_backend/src/main.rs create mode 100644 markdown_backend/src/markdown.rs diff --git a/.gitignore b/.gitignore index da35784..68ce977 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,9 @@ posts/pinned.json posts/Aquaworld/tag-1.md posts/pinned.json posts/pinned.json + +# Rust +target/ +Cargo.lock +**/*.rs.bk +*.pdb diff --git a/Dockerfile b/Dockerfile index 895faeb..cda0298 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,12 @@ +# syntax=docker/dockerfile:1 +FROM rust:latest as rust-build +WORKDIR /build +COPY ./markdown_backend ./markdown_backend +WORKDIR /build/markdown_backend +RUN rustup target add x86_64-unknown-linux-musl +RUN apt-get update && apt-get install -y musl-tools +RUN cargo build --release --target x86_64-unknown-linux-musl + FROM node:20 WORKDIR /app @@ -8,6 +17,9 @@ RUN npm install COPY . . 
+# Copy the Rust binary from the build stage +COPY --from=rust-build /build/markdown_backend/target/release/markdown_backend ./markdown_backend/target/release/markdown_backend + RUN npm run build VOLUME ["/app/docker"] diff --git a/markdown_backend/Cargo.toml b/markdown_backend/Cargo.toml new file mode 100644 index 0000000..31e5e68 --- /dev/null +++ b/markdown_backend/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "markdown_backend" +version = "0.1.0" +edition = "2021" + +[dependencies] +chrono = { version = "0.4", features = ["serde"] } +serde = { version = "1.0", features = ["derive"] } +serde_yaml = "0.9" +pulldown-cmark = "0.9" +gray_matter = "0.2.8" +ammonia = "3.1" +slug = "0.1" +notify = "6.1" +syntect = { version = "5.1", features = ["default"] } +regex = "1.10" +clap = { version = "4.4", features = ["derive"] } +serde_json = "1.0" \ No newline at end of file diff --git a/markdown_backend/src/main.rs b/markdown_backend/src/main.rs new file mode 100644 index 0000000..6e7b80c --- /dev/null +++ b/markdown_backend/src/main.rs @@ -0,0 +1,69 @@ +use clap::{Parser, Subcommand}; +mod markdown; +use markdown::{get_all_posts, get_post_by_slug, get_posts_by_tag, watch_posts}; +use serde_json; + +#[derive(Parser)] +#[command(name = "Markdown Backend")] +#[command(about = "A CLI for managing markdown blog posts", long_about = None)] +struct Cli { + #[command(subcommand)] + command: Commands, +} + +#[derive(Subcommand)] +enum Commands { + /// List all posts + List, + /// Show a post by slug + Show { + slug: String, + }, + /// List posts by tag + Tags { + tag: String, + }, + /// Watch for changes in the posts directory + Watch, +} + +fn main() { + let cli = Cli::parse(); + match &cli.command { + Commands::List => { + let posts = get_all_posts().unwrap_or_else(|e| { + eprintln!("{}", e); + std::process::exit(1); + }); + println!("{}", serde_json::to_string(&posts).unwrap()); + } + Commands::Show { slug } => { + match get_post_by_slug(slug) { + Ok(post) => { + 
println!("{}", serde_json::to_string(&post).unwrap()); + } + Err(e) => { + eprintln!("{}", e); + std::process::exit(1); + } + } + } + Commands::Tags { tag } => { + let posts = get_posts_by_tag(tag).unwrap_or_else(|e| { + eprintln!("{}", e); + std::process::exit(1); + }); + println!("{}", serde_json::to_string(&posts).unwrap()); + } + Commands::Watch => { + println!("Watching for changes in posts directory. Press Ctrl+C to exit."); + let _ = watch_posts(|| { + println!("Posts directory changed!"); + }); + // Keep the main thread alive + loop { + std::thread::sleep(std::time::Duration::from_secs(60)); + } + } + } +} \ No newline at end of file diff --git a/markdown_backend/src/markdown.rs b/markdown_backend/src/markdown.rs new file mode 100644 index 0000000..f5c2540 --- /dev/null +++ b/markdown_backend/src/markdown.rs @@ -0,0 +1,181 @@ +// src/markdown.rs + +use std::fs; +use std::path::{Path, PathBuf}; +use chrono::{DateTime, Utc}; +use serde::Deserialize; +use pulldown_cmark::{Parser, Options, html}; +use gray_matter::engine::YAML; +use gray_matter::Matter; +use ammonia::clean; +use slug::slugify; +use notify::{RecursiveMode, RecommendedWatcher, Watcher, Config}; +use std::sync::mpsc::channel; +use std::time::Duration; +use syntect::highlighting::{ThemeSet, Style}; +use syntect::parsing::SyntaxSet; +use syntect::html::{highlighted_html_for_string, IncludeBackground}; + +#[derive(Debug, Deserialize, Clone, serde::Serialize)] +pub struct PostFrontmatter { + pub title: String, + pub date: String, + pub tags: Option>, + pub summary: Option, +} + +#[derive(Debug, Clone, serde::Serialize)] +pub struct Post { + pub slug: String, + pub title: String, + pub date: String, + pub tags: Vec, + pub summary: Option, + pub content: String, + pub created_at: String, + pub author: String, +} + +fn get_posts_directory() -> PathBuf { + let candidates = [ + "./posts", + "../posts", + "/posts", + "/docker" + ]; + for candidate in candidates.iter() { + let path = 
PathBuf::from(candidate); + if path.exists() && path.is_dir() { + return path; + } + } + // Fallback: default to ./posts + PathBuf::from("./posts") +} + +fn get_file_creation_date(path: &Path) -> std::io::Result> { + let metadata = fs::metadata(path)?; + let created = metadata.created()?; + Ok(DateTime::::from(created)) +} + +fn process_anchor_links(content: &str) -> String { + // Replace [text](#anchor) with slugified anchor + let re = regex::Regex::new(r"\[([^\]]+)\]\(#([^)]+)\)").unwrap(); + re.replace_all(content, |caps: ®ex::Captures| { + let link_text = &caps[1]; + let anchor = &caps[2]; + let slugified = slugify(anchor); + format!("[{}](#{})", link_text, slugified) + }).to_string() +} + +fn highlight_code_blocks(html: &str) -> String { + let ss = SyntaxSet::load_defaults_newlines(); + let ts = ThemeSet::load_defaults(); + let theme = &ts.themes["base16-ocean.dark"]; + + // Simple code block detection and highlighting + // In a real implementation, you'd want to parse the HTML and handle code blocks properly + let re = regex::Regex::new(r#"
([^<]+)
"#).unwrap(); + re.replace_all(html, |caps: ®ex::Captures| { + let lang = &caps[1]; + let code = &caps[2]; + + if let Some(syntax) = ss.find_syntax_by_token(lang) { + match highlighted_html_for_string(code, &ss, syntax, theme) { + Ok(highlighted) => highlighted, + Err(_) => caps[0].to_string(), + } + } else { + caps[0].to_string() + } + }).to_string() +} + +pub fn get_post_by_slug(slug: &str) -> Result> { + let posts_dir = get_posts_directory(); + let file_path = posts_dir.join(format!("{}.md", slug)); + let file_content = fs::read_to_string(&file_path)?; + + let matter = Matter::::new(); + let result = matter.parse(&file_content); + + let front: PostFrontmatter = if let Some(data) = result.data { + match data.deserialize() { + Ok(front) => front, + Err(e) => { + eprintln!("Failed to deserialize frontmatter for post {}: {}", slug, e); + return Err("Failed to deserialize frontmatter".into()); + } + } + } else { + eprintln!("No frontmatter found for post: {}", slug); + return Err("No frontmatter found".into()); + }; + + let created_at = get_file_creation_date(&file_path)?; + + let processed_markdown = process_anchor_links(&result.content); + let mut html_output = String::new(); + let parser = Parser::new_ext(&processed_markdown, Options::all()); + html::push_html(&mut html_output, parser); + + let highlighted_html = highlight_code_blocks(&html_output); + let sanitized_html = clean(&highlighted_html); + + Ok(Post { + slug: slug.to_string(), + title: front.title, + date: front.date, + tags: front.tags.unwrap_or_default(), + summary: front.summary, + content: sanitized_html, + created_at: created_at.to_rfc3339(), + author: std::env::var("BLOG_OWNER").unwrap_or_else(|_| "Anonymous".to_string()), + }) +} + +pub fn get_all_posts() -> Result, Box> { + let posts_dir = get_posts_directory(); + let mut posts = Vec::new(); + for entry in fs::read_dir(posts_dir)? 
{ + let entry = entry?; + let path = entry.path(); + if path.extension().map(|e| e == "md").unwrap_or(false) { + let file_stem = path.file_stem().unwrap().to_string_lossy(); + if let Ok(post) = get_post_by_slug(&file_stem) { + posts.push(post); + } + } + } + posts.sort_by(|a, b| b.created_at.cmp(&a.created_at)); + Ok(posts) +} + +pub fn get_posts_by_tag(tag: &str) -> Result, Box> { + let all_posts = get_all_posts()?; + Ok(all_posts.into_iter().filter(|p| p.tags.contains(&tag.to_string())).collect()) +} + +pub fn watch_posts(on_change: F) -> notify::Result { + let (tx, rx) = channel(); + let mut watcher = RecommendedWatcher::new(tx, Config::default())?; + watcher.watch(get_posts_directory().as_path(), RecursiveMode::Recursive)?; + + std::thread::spawn(move || { + loop { + match rx.recv() { + Ok(_event) => { + on_change(); + }, + Err(e) => { + eprintln!("watch error: {:?}", e); + break; + } + } + } + }); + + Ok(watcher) +} \ No newline at end of file diff --git a/posts/welcome.md b/posts/welcome.md index 22f837e..7ed70e3 100644 --- a/posts/welcome.md +++ b/posts/welcome.md @@ -29,6 +29,8 @@ author: Rattatwinko - [Support the Project ❤️](#support-the-project) - [Acknowledgments 🙏](#acknowledgments) - [Folder Emojis 🇦🇹](#folder-emoji-technical-note) +- [API 🏗️](#api) +- [ToT, and Todo](#train-of-thought-for-this-project-and-todo) --- @@ -387,6 +389,35 @@ Thanks for choosing MarkdownBlog! If you find it useful, please: --- +## API + +MarkdownBlog provides a built-in RESTful API to serve post data, handle live updates, and support integrations. The API is used internally by the frontend to fetch posts, stream updates (for live reloads), and manage features like emoji and pin assignments. You can also interact with these endpoints to build custom tools or integrations. + +Key API endpoints include: + +- `/api/posts`: Fetch all blog posts as JSON. +- `/api/posts/[slug]`: Fetch a single post by its slug. 
+- `/api/posts/stream`: Server-Sent Events (SSE) endpoint for real-time updates when posts change. +- `/api/posts/webhook`: Webhook endpoint to notify the app of external changes (e.g., from CI/CD or scripts). + +All API routes are implemented using Next.js API routes and are available out of the box. For more details, check the code in the `src/app/api/posts/` directory. + + +--- + +## Train of Thought for this Project and Todo + +Ok, so when I originally did this (about a week ago speaking from 24.6.25), I really had no thought of this being a huge thing. But realistically speaking, this Repository is 2MiB large. And it's bloated. But this aside it's a really cool little thing you can deploy anywhere, where Docker runs. + +If you have seen this is not very mindful of browser resources tho. + +|IS DONE|Task| +|-------|----| +|partly / working on it|_Rewrite_ the Markdown Parser in **Rust** ; This works for local Builds but in Docker does not work due to permission error| + +--- + + I looooooove Markdown diff --git a/src/app/api/posts/[slug]/route.ts b/src/app/api/posts/[slug]/route.ts index c6e2273..16c7f21 100644 --- a/src/app/api/posts/[slug]/route.ts +++ b/src/app/api/posts/[slug]/route.ts @@ -9,6 +9,7 @@ import DOMPurify from 'dompurify'; import { JSDOM } from 'jsdom'; import hljs from 'highlight.js'; import { getPostsDirectory } from '@/lib/postsDirectory'; +import { spawnSync } from 'child_process'; const postsDirectory = getPostsDirectory(); @@ -52,6 +53,29 @@ marked.setOptions({ async function getPostBySlug(slug: string) { const realSlug = slug.replace(/\.md$/, ''); const fullPath = path.join(postsDirectory, `${realSlug}.md`); + let rustResult; + try { + // Try Rust backend first + rustResult = spawnSync( + path.resolve(process.cwd(), 'markdown_backend/target/release/markdown_backend'), + ['show', realSlug], + { encoding: 'utf-8' } + ); + if (rustResult.status === 0 && rustResult.stdout) { + // Expect Rust to output a JSON object matching the post shape + 
const post = JSON.parse(rustResult.stdout); + // Map snake_case to camelCase for frontend compatibility + post.createdAt = post.created_at; + delete post.created_at; + return post; + } else { + console.error('[Rust parser error]', rustResult.stderr || rustResult.error); + } + } catch (e) { + console.error('[Rust parser exception]', e); + } + + // Fallback to TypeScript parser const fileContents = fs.readFileSync(fullPath, 'utf8'); const { data, content } = matter(fileContents); const createdAt = getFileCreationDate(fullPath); @@ -60,12 +84,8 @@ async function getPostBySlug(slug: string) { try { // Convert markdown to HTML const rawHtml = marked.parse(content); - - // Create a DOM window for DOMPurify const window = new JSDOM('').window; const purify = DOMPurify(window); - - // Sanitize the HTML processedContent = purify.sanitize(rawHtml as string, { ALLOWED_TAGS: [ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', @@ -80,11 +100,10 @@ async function getPostBySlug(slug: string) { 'src', 'alt', 'title', 'width', 'height', 'frameborder', 'allowfullscreen' ], - ALLOWED_URI_REGEXP: /^(?:(?:(?:f|ht)tps?|mailto|tel|callto|cid|xmpp):|[^a-z]|[a-z+.\-]+(?:[^a-z+.\-:]|$))/i + ALLOWED_URI_REGEXP: /^(?:(?:(?:f|ht)tps?|mailto|tel|callto|cid|xmpp):|[^a-z]|[a-z+.-]+(?:[^a-z+.-:]|$))/i }); } catch (err) { console.error(`Error processing markdown for slug "${realSlug}":`, err); - // Return a more informative error message in the content processedContent = `

Error processing markdown content. Please check the console for details.

${err instanceof Error ? err.message : 'Unknown error'}
@@ -107,11 +126,40 @@ export async function GET( request: Request, { params }: { params: { slug: string[] | string } } ) { + let parser = 'typescript'; + let rustError = ''; try { const slugArr = Array.isArray(params.slug) ? params.slug : [params.slug]; const slugPath = slugArr.join('/'); - const post = await getPostBySlug(slugPath); - return NextResponse.json(post); + let post; + try { + const rustResult = spawnSync( + path.resolve(process.cwd(), 'markdown_backend/target/release/markdown_backend'), + ['show', slugPath], + { encoding: 'utf-8' } + ); + if (rustResult.status === 0 && rustResult.stdout) { + post = JSON.parse(rustResult.stdout); + post.createdAt = post.created_at; + delete post.created_at; + parser = 'rust'; + } else { + rustError = rustResult.stderr || rustResult.error?.toString() || 'Unknown error'; + console.error('[Rust parser error]', rustError); + } + } catch (e) { + rustError = e instanceof Error ? e.message : String(e); + console.error('[Rust parser exception]', rustError); + } + if (!post) { + post = await getPostBySlug(slugPath); + } + const response = NextResponse.json(post); + response.headers.set('X-Parser', parser); + if (parser !== 'rust' && rustError) { + response.headers.set('X-Rust-Parser-Error', rustError); + } + return response; } catch (error) { console.error('Error loading post:', error); return NextResponse.json( diff --git a/src/app/page.tsx b/src/app/page.tsx index 4f32407..fc5a5e3 100644 --- a/src/app/page.tsx +++ b/src/app/page.tsx @@ -33,6 +33,7 @@ export default function Home() { const [search, setSearch] = useState(''); const [isLoading, setIsLoading] = useState(false); const [lastUpdate, setLastUpdate] = useState(null); + const [error, setError] = useState(null); // Get blog owner from env const blogOwner = process.env.NEXT_PUBLIC_BLOG_OWNER || 'Anonymous'; @@ -99,12 +100,17 @@ export default function Home() { const loadTree = async () => { try { setIsLoading(true); + setError(null); const response = await 
fetch('/api/posts'); + if (!response.ok) { + throw new Error(`API error: ${response.status}`); + } const data = await response.json(); setTree(data); setLastUpdate(new Date()); } catch (error) { console.error('Fehler beim Laden der Beiträge:', error); + setError(error instanceof Error ? error.message : String(error)); } finally { setIsLoading(false); } @@ -168,6 +174,12 @@ export default function Home() { return (
+ {/* Error display */} + {error && ( +
+ Fehler: {error} +
+ )} {/* Mobile-first header section */}

{blogOwner}'s Blog

diff --git a/src/app/posts/[...slug]/page.tsx b/src/app/posts/[...slug]/page.tsx index 1bc16f0..5f52973 100644 --- a/src/app/posts/[...slug]/page.tsx +++ b/src/app/posts/[...slug]/page.tsx @@ -14,6 +14,13 @@ interface Post { createdAt: string; } +// Runtime statistics for parser usage +const parserStats = { + rust: 0, + typescript: 0, + lastRustError: '', +}; + export default function PostPage({ params }: { params: { slug: string[] } }) { const [post, setPost] = useState(null); // Modal state for zoomed image @@ -648,6 +655,20 @@ export default function PostPage({ params }: { params: { slug: string[] } }) { const loadPost = async () => { try { const response = await fetch(`/api/posts/${encodeURIComponent(slugPath)}`); + const parser = response.headers.get('X-Parser'); + const rustError = response.headers.get('X-Rust-Parser-Error'); + if (parser === 'rust') { + parserStats.rust++; + console.log('%c[Rust Parser] Used for this post.', 'color: green; font-weight: bold'); + } else { + parserStats.typescript++; + console.log('%c[TypeScript Parser] Used for this post.', 'color: orange; font-weight: bold'); + if (rustError) { + parserStats.lastRustError = rustError; + console.warn('[Rust Parser Error]', rustError); + } + } + console.info('[Parser Stats]', parserStats); const data = await response.json(); setPost(data); } catch (error) { diff --git a/src/lib/markdown.ts b/src/lib/markdown.ts index 3827190..4a4a8aa 100644 --- a/src/lib/markdown.ts +++ b/src/lib/markdown.ts @@ -221,8 +221,25 @@ export function watchPosts(callback: () => void) { onChangeCallback = callback; watcher = chokidar.watch(postsDirectory, { - ignored: /(^|[\/\\])\../, // ignore dotfiles - persistent: true + ignored: [ + /(^|[\/\\])\../, // ignore dotfiles + /node_modules/, + /\.git/, + /\.next/, + /\.cache/, + /\.DS_Store/, + /Thumbs\.db/, + /\.tmp$/, + /\.temp$/ + ], + persistent: true, + ignoreInitial: true, // Don't trigger on initial scan + awaitWriteFinish: { + stabilityThreshold: 1000, // Wait 1 
second after file changes + pollInterval: 100 // Check every 100ms + }, + usePolling: false, // Use native file system events when possible + interval: 1000 // Fallback polling interval (only used if native events fail) }); watcher @@ -235,20 +252,6 @@ function handleFileChange() { if (onChangeCallback) { onChangeCallback(); } - - // Also notify via webhook if available - try { - fetch('/api/posts/webhook', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ type: 'update', timestamp: new Date().toISOString() }) - }).catch(error => { - // Webhook is optional, so we don't need to handle this as a critical error - console.debug('Webhook notification failed:', error); - }); - } catch (error) { - // Ignore webhook errors - } } export function stopWatching() { From fbc41654e0d4d946519582ec6304ec044fb5121a Mon Sep 17 00:00:00 2001 From: ZockerKatze Date: Tue, 24 Jun 2025 10:47:44 +0200 Subject: [PATCH 2/7] works --- Dockerfile | 13 +++++++++++-- docker.sh | 12 ++++++++++++ markdown_backend/src/markdown.rs | 11 +++++++++-- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index cda0298..75f8d5d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,6 +5,7 @@ COPY ./markdown_backend ./markdown_backend WORKDIR /build/markdown_backend RUN rustup target add x86_64-unknown-linux-musl RUN apt-get update && apt-get install -y musl-tools +# Build with musl target for static linking RUN cargo build --release --target x86_64-unknown-linux-musl FROM node:20 @@ -16,12 +17,20 @@ COPY package*.json ./ RUN npm install COPY . . 
+# Ensure posts directory exists and has correct permissions +RUN mkdir -p /app/posts +COPY posts/* /app/posts/ +RUN chmod -R 755 /app/posts -# Copy the Rust binary from the build stage -COPY --from=rust-build /build/markdown_backend/target/release/markdown_backend ./markdown_backend/target/release/markdown_backend +# Copy the statically linked Rust binary from the build stage +COPY --from=rust-build /build/markdown_backend/target/x86_64-unknown-linux-musl/release/markdown_backend ./markdown_backend/target/release/markdown_backend +RUN chmod +x ./markdown_backend/target/release/markdown_backend RUN npm run build +# Create and set permissions for the docker volume mount point +RUN mkdir -p /app/docker && chmod 777 /app/docker + VOLUME ["/app/docker"] EXPOSE 3000 diff --git a/docker.sh b/docker.sh index 4ab52b0..a75ae64 100755 --- a/docker.sh +++ b/docker.sh @@ -2,6 +2,12 @@ set -e +# Check if Docker daemon is running +if ! docker info >/dev/null 2>&1; then + echo "Error: Docker daemon is not running" + exit 1 +fi + IMAGE_NAME="markdownblog" CONTAINER_NAME="markdownblog" VOLUME_NAME="markdownblog-posts" @@ -32,5 +38,11 @@ docker run -d \ echo "Copying built-in posts to Docker volume if empty..." docker exec $CONTAINER_NAME sh -c 'if [ -d /app/posts ] && [ -d /app/docker ] && [ "$(ls -A /app/docker)" = "" ]; then cp -r /app/posts/* /app/docker/; fi' +# Check if container is running +if ! docker ps | grep -q $CONTAINER_NAME; then + echo "Error: Container failed to start. Check logs with: docker logs $CONTAINER_NAME" + exit 1 +fi + echo "Deployment complete!" 
echo "App should be available at http://localhost:$PORT" diff --git a/markdown_backend/src/markdown.rs b/markdown_backend/src/markdown.rs index f5c2540..1fa0bb2 100644 --- a/markdown_backend/src/markdown.rs +++ b/markdown_backend/src/markdown.rs @@ -55,8 +55,15 @@ fn get_posts_directory() -> PathBuf { fn get_file_creation_date(path: &Path) -> std::io::Result> { let metadata = fs::metadata(path)?; - let created = metadata.created()?; - Ok(DateTime::::from(created)) + // Try to get creation time, fall back to modification time if not available + match metadata.created() { + Ok(created) => Ok(DateTime::::from(created)), + Err(_) => { + // Fall back to modification time if creation time is not available + let modified = metadata.modified()?; + Ok(DateTime::::from(modified)) + } + } } fn process_anchor_links(content: &str) -> String { From b0b66258101ae660a0e208547999c67fad444f66 Mon Sep 17 00:00:00 2001 From: rattatwinko Date: Wed, 25 Jun 2025 14:56:54 +0200 Subject: [PATCH 3/7] folders work --- markdown_backend/Cargo.toml | 3 +- markdown_backend/src/markdown.rs | 91 ++++++++++++++++++++++---------- 2 files changed, 65 insertions(+), 29 deletions(-) diff --git a/markdown_backend/Cargo.toml b/markdown_backend/Cargo.toml index 31e5e68..1b38d05 100644 --- a/markdown_backend/Cargo.toml +++ b/markdown_backend/Cargo.toml @@ -15,4 +15,5 @@ notify = "6.1" syntect = { version = "5.1", features = ["default"] } regex = "1.10" clap = { version = "4.4", features = ["derive"] } -serde_json = "1.0" \ No newline at end of file +serde_json = "1.0" +html-escape = "0.2.13" diff --git a/markdown_backend/src/markdown.rs b/markdown_backend/src/markdown.rs index 1fa0bb2..d8a3f9e 100644 --- a/markdown_backend/src/markdown.rs +++ b/markdown_backend/src/markdown.rs @@ -4,7 +4,7 @@ use std::fs; use std::path::{Path, PathBuf}; use chrono::{DateTime, Utc}; use serde::Deserialize; -use pulldown_cmark::{Parser, Options, html}; +use pulldown_cmark::{Parser, Options, html, Event, Tag, CowStr}; use 
gray_matter::engine::YAML; use gray_matter::Matter; use ammonia::clean; @@ -77,29 +77,6 @@ fn process_anchor_links(content: &str) -> String { }).to_string() } -fn highlight_code_blocks(html: &str) -> String { - let ss = SyntaxSet::load_defaults_newlines(); - let ts = ThemeSet::load_defaults(); - let theme = &ts.themes["base16-ocean.dark"]; - - // Simple code block detection and highlighting - // In a real implementation, you'd want to parse the HTML and handle code blocks properly - let re = regex::Regex::new(r#"
([^<]+)
"#).unwrap(); - re.replace_all(html, |caps: ®ex::Captures| { - let lang = &caps[1]; - let code = &caps[2]; - - if let Some(syntax) = ss.find_syntax_by_token(lang) { - match highlighted_html_for_string(code, &ss, syntax, theme) { - Ok(highlighted) => highlighted, - Err(_) => caps[0].to_string(), - } - } else { - caps[0].to_string() - } - }).to_string() -} - pub fn get_post_by_slug(slug: &str) -> Result> { let posts_dir = get_posts_directory(); let file_path = posts_dir.join(format!("{}.md", slug)); @@ -124,12 +101,70 @@ pub fn get_post_by_slug(slug: &str) -> Result> let created_at = get_file_creation_date(&file_path)?; let processed_markdown = process_anchor_links(&result.content); - let mut html_output = String::new(); let parser = Parser::new_ext(&processed_markdown, Options::all()); - html::push_html(&mut html_output, parser); + let mut html_output = String::new(); + let mut heading_text = String::new(); + let mut in_heading = false; + let mut heading_level = 0; + let mut in_code_block = false; + let mut code_block_lang = String::new(); + let mut code_block_content = String::new(); + let mut events = Vec::new(); + let ss = SyntaxSet::load_defaults_newlines(); + let ts = ThemeSet::load_defaults(); + let theme = &ts.themes["base16-ocean.dark"]; + for event in parser { + match &event { + Event::Start(Tag::Heading(level, _, _)) => { + in_heading = true; + heading_level = *level as usize; + heading_text.clear(); + }, + Event::End(Tag::Heading(_, _, _)) => { + in_heading = false; + let id = slugify(&heading_text); + events.push(Event::Html(CowStr::Boxed(format!("", lvl=heading_level, id=id).into_boxed_str()))); + events.push(Event::Text(CowStr::Boxed(heading_text.clone().into_boxed_str()))); + events.push(Event::Html(CowStr::Boxed(format!("", lvl=heading_level).into_boxed_str()))); + }, + Event::Text(text) if in_heading => { + heading_text.push_str(text); + }, + Event::Start(Tag::CodeBlock(kind)) => { + in_code_block = true; + code_block_content.clear(); + 
code_block_lang = match kind { + pulldown_cmark::CodeBlockKind::Fenced(lang) => lang.to_string(), + pulldown_cmark::CodeBlockKind::Indented => String::new(), + }; + }, + Event::End(Tag::CodeBlock(_)) => { + in_code_block = false; + // Highlight code block + let highlighted = if !code_block_lang.is_empty() { + if let Some(syntax) = ss.find_syntax_by_token(&code_block_lang) { + highlighted_html_for_string(&code_block_content, &ss, syntax, theme).unwrap_or_else(|_| format!("
{}
", html_escape::encode_text(&code_block_content))) + } else { + format!("
{}
", html_escape::encode_text(&code_block_content)) + } + } else { + // No language specified + format!("
{}
", html_escape::encode_text(&code_block_content)) + }; + events.push(Event::Html(CowStr::Boxed(highlighted.into_boxed_str()))); + }, + Event::Text(text) if in_code_block => { + code_block_content.push_str(text); + }, + _ if !in_heading && !in_code_block => { + events.push(event); + }, + _ => {}, + } + } + html::push_html(&mut html_output, events.into_iter()); - let highlighted_html = highlight_code_blocks(&html_output); - let sanitized_html = clean(&highlighted_html); + let sanitized_html = clean(&html_output); Ok(Post { slug: slug.to_string(), From 0878b7dcec511227cc299a68c9fb48b0a0cb3c8c Mon Sep 17 00:00:00 2001 From: rattatwinko Date: Wed, 25 Jun 2025 17:30:45 +0200 Subject: [PATCH 4/7] tags working now. rust parser works pretty good. next is cache --- markdown_backend/Cargo.toml | 1 + markdown_backend/src/markdown.rs | 40 ++++++++++++++++++++++++-- src/app/posts/[...slug]/page.tsx | 48 ++++++++++++++++++++++++-------- 3 files changed, 75 insertions(+), 14 deletions(-) diff --git a/markdown_backend/Cargo.toml b/markdown_backend/Cargo.toml index 1b38d05..5a55a40 100644 --- a/markdown_backend/Cargo.toml +++ b/markdown_backend/Cargo.toml @@ -17,3 +17,4 @@ regex = "1.10" clap = { version = "4.4", features = ["derive"] } serde_json = "1.0" html-escape = "0.2.13" +once_cell = "1.18" diff --git a/markdown_backend/src/markdown.rs b/markdown_backend/src/markdown.rs index d8a3f9e..15bfcc5 100644 --- a/markdown_backend/src/markdown.rs +++ b/markdown_backend/src/markdown.rs @@ -15,6 +15,7 @@ use std::time::Duration; use syntect::highlighting::{ThemeSet, Style}; use syntect::parsing::SyntaxSet; use syntect::html::{highlighted_html_for_string, IncludeBackground}; +use once_cell::sync::Lazy; #[derive(Debug, Deserialize, Clone, serde::Serialize)] pub struct PostFrontmatter { @@ -77,6 +78,39 @@ fn process_anchor_links(content: &str) -> String { }).to_string() } +// Helper function to strip emojis from a string +fn strip_emojis(s: &str) -> String { + // Remove all characters in 
the Emoji Unicode ranges + // This is a simple approach and may not cover all emojis, but works for most cases + // Unicode emoji ranges: https://unicode.org/Public/emoji/15.0/emoji-data.txt + s.chars() + .filter(|c| { + let c = *c as u32; + // Basic Emoji ranges + !( (c >= 0x1F600 && c <= 0x1F64F) // Emoticons + || (c >= 0x1F300 && c <= 0x1F5FF) // Misc Symbols and Pictographs + || (c >= 0x1F680 && c <= 0x1F6FF) // Transport and Map + || (c >= 0x2600 && c <= 0x26FF) // Misc symbols + || (c >= 0x2700 && c <= 0x27BF) // Dingbats + || (c >= 0x1F900 && c <= 0x1F9FF) // Supplemental Symbols and Pictographs + || (c >= 0x1FA70 && c <= 0x1FAFF) // Symbols and Pictographs Extended-A + || (c >= 0x1F1E6 && c <= 0x1F1FF) // Regional Indicator Symbols + ) + }) + .collect() +} + +static AMMONIA: Lazy> = Lazy::new(|| { + let mut builder = ammonia::Builder::default(); + builder.add_tag_attributes("h1", &["id"]); + builder.add_tag_attributes("h2", &["id"]); + builder.add_tag_attributes("h3", &["id"]); + builder.add_tag_attributes("h4", &["id"]); + builder.add_tag_attributes("h5", &["id"]); + builder.add_tag_attributes("h6", &["id"]); + builder +}); + pub fn get_post_by_slug(slug: &str) -> Result> { let posts_dir = get_posts_directory(); let file_path = posts_dir.join(format!("{}.md", slug)); @@ -122,7 +156,9 @@ pub fn get_post_by_slug(slug: &str) -> Result> }, Event::End(Tag::Heading(_, _, _)) => { in_heading = false; - let id = slugify(&heading_text); + // Strip emojis before slugifying for the id + let heading_no_emoji = strip_emojis(&heading_text); + let id = slugify(&heading_no_emoji); events.push(Event::Html(CowStr::Boxed(format!("", lvl=heading_level, id=id).into_boxed_str()))); events.push(Event::Text(CowStr::Boxed(heading_text.clone().into_boxed_str()))); events.push(Event::Html(CowStr::Boxed(format!("", lvl=heading_level).into_boxed_str()))); @@ -164,7 +200,7 @@ pub fn get_post_by_slug(slug: &str) -> Result> } html::push_html(&mut html_output, events.into_iter()); - let 
sanitized_html = clean(&html_output); + let sanitized_html = AMMONIA.clean(&html_output).to_string(); Ok(Post { slug: slug.to_string(), diff --git a/src/app/posts/[...slug]/page.tsx b/src/app/posts/[...slug]/page.tsx index 5f52973..7db7cdb 100644 --- a/src/app/posts/[...slug]/page.tsx +++ b/src/app/posts/[...slug]/page.tsx @@ -21,6 +21,16 @@ const parserStats = { lastRustError: '', }; +// Add a slugify function that matches Rust's slug::slugify +function slugify(text: string): string { + return text + .toLowerCase() + .normalize('NFKD') + .replace(/[\u0300-\u036F]/g, '') // Remove diacritics + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, ''); +} + export default function PostPage({ params }: { params: { slug: string[] } }) { const [post, setPost] = useState(null); // Modal state for zoomed image @@ -315,35 +325,49 @@ export default function PostPage({ params }: { params: { slug: string[] } }) { } } - // Find the element, but only consider visible ones - const allElements = document.querySelectorAll(`#${id}`); + // Try to find the element by the raw ID first + let allElements = document.querySelectorAll(`#${id}`); let element: HTMLElement | null = null; - - // Check if we're on desktop or mobile - const isDesktop = window.innerWidth >= 640; - for (const el of Array.from(allElements)) { const htmlEl = el as HTMLElement; - // Check if the element is visible (not hidden by CSS) const rect = htmlEl.getBoundingClientRect(); const isVisible = rect.width > 0 && rect.height > 0; - if (isVisible) { element = htmlEl; break; } } - if (element) { - console.log('Found target element:', element.textContent?.substring(0, 50)); + console.log('Found target element (raw id):', element.textContent?.substring(0, 50)); scrollToElement(element); - } else if (retryCount < 5) { + return; + } + // If not found, try slugified version + const slugId = slugify(id); + if (slugId !== id) { + allElements = document.querySelectorAll(`#${slugId}`); + for (const el of Array.from(allElements)) 
{ + const htmlEl = el as HTMLElement; + const rect = htmlEl.getBoundingClientRect(); + const isVisible = rect.width > 0 && rect.height > 0; + if (isVisible) { + element = htmlEl; + break; + } + } + if (element) { + console.log('Found target element (slugified id):', element.textContent?.substring(0, 50)); + scrollToElement(element); + return; + } + } + if (retryCount < 5) { console.log(`Element not found for anchor: ${id}, retrying... (${retryCount + 1}/5)`); setTimeout(() => { findAndScrollToElement(id, retryCount + 1); }, 100); } else { - console.warn(`Element with id "${id}" not found after retries`); + console.warn(`Element with id "${id}" (or slugified "${slugId}") not found after retries`); } }; From 2d373da4c5f452bede2fbe6fae651c5a6acecd4c Mon Sep 17 00:00:00 2001 From: rattatwinko Date: Wed, 25 Jun 2025 18:03:44 +0200 Subject: [PATCH 5/7] stat page and cache working --- markdown_backend/src/main.rs | 5 ++ markdown_backend/src/markdown.rs | 67 ++++++++++++++++++-- src/app/admin/manage/page.tsx | 10 +++ src/app/admin/manage/rust-status.tsx | 74 ++++++++++++++++++++++ src/app/admin/manage/rust-status/page.tsx | 75 +++++++++++++++++++++++ src/app/api/admin/posts/route.ts | 22 +++++++ 6 files changed, 248 insertions(+), 5 deletions(-) create mode 100644 src/app/admin/manage/rust-status.tsx create mode 100644 src/app/admin/manage/rust-status/page.tsx diff --git a/markdown_backend/src/main.rs b/markdown_backend/src/main.rs index 6e7b80c..8dc14fc 100644 --- a/markdown_backend/src/main.rs +++ b/markdown_backend/src/main.rs @@ -25,6 +25,8 @@ enum Commands { }, /// Watch for changes in the posts directory Watch, + /// Show Rust parser statistics + Rsparseinfo, } fn main() { @@ -65,5 +67,8 @@ fn main() { std::thread::sleep(std::time::Duration::from_secs(60)); } } + Commands::Rsparseinfo => { + println!("{}", markdown::rsparseinfo()); + } } } \ No newline at end of file diff --git a/markdown_backend/src/markdown.rs b/markdown_backend/src/markdown.rs index 
15bfcc5..6e3a529 100644 --- a/markdown_backend/src/markdown.rs +++ b/markdown_backend/src/markdown.rs @@ -11,11 +11,14 @@ use ammonia::clean; use slug::slugify; use notify::{RecursiveMode, RecommendedWatcher, Watcher, Config}; use std::sync::mpsc::channel; -use std::time::Duration; +use std::time::{Duration, Instant}; use syntect::highlighting::{ThemeSet, Style}; use syntect::parsing::SyntaxSet; use syntect::html::{highlighted_html_for_string, IncludeBackground}; use once_cell::sync::Lazy; +use std::collections::HashMap; +use std::sync::RwLock; +use serde_json; #[derive(Debug, Deserialize, Clone, serde::Serialize)] pub struct PostFrontmatter { @@ -37,6 +40,19 @@ pub struct Post { pub author: String, } +#[derive(Debug, Clone, serde::Serialize, Default)] +pub struct PostStats { + pub slug: String, + pub cache_hits: u64, + pub cache_misses: u64, + pub last_interpret_time_ms: u128, + pub last_compile_time_ms: u128, +} + +static POST_CACHE: Lazy>> = Lazy::new(|| RwLock::new(HashMap::new())); +static ALL_POSTS_CACHE: Lazy>>> = Lazy::new(|| RwLock::new(None)); +static POST_STATS: Lazy>> = Lazy::new(|| RwLock::new(HashMap::new())); + fn get_posts_directory() -> PathBuf { let candidates = [ "./posts", @@ -111,7 +127,27 @@ static AMMONIA: Lazy> = Lazy::new(|| { builder }); +pub fn rsparseinfo() -> String { + let stats = POST_STATS.read().unwrap(); + serde_json::to_string(&stats.values().collect::>()).unwrap_or_else(|_| "[]".to_string()) +} + pub fn get_post_by_slug(slug: &str) -> Result> { + let start = Instant::now(); + let mut stats = POST_STATS.write().unwrap(); + let entry = stats.entry(slug.to_string()).or_insert_with(|| PostStats { + slug: slug.to_string(), + ..Default::default() + }); + // Try cache first + if let Some(post) = POST_CACHE.read().unwrap().get(slug).cloned() { + entry.cache_hits += 1; + entry.last_interpret_time_ms = 0; + entry.last_compile_time_ms = 0; + return Ok(post); + } + entry.cache_misses += 1; + drop(stats); // Release lock before heavy work let 
posts_dir = get_posts_directory(); let file_path = posts_dir.join(format!("{}.md", slug)); let file_content = fs::read_to_string(&file_path)?; @@ -202,7 +238,9 @@ pub fn get_post_by_slug(slug: &str) -> Result> let sanitized_html = AMMONIA.clean(&html_output).to_string(); - Ok(Post { + let interpret_time = start.elapsed(); + let compile_start = Instant::now(); + let post = Post { slug: slug.to_string(), title: front.title, date: front.date, @@ -211,10 +249,26 @@ pub fn get_post_by_slug(slug: &str) -> Result> content: sanitized_html, created_at: created_at.to_rfc3339(), author: std::env::var("BLOG_OWNER").unwrap_or_else(|_| "Anonymous".to_string()), - }) + }; + let compile_time = compile_start.elapsed(); + // Insert into cache + POST_CACHE.write().unwrap().insert(slug.to_string(), post.clone()); + // Update stats + let mut stats = POST_STATS.write().unwrap(); + let entry = stats.entry(slug.to_string()).or_insert_with(|| PostStats { + slug: slug.to_string(), + ..Default::default() + }); + entry.last_interpret_time_ms = interpret_time.as_millis(); + entry.last_compile_time_ms = compile_time.as_millis(); + Ok(post) } pub fn get_all_posts() -> Result, Box> { + // Try cache first + if let Some(posts) = ALL_POSTS_CACHE.read().unwrap().clone() { + return Ok(posts); + } let posts_dir = get_posts_directory(); let mut posts = Vec::new(); for entry in fs::read_dir(posts_dir)? { @@ -228,6 +282,8 @@ pub fn get_all_posts() -> Result, Box> { } } posts.sort_by(|a, b| b.created_at.cmp(&a.created_at)); + // Cache the result + *ALL_POSTS_CACHE.write().unwrap() = Some(posts.clone()); Ok(posts) } @@ -240,11 +296,13 @@ pub fn watch_posts(on_change: F) -> notify::Result { + // Invalidate caches on any change + POST_CACHE.write().unwrap().clear(); + *ALL_POSTS_CACHE.write().unwrap() = None; on_change(); }, Err(e) => { @@ -254,6 +312,5 @@ pub fn watch_posts(on_change: F) -> notify::Result + + + + + Rust Parser Status + + +
+ {loading && ( +
+
+
Loading stats...
)} + {error && ( +
{error}
+ )} + {!loading && !error && ( + <> + {/* Summary Cards */} +
+
+ {totalHits} + Total Cache Hits +
+
+ {totalMisses} + Total Cache Misses +
+
+ {avgInterpret} ms + Avg Interpret Time +
+
+ {avgCompile} ms + Avg Compile Time +
+
+ + {/* Bar Chart */} +
+ +
+ + {/* Raw Data Table */} +
+ + + + + + + + + + + + {stats.length === 0 ? ( + + ) : ( + stats.map(stat => ( + + + + + + + + )) + )} + +
SlugCache HitsCache MissesLast Interpret Time (ms)Last Compile Time (ms)
No stats available.
{stat.slug}{stat.cache_hits}{stat.cache_misses}{stat.last_interpret_time_ms}{stat.last_compile_time_ms}
+
+ + )}
); } \ No newline at end of file