From 0878b7dcec511227cc299a68c9fb48b0a0cb3c8c Mon Sep 17 00:00:00 2001 From: rattatwinko Date: Wed, 25 Jun 2025 17:30:45 +0200 Subject: [PATCH] tags working now. rust parser works pretty good. next is cache --- markdown_backend/Cargo.toml | 1 + markdown_backend/src/markdown.rs | 40 ++++++++++++++++++++++++-- src/app/posts/[...slug]/page.tsx | 48 ++++++++++++++++++++++++-------- 3 files changed, 75 insertions(+), 14 deletions(-) diff --git a/markdown_backend/Cargo.toml b/markdown_backend/Cargo.toml index 1b38d05..5a55a40 100644 --- a/markdown_backend/Cargo.toml +++ b/markdown_backend/Cargo.toml @@ -17,3 +17,4 @@ regex = "1.10" clap = { version = "4.4", features = ["derive"] } serde_json = "1.0" html-escape = "0.2.13" +once_cell = "1.18" diff --git a/markdown_backend/src/markdown.rs b/markdown_backend/src/markdown.rs index d8a3f9e..15bfcc5 100644 --- a/markdown_backend/src/markdown.rs +++ b/markdown_backend/src/markdown.rs @@ -15,6 +15,7 @@ use std::time::Duration; use syntect::highlighting::{ThemeSet, Style}; use syntect::parsing::SyntaxSet; use syntect::html::{highlighted_html_for_string, IncludeBackground}; +use once_cell::sync::Lazy; #[derive(Debug, Deserialize, Clone, serde::Serialize)] pub struct PostFrontmatter { @@ -77,6 +78,39 @@ fn process_anchor_links(content: &str) -> String { }).to_string() } +// Helper function to strip emojis from a string +fn strip_emojis(s: &str) -> String { + // Remove all characters in the Emoji Unicode ranges + // This is a simple approach and may not cover all emojis, but works for most cases + // Unicode emoji ranges: https://unicode.org/Public/emoji/15.0/emoji-data.txt + s.chars() + .filter(|c| { + let c = *c as u32; + // Basic Emoji ranges + !( (c >= 0x1F600 && c <= 0x1F64F) // Emoticons + || (c >= 0x1F300 && c <= 0x1F5FF) // Misc Symbols and Pictographs + || (c >= 0x1F680 && c <= 0x1F6FF) // Transport and Map + || (c >= 0x2600 && c <= 0x26FF) // Misc symbols + || (c >= 0x2700 && c <= 0x27BF) // Dingbats + || (c >= 0x1F900 && c <= 0x1F9FF) // Supplemental Symbols and Pictographs + || (c >= 0x1FA70 && c <= 0x1FAFF) // Symbols and Pictographs Extended-A + || (c >= 0x1F1E6 && c <= 0x1F1FF) // Regional Indicator Symbols + ) + }) + .collect() +} + +static AMMONIA: Lazy> = Lazy::new(|| { + let mut builder = ammonia::Builder::default(); + builder.add_tag_attributes("h1", &["id"]); + builder.add_tag_attributes("h2", &["id"]); + builder.add_tag_attributes("h3", &["id"]); + builder.add_tag_attributes("h4", &["id"]); + builder.add_tag_attributes("h5", &["id"]); + builder.add_tag_attributes("h6", &["id"]); + builder +}); + pub fn get_post_by_slug(slug: &str) -> Result> { let posts_dir = get_posts_directory(); let file_path = posts_dir.join(format!("{}.md", slug)); @@ -122,7 +156,9 @@ pub fn get_post_by_slug(slug: &str) -> Result> }, Event::End(Tag::Heading(_, _, _)) => { in_heading = false; - let id = slugify(&heading_text); + // Strip emojis before slugifying for the id + let heading_no_emoji = strip_emojis(&heading_text); + let id = slugify(&heading_no_emoji); events.push(Event::Html(CowStr::Boxed(format!("", lvl=heading_level, id=id).into_boxed_str()))); events.push(Event::Text(CowStr::Boxed(heading_text.clone().into_boxed_str()))); events.push(Event::Html(CowStr::Boxed(format!("", lvl=heading_level).into_boxed_str()))); @@ -164,7 +200,7 @@ pub fn get_post_by_slug(slug: &str) -> Result> } html::push_html(&mut html_output, events.into_iter()); - let sanitized_html = clean(&html_output); + let sanitized_html = AMMONIA.clean(&html_output).to_string(); Ok(Post { slug: slug.to_string(), diff --git a/src/app/posts/[...slug]/page.tsx b/src/app/posts/[...slug]/page.tsx index 5f52973..7db7cdb 100644 --- a/src/app/posts/[...slug]/page.tsx +++ b/src/app/posts/[...slug]/page.tsx @@ -21,6 +21,16 @@ const parserStats = { lastRustError: '', }; +// Add a slugify function that matches Rust's slug::slugify +function slugify(text: string): string { + return text + .toLowerCase() + .normalize('NFKD') + .replace(/[\u0300-\u036F]/g, '') // Remove diacritics + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, ''); +} + export default function PostPage({ params }: { params: { slug: string[] } }) { const [post, setPost] = useState(null); // Modal state for zoomed image @@ -315,35 +325,49 @@ export default function PostPage({ params }: { params: { slug: string[] } }) { } } - // Find the element, but only consider visible ones - const allElements = document.querySelectorAll(`#${id}`); + // Try to find the element by the raw ID first + let allElements = document.querySelectorAll(`#${id}`); let element: HTMLElement | null = null; - - // Check if we're on desktop or mobile - const isDesktop = window.innerWidth >= 640; - for (const el of Array.from(allElements)) { const htmlEl = el as HTMLElement; - // Check if the element is visible (not hidden by CSS) const rect = htmlEl.getBoundingClientRect(); const isVisible = rect.width > 0 && rect.height > 0; - if (isVisible) { element = htmlEl; break; } } - if (element) { - console.log('Found target element:', element.textContent?.substring(0, 50)); + console.log('Found target element (raw id):', element.textContent?.substring(0, 50)); scrollToElement(element); - } else if (retryCount < 5) { + return; + } + // If not found, try slugified version + const slugId = slugify(id); + if (slugId !== id) { + allElements = document.querySelectorAll(`#${slugId}`); + for (const el of Array.from(allElements)) { + const htmlEl = el as HTMLElement; + const rect = htmlEl.getBoundingClientRect(); + const isVisible = rect.width > 0 && rect.height > 0; + if (isVisible) { + element = htmlEl; + break; + } + } + if (element) { + console.log('Found target element (slugified id):', element.textContent?.substring(0, 50)); + scrollToElement(element); + return; + } + } + if (retryCount < 5) { console.log(`Element not found for anchor: ${id}, retrying... (${retryCount + 1}/5)`); setTimeout(() => { findAndScrollToElement(id, retryCount + 1); }, 100); } else { - console.warn(`Element with id "${id}" not found after retries`); + console.warn(`Element with id "${id}" (or slugified "${slugId}") not found after retries`); } };