Tags are working now. The Rust parser works pretty well. Next up is the cache.

This commit is contained in:
2025-06-25 17:30:45 +02:00
parent b0b6625810
commit 0878b7dcec
3 changed files with 75 additions and 14 deletions

View File

@@ -15,6 +15,7 @@ use std::time::Duration;
use syntect::highlighting::{ThemeSet, Style};
use syntect::parsing::SyntaxSet;
use syntect::html::{highlighted_html_for_string, IncludeBackground};
use once_cell::sync::Lazy;
#[derive(Debug, Deserialize, Clone, serde::Serialize)]
pub struct PostFrontmatter {
@@ -77,6 +78,39 @@ fn process_anchor_links(content: &str) -> String {
}).to_string()
}
/// Returns a copy of `s` with emoji code points removed.
///
/// A character is dropped when it falls in one of the emoji blocks listed in
/// `EMOJI_RANGES`, or when it is one of the invisible code points that only
/// exist to decorate an emoji:
///
/// * U+FE0F (variation selector-16, "emoji presentation") and U+20E3
///   (combining enclosing keycap) are always removed.
/// * U+200D (zero width joiner) is removed only when it directly follows a
///   removed code point, i.e. when it was joining emoji. A joiner that follows
///   ordinary text is kept, because some scripts use it for shaping.
///
/// This is a simple range-based approach and may not cover every emoji.
/// Unicode emoji ranges: https://unicode.org/Public/emoji/15.0/emoji-data.txt
fn strip_emojis(s: &str) -> String {
    // Inclusive code point ranges treated as emoji.
    const EMOJI_RANGES: [(u32, u32); 8] = [
        (0x1F600, 0x1F64F), // Emoticons
        (0x1F300, 0x1F5FF), // Misc Symbols and Pictographs
        (0x1F680, 0x1F6FF), // Transport and Map
        (0x2600, 0x26FF),   // Misc symbols
        (0x2700, 0x27BF),   // Dingbats
        (0x1F900, 0x1F9FF), // Supplemental Symbols and Pictographs
        (0x1FA70, 0x1FAFF), // Symbols and Pictographs Extended-A
        (0x1F1E6, 0x1F1FF), // Regional Indicator Symbols
    ];
    const VARIATION_SELECTOR_16: char = '\u{FE0F}';
    const COMBINING_KEYCAP: char = '\u{20E3}';
    const ZERO_WIDTH_JOINER: char = '\u{200D}';

    let mut out = String::with_capacity(s.len());
    // Tracks whether the previous code point was dropped, so a joiner that
    // glued two emoji together is dropped along with them.
    let mut prev_stripped = false;

    for ch in s.chars() {
        let code = u32::from(ch);
        let strip = EMOJI_RANGES
            .iter()
            .any(|&(lo, hi)| (lo..=hi).contains(&code))
            || ch == VARIATION_SELECTOR_16
            || ch == COMBINING_KEYCAP
            || (ch == ZERO_WIDTH_JOINER && prev_stripped);

        if !strip {
            out.push(ch);
        }
        prev_stripped = strip;
    }

    out
}
/// Shared HTML sanitizer, built once on first use.
///
/// Starts from ammonia's default configuration and additionally permits the
/// `id` attribute on every heading level (`h1` through `h6`).
static AMMONIA: Lazy<ammonia::Builder<'static>> = Lazy::new(|| {
    // Heading tags that are allowed to carry an `id` attribute.
    const HEADING_TAGS: [&str; 6] = ["h1", "h2", "h3", "h4", "h5", "h6"];

    let mut sanitizer = ammonia::Builder::default();
    for tag in HEADING_TAGS.iter().copied() {
        sanitizer.add_tag_attributes(tag, &["id"]);
    }
    sanitizer
});
pub fn get_post_by_slug(slug: &str) -> Result<Post, Box<dyn std::error::Error>> {
let posts_dir = get_posts_directory();
let file_path = posts_dir.join(format!("{}.md", slug));
@@ -122,7 +156,9 @@ pub fn get_post_by_slug(slug: &str) -> Result<Post, Box<dyn std::error::Error>>
},
Event::End(Tag::Heading(_, _, _)) => {
in_heading = false;
let id = slugify(&heading_text);
// Strip emojis before slugifying for the id
let heading_no_emoji = strip_emojis(&heading_text);
let id = slugify(&heading_no_emoji);
events.push(Event::Html(CowStr::Boxed(format!("<h{lvl} id=\"{id}\">", lvl=heading_level, id=id).into_boxed_str())));
events.push(Event::Text(CowStr::Boxed(heading_text.clone().into_boxed_str())));
events.push(Event::Html(CowStr::Boxed(format!("</h{lvl}>", lvl=heading_level).into_boxed_str())));
@@ -164,7 +200,7 @@ pub fn get_post_by_slug(slug: &str) -> Result<Post, Box<dyn std::error::Error>>
}
html::push_html(&mut html_output, events.into_iter());
let sanitized_html = clean(&html_output);
let sanitized_html = AMMONIA.clean(&html_output).to_string();
Ok(Post {
slug: slug.to_string(),