diff --git a/markdown_backend/Cargo.toml b/markdown_backend/Cargo.toml index 31e5e68..1b38d05 100644 --- a/markdown_backend/Cargo.toml +++ b/markdown_backend/Cargo.toml @@ -15,4 +15,5 @@ notify = "6.1" syntect = { version = "5.1", features = ["default"] } regex = "1.10" clap = { version = "4.4", features = ["derive"] } -serde_json = "1.0" \ No newline at end of file +serde_json = "1.0" +html-escape = "0.2.13" diff --git a/markdown_backend/src/markdown.rs b/markdown_backend/src/markdown.rs index 1fa0bb2..d8a3f9e 100644 --- a/markdown_backend/src/markdown.rs +++ b/markdown_backend/src/markdown.rs @@ -4,7 +4,7 @@ use std::fs; use std::path::{Path, PathBuf}; use chrono::{DateTime, Utc}; use serde::Deserialize; -use pulldown_cmark::{Parser, Options, html}; +use pulldown_cmark::{Parser, Options, html, Event, Tag, CowStr}; use gray_matter::engine::YAML; use gray_matter::Matter; use ammonia::clean; @@ -77,29 +77,6 @@ fn process_anchor_links(content: &str) -> String { }).to_string() } -fn highlight_code_blocks(html: &str) -> String { - let ss = SyntaxSet::load_defaults_newlines(); - let ts = ThemeSet::load_defaults(); - let theme = &ts.themes["base16-ocean.dark"]; - - // Simple code block detection and highlighting - // In a real implementation, you'd want to parse the HTML and handle code blocks properly - let re = regex::Regex::new(r#"<pre><code class="language-([^"]+)">([^<]+)</code></pre>"#).unwrap(); - re.replace_all(html, |caps: &regex::Captures| { - let lang = &caps[1]; - let code = &caps[2]; - - if let Some(syntax) = ss.find_syntax_by_token(lang) { - match highlighted_html_for_string(code, &ss, syntax, theme) { - Ok(highlighted) => highlighted, - Err(_) => caps[0].to_string(), - } - } else { - caps[0].to_string() - } - }).to_string() -} - pub fn get_post_by_slug(slug: &str) -> Result> { let posts_dir = get_posts_directory(); let file_path = posts_dir.join(format!("{}.md", slug)); @@ -124,12 +101,70 @@ pub fn get_post_by_slug(slug: &str) -> Result> let created_at = get_file_creation_date(&file_path)?; let processed_markdown = process_anchor_links(&result.content); - let mut html_output = String::new(); let parser = Parser::new_ext(&processed_markdown, Options::all()); - html::push_html(&mut html_output, parser); + let mut html_output = String::new(); + let mut heading_text = String::new(); + let mut in_heading = false; + let mut heading_level = 0; + let mut in_code_block = false; + let mut code_block_lang = String::new(); + let mut code_block_content = String::new(); + let mut events = Vec::new(); + let ss = SyntaxSet::load_defaults_newlines(); + let ts = ThemeSet::load_defaults(); + let theme = &ts.themes["base16-ocean.dark"]; + for event in parser { + match &event { + Event::Start(Tag::Heading(level, _, _)) => { + in_heading = true; + heading_level = *level as usize; + heading_text.clear(); + }, + Event::End(Tag::Heading(_, _, _)) => { + in_heading = false; + let id = slugify(&heading_text); + events.push(Event::Html(CowStr::Boxed(format!("<h{lvl} id=\"{id}\">", lvl=heading_level, id=id).into_boxed_str()))); + events.push(Event::Text(CowStr::Boxed(heading_text.clone().into_boxed_str()))); + events.push(Event::Html(CowStr::Boxed(format!("</h{lvl}>", lvl=heading_level).into_boxed_str()))); + }, + Event::Text(text) if in_heading => { + heading_text.push_str(text); + }, + Event::Start(Tag::CodeBlock(kind)) => { + in_code_block = true; + code_block_content.clear(); +
code_block_lang = match kind { + pulldown_cmark::CodeBlockKind::Fenced(lang) => lang.to_string(), + pulldown_cmark::CodeBlockKind::Indented => String::new(), + }; + }, + Event::End(Tag::CodeBlock(_)) => { + in_code_block = false; + // Highlight code block + let highlighted = if !code_block_lang.is_empty() { + if let Some(syntax) = ss.find_syntax_by_token(&code_block_lang) { + highlighted_html_for_string(&code_block_content, &ss, syntax, theme).unwrap_or_else(|_| format!("<pre><code>{}</code></pre>", html_escape::encode_text(&code_block_content))) + } else { + format!("<pre><code>{}</code></pre>", html_escape::encode_text(&code_block_content)) + } + } else { + // No language specified + format!("<pre><code>{}</code></pre>", html_escape::encode_text(&code_block_content)) + }; + events.push(Event::Html(CowStr::Boxed(highlighted.into_boxed_str()))); + }, + Event::Text(text) if in_code_block => { + code_block_content.push_str(text); + }, + _ if !in_heading && !in_code_block => { + events.push(event); + }, + _ => {}, + } + } + html::push_html(&mut html_output, events.into_iter()); - let highlighted_html = highlight_code_blocks(&html_output); - let sanitized_html = clean(&highlighted_html); + let sanitized_html = clean(&html_output); Ok(Post { slug: slug.to_string(),