From b4b41ebcfda841a992221912af778cb706a73790 Mon Sep 17 00:00:00 2001
From: rattatwinko
Date: Sun, 29 Jun 2025 14:44:42 +0200
Subject: [PATCH] Improve Docker detection and nested Markdown post handling

---
 Dockerfile                       |   4 +
 markdown_backend/src/markdown.rs | 151 +++++++++++++++++++++++++++----
 2 files changed, 135 insertions(+), 20 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 75f8d5d..c0b3099 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -31,6 +31,10 @@ RUN npm run build
 
 # Create and set permissions for the docker volume mount point
 RUN mkdir -p /app/docker && chmod 777 /app/docker
+
+# Set environment variable to indicate we're running in Docker
+ENV DOCKER_CONTAINER=true
+
 VOLUME ["/app/docker"]
 
 EXPOSE 3000

diff --git a/markdown_backend/src/markdown.rs b/markdown_backend/src/markdown.rs
index db93bcf..76c5be5 100644
--- a/markdown_backend/src/markdown.rs
+++ b/markdown_backend/src/markdown.rs
@@ -69,12 +69,30 @@ static ALL_POSTS_CACHE: Lazy<RwLock<Option<Vec<Post>>>> = Lazy::new(|| RwLock::n
 static POST_STATS: Lazy<RwLock<HashMap<String, PostStats>>> = Lazy::new(|| RwLock::new(HashMap::new()));
 
 fn get_posts_directory() -> PathBuf {
-    let candidates = [
-        "./posts",
-        "../posts",
-        "/posts",
-        "/docker"
-    ];
+    // Check if we're running in Docker by looking for common Docker environment indicators
+    let is_docker = std::env::var("DOCKER_CONTAINER").is_ok()
+        || std::env::var("KUBERNETES_SERVICE_HOST").is_ok()
+        || std::path::Path::new("/.dockerenv").exists();
+
+    let candidates = if is_docker {
+        vec![
+            "/app/docker", // Docker volume mount point (highest priority in Docker)
+            "/app/posts",  // Fallback in Docker
+            "./posts",
+            "../posts",
+            "/posts",
+            "/docker"
+        ]
+    } else {
+        vec![
+            "./posts",
+            "../posts",
+            "/posts",
+            "/docker",
+            "/app/docker" // Lower priority for non-Docker environments
+        ]
+    };
+
     for candidate in candidates.iter() {
         let path = PathBuf::from(candidate);
         if path.exists() && path.is_dir() {
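Note: the detection above can be sanity-checked with a standalone probe of the same three container signals; the file name is hypothetical and the snippet is not part of the patch:

    // probe_docker.rs -- hypothetical scratch binary, not included in this patch.
    // Prints each signal that get_posts_directory() uses for Docker detection.
    fn main() {
        let env_flag = std::env::var("DOCKER_CONTAINER").is_ok();     // set by ENV in the Dockerfile above
        let k8s = std::env::var("KUBERNETES_SERVICE_HOST").is_ok();   // injected into Kubernetes pods
        let dockerenv = std::path::Path::new("/.dockerenv").exists(); // created by the Docker runtime
        println!("DOCKER_CONTAINER={env_flag} k8s={k8s} /.dockerenv={dockerenv}");
    }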
@@ -85,6 +103,62 @@ fn get_posts_directory() -> PathBuf {
     PathBuf::from("./posts")
 }
 
+// Helper function to recursively find all markdown files
+fn find_markdown_files(dir: &Path) -> std::io::Result<Vec<PathBuf>> {
+    let mut files = Vec::new();
+    if dir.is_dir() {
+        for entry in fs::read_dir(dir)? {
+            let entry = entry?;
+            let path = entry.path();
+
+            if path.is_dir() {
+                // Recursively scan subdirectories
+                files.extend(find_markdown_files(&path)?);
+            } else if path.extension().map(|e| e == "md").unwrap_or(false) {
+                files.push(path);
+            }
+        }
+    }
+    Ok(files)
+}
+
+// Helper function to convert a file path to a slug
+fn path_to_slug(file_path: &Path, posts_dir: &Path) -> String {
+    // Get the relative path from posts directory
+    let relative_path = file_path.strip_prefix(posts_dir).unwrap_or(file_path);
+    // Remove the .md extension
+    let without_ext = relative_path.with_extension("");
+    // Convert to string and replace path separators with a special separator
+    // Use "::" as a directory separator to avoid conflicts with hyphens in filenames
+    without_ext.to_string_lossy()
+        .replace(std::path::MAIN_SEPARATOR, "::")
+        .replace("/", "::")
+        .replace("\\", "::")
+}
+
+// Helper function to convert a slug back to a file path
+fn slug_to_path(slug: &str, posts_dir: &Path) -> PathBuf {
+    // Split by the special directory separator "::"
+    let parts: Vec<&str> = slug.split("::").collect();
+    if parts.len() == 1 {
+        // Single part, no subdirectory
+        posts_dir.join(format!("{}.md", parts[0]))
+    } else {
+        // Multiple parts, all but the last are directories, last is filename
+        let mut path = posts_dir.to_path_buf();
+        for (i, part) in parts.iter().enumerate() {
+            if i == parts.len() - 1 {
+                // Last part is the filename
+                path = path.join(format!("{}.md", part));
+            } else {
+                // Other parts are directories
+                path = path.join(part);
+            }
+        }
+        path
+    }
+}
+
 fn get_file_creation_date(path: &Path) -> std::io::Result<DateTime<Utc>> {
     let metadata = fs::metadata(path)?;
     // Try to get creation time, fall back to modification time if not available
@@ -245,8 +319,27 @@ pub fn get_post_by_slug(slug: &str) -> Result<Post, Box<dyn std::error::Error>>
     entry.last_cache_status = "miss".to_string();
     drop(stats); // Release lock before heavy work
     let posts_dir = get_posts_directory();
-    let file_path = posts_dir.join(format!("{}.md", slug));
+    let file_path = slug_to_path(slug, &posts_dir);
+
+    // Add debugging for file path resolution
+    eprintln!("[Rust Parser] Looking for file: {:?}", file_path);
+    eprintln!("[Rust Parser] Posts directory: {:?}", posts_dir);
+    eprintln!("[Rust Parser] Slug: {}", slug);
+
+    if !file_path.exists() {
+        eprintln!("[Rust Parser] File does not exist: {:?}", file_path);
+        return Err(format!("File not found: {:?}", file_path).into());
+    }
+
     let file_content = fs::read_to_string(&file_path)?;
+    eprintln!("[Rust Parser] File size: {} bytes", file_content.len());
+
+    // Check file size limit (10MB)
+    const MAX_FILE_SIZE: usize = 10 * 1024 * 1024; // 10MB
+    if file_content.len() > MAX_FILE_SIZE {
+        eprintln!("[Rust Parser] File too large: {} bytes (max: {} bytes)", file_content.len(), MAX_FILE_SIZE);
+        return Err(format!("File too large: {} bytes (max: {} bytes)", file_content.len(), MAX_FILE_SIZE).into());
+    }
 
     let matter = Matter::<YAML>::new();
     let result = matter.parse(&file_content);
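Note: the "::" slug scheme introduced above round-trips; a minimal sketch, assuming path_to_slug and slug_to_path are in scope (the /app/posts layout is illustrative, not part of the patch):

    use std::path::{Path, PathBuf};

    fn slug_round_trip() {
        let posts_dir = Path::new("/app/posts");
        let file = PathBuf::from("/app/posts/guides/docker-setup.md");
        // Nested directories become "::"-separated slug segments...
        assert_eq!(path_to_slug(&file, posts_dir), "guides::docker-setup");
        // ...and slug_to_path() inverts the mapping back to the .md file.
        assert_eq!(slug_to_path("guides::docker-setup", posts_dir), file);
    }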
@@ -255,18 +348,20 @@ pub fn get_post_by_slug(slug: &str) -> Result<Post, Box<dyn std::error::Error>>
         match data.deserialize() {
             Ok(front) => front,
             Err(e) => {
-                eprintln!("Failed to deserialize frontmatter for post {}: {}", slug, e);
-                return Err("Failed to deserialize frontmatter".into());
+                eprintln!("[Rust Parser] Failed to deserialize frontmatter for post {}: {}", slug, e);
+                return Err(format!("Failed to deserialize frontmatter: {}", e).into());
             }
         }
     } else {
-        eprintln!("No frontmatter found for post: {}", slug);
+        eprintln!("[Rust Parser] No frontmatter found for post: {}", slug);
         return Err("No frontmatter found".into());
     };
 
     let created_at = get_file_creation_date(&file_path)?;
 
     let processed_markdown = process_anchor_links(&result.content);
+    eprintln!("[Rust Parser] Processed markdown length: {} characters", processed_markdown.len());
+
     let parser = Parser::new_ext(&processed_markdown, Options::all());
     let mut html_output = String::new();
     let mut heading_text = String::new();
@@ -279,7 +374,21 @@ pub fn get_post_by_slug(slug: &str) -> Result<Post, Box<dyn std::error::Error>>
     let ss = SyntaxSet::load_defaults_newlines();
     let ts = ThemeSet::load_defaults();
     let theme = &ts.themes["base16-ocean.dark"];
+
+    // Add error handling around the parsing loop
+    let mut event_count = 0;
+    let start_parsing = Instant::now();
     for event in parser {
+        event_count += 1;
+        if event_count % 1000 == 0 {
+            eprintln!("[Rust Parser] Processed {} events for slug: {}", event_count, slug);
+            // Check for timeout (30 seconds)
+            if start_parsing.elapsed().as_secs() > 30 {
+                eprintln!("[Rust Parser] Timeout reached for slug: {}", slug);
+                return Err("Parsing timeout - file too large".into());
+            }
+        }
+
         match &event {
             Event::Start(Tag::Heading(level, _, _)) => {
                 in_heading = true;
@@ -332,9 +441,12 @@ pub fn get_post_by_slug(slug: &str) -> Result<Post, Box<dyn std::error::Error>>
             _ => {},
         }
     }
+    eprintln!("[Rust Parser] Total events processed: {} for slug: {}", event_count, slug);
 
     html::push_html(&mut html_output, events.into_iter());
+    eprintln!("[Rust Parser] HTML output length: {} characters", html_output.len());
     let sanitized_html = AMMONIA.clean(&html_output).to_string();
+    eprintln!("[Rust Parser] Sanitized HTML length: {} characters", sanitized_html.len());
 
     let interpret_time = start.elapsed();
     let compile_start = Instant::now();
@@ -370,19 +482,18 @@ pub fn get_all_posts() -> Result<Vec<Post>, Box<dyn std::error::Error>> {
         return Ok(posts);
     }
     let posts_dir = get_posts_directory();
+    let markdown_files = find_markdown_files(&posts_dir)?;
     let mut posts = Vec::new();
-    for entry in fs::read_dir(posts_dir)? {
-        let entry = entry?;
-        let path = entry.path();
-        if path.extension().map(|e| e == "md").unwrap_or(false) {
-            let file_stem = path.file_stem().unwrap().to_string_lossy();
-            if let Ok(post) = get_post_by_slug(&file_stem) {
-                // Insert each post into the individual post cache as well
-                POST_CACHE.write().unwrap().insert(file_stem.to_string(), post.clone());
-                posts.push(post);
-            }
+
+    for file_path in markdown_files {
+        let slug = path_to_slug(&file_path, &posts_dir);
+        if let Ok(post) = get_post_by_slug(&slug) {
+            // Insert each post into the individual post cache as well
+            POST_CACHE.write().unwrap().insert(slug.clone(), post.clone());
+            posts.push(post);
         }
     }
+
     posts.sort_by(|a, b| b.created_at.cmp(&a.created_at));
     // Cache the result
     *ALL_POSTS_CACHE.write().unwrap() = Some(posts.clone());
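Note: a sketch of how the new recursive scan and slug mapping compose, assuming the helpers from this patch are in scope; list_slugs is a hypothetical name, not part of the patch:

    // Enumerates every slug the rebuilt get_all_posts() would serve.
    fn list_slugs() -> std::io::Result<Vec<String>> {
        let posts_dir = get_posts_directory();
        let mut slugs: Vec<String> = find_markdown_files(&posts_dir)?
            .iter()
            .map(|path| path_to_slug(path, &posts_dir))
            .collect();
        slugs.sort();
        Ok(slugs)
    }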