cleaned up and added a logging system
This commit is contained in:
@@ -1,41 +1,40 @@
|
||||
//
|
||||
// src/markdown.rs
|
||||
/*
|
||||
// Written by: @rattatwinko
|
||||
//
|
||||
|
||||
This is the Rust Markdown Parser.
|
||||
It supports caching of posts and is
|
||||
|
||||
BLAZINGLY FAST!
|
||||
|
||||
*/
|
||||
|
||||
#[warn(unused_imports)]
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::RwLock;
|
||||
use std::time::Instant;
|
||||
use std::sync::mpsc::channel;
|
||||
use std::collections::VecDeque;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::Deserialize;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use pulldown_cmark::{Parser, Options, html, Event, Tag, CowStr};
|
||||
use gray_matter::engine::YAML;
|
||||
use gray_matter::Matter;
|
||||
use ammonia::clean;
|
||||
use slug::slugify;
|
||||
use notify::{RecursiveMode, RecommendedWatcher, Watcher, Config};
|
||||
use std::sync::mpsc::channel;
|
||||
use std::time::{Duration, Instant};
|
||||
use syntect::highlighting::{ThemeSet, Style};
|
||||
use syntect::highlighting::ThemeSet;
|
||||
use syntect::parsing::SyntaxSet;
|
||||
use syntect::html::{highlighted_html_for_string, IncludeBackground};
|
||||
use syntect::html::highlighted_html_for_string;
|
||||
use once_cell::sync::Lazy;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::RwLock;
|
||||
use serde_json;
|
||||
use sysinfo::{System, Pid, RefreshKind, CpuRefreshKind, ProcessRefreshKind};
|
||||
use serde::Serialize;
|
||||
use sysinfo::{System, RefreshKind, CpuRefreshKind, ProcessRefreshKind};
|
||||
use regex::Regex;
|
||||
|
||||
// Constants

/// Path of the posts cache file whose presence and contents `checkhealth` inspects.
const POSTS_CACHE_PATH: &str = "./cache/posts_cache.json";

/// Path of the post statistics file whose presence and contents `checkhealth` inspects.
const POST_STATS_PATH: &str = "./cache/post_stats.json";

/// Largest markdown file, in bytes, that `get_post_by_slug` will parse (10 MiB).
const MAX_FILE_SIZE: usize = 10 * 1024 * 1024;

/// Number of seconds the markdown event loop may run before parsing is aborted.
const PARSING_TIMEOUT_SECS: u64 = 30;

/// Upper bound on the number of entries retained in the in-memory parser log.
const MAX_LOG_ENTRIES: usize = 1000;
|
||||
|
||||
#[derive(Debug, Deserialize, Clone, serde::Serialize)]
|
||||
// Data structures
|
||||
#[derive(Debug, Deserialize, Clone, Serialize)]
|
||||
pub struct PostFrontmatter {
|
||||
pub title: String,
|
||||
pub date: String,
|
||||
@@ -43,7 +42,7 @@ pub struct PostFrontmatter {
|
||||
pub summary: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Post {
|
||||
pub slug: String,
|
||||
pub title: String,
|
||||
@@ -55,21 +54,17 @@ pub struct Post {
|
||||
pub author: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, Default)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
pub struct PostStats {
|
||||
pub slug: String,
|
||||
pub cache_hits: u64,
|
||||
pub cache_misses: u64,
|
||||
pub last_interpret_time_ms: u128,
|
||||
pub last_compile_time_ms: u128,
|
||||
pub last_cpu_usage_percent: f32, // Not f64
|
||||
pub last_cpu_usage_percent: f32,
|
||||
pub last_cache_status: String, // "hit" or "miss"
|
||||
}
|
||||
|
||||
static POST_CACHE: Lazy<RwLock<HashMap<String, Post>>> = Lazy::new(|| RwLock::new(HashMap::new()));
|
||||
static ALL_POSTS_CACHE: Lazy<RwLock<Option<Vec<Post>>>> = Lazy::new(|| RwLock::new(None));
|
||||
static POST_STATS: Lazy<RwLock<HashMap<String, PostStats>>> = Lazy::new(|| RwLock::new(HashMap::new()));
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct HealthReport {
|
||||
pub posts_dir_exists: bool,
|
||||
@@ -83,197 +78,32 @@ pub struct HealthReport {
|
||||
pub errors: Vec<String>,
|
||||
}
|
||||
|
||||
fn get_posts_directory() -> PathBuf {
|
||||
// Check if we're running in Docker by looking for common Docker environment indicators
|
||||
let is_docker = std::env::var("DOCKER_CONTAINER").is_ok()
|
||||
|| std::env::var("KUBERNETES_SERVICE_HOST").is_ok()
|
||||
|| std::path::Path::new("/.dockerenv").exists();
|
||||
|
||||
let candidates = if is_docker {
|
||||
vec![
|
||||
"/app/docker", // Docker volume mount point (highest priority in Docker)
|
||||
"/app/posts", // Fallback in Docker
|
||||
"./posts",
|
||||
"../posts",
|
||||
"/posts",
|
||||
"/docker"
|
||||
]
|
||||
} else {
|
||||
vec![
|
||||
"./posts",
|
||||
"../posts",
|
||||
"/posts",
|
||||
"/docker",
|
||||
"/app/docker" // Lower priority for non-Docker environments
|
||||
]
|
||||
};
|
||||
|
||||
for candidate in candidates.iter() {
|
||||
let path = PathBuf::from(candidate);
|
||||
if path.exists() && path.is_dir() {
|
||||
return path;
|
||||
}
|
||||
}
|
||||
// Fallback: default to ./posts
|
||||
PathBuf::from("./posts")
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct LogEntry {
|
||||
pub timestamp: String,
|
||||
pub level: String, // "info", "warning", "error"
|
||||
pub message: String,
|
||||
pub slug: Option<String>,
|
||||
pub details: Option<String>,
|
||||
}
|
||||
|
||||
// Helper function to recursively find all markdown files
|
||||
fn find_markdown_files(dir: &Path) -> std::io::Result<Vec<PathBuf>> {
|
||||
let mut files = Vec::new();
|
||||
if dir.is_dir() {
|
||||
for entry in fs::read_dir(dir)? {
|
||||
let entry = entry?;
|
||||
let path = entry.path();
|
||||
|
||||
if path.is_dir() {
|
||||
// Recursively scan subdirectories
|
||||
files.extend(find_markdown_files(&path)?);
|
||||
} else if path.extension().map(|e| e == "md").unwrap_or(false) {
|
||||
files.push(path);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(files)
|
||||
}
|
||||
|
||||
// Helper function to convert a file path to a slug
|
||||
fn path_to_slug(file_path: &Path, posts_dir: &Path) -> String {
|
||||
// Get the relative path from posts directory
|
||||
let relative_path = file_path.strip_prefix(posts_dir).unwrap_or(file_path);
|
||||
// Remove the .md extension
|
||||
let without_ext = relative_path.with_extension("");
|
||||
// Convert to string and replace path separators with a special separator
|
||||
// Use "::" as a directory separator to avoid conflicts with hyphens in filenames
|
||||
without_ext.to_string_lossy()
|
||||
.replace(std::path::MAIN_SEPARATOR, "::")
|
||||
.replace("/", "::")
|
||||
.replace("\\", "::")
|
||||
}
|
||||
|
||||
// Helper function to convert a slug back to a file path
|
||||
fn slug_to_path(slug: &str, posts_dir: &Path) -> PathBuf {
|
||||
// Split by the special directory separator "::"
|
||||
let parts: Vec<&str> = slug.split("::").collect();
|
||||
if parts.len() == 1 {
|
||||
// Single part, no subdirectory
|
||||
posts_dir.join(format!("{}.md", parts[0]))
|
||||
} else {
|
||||
// Multiple parts, all but the last are directories, last is filename
|
||||
let mut path = posts_dir.to_path_buf();
|
||||
for (i, part) in parts.iter().enumerate() {
|
||||
if i == parts.len() - 1 {
|
||||
// Last part is the filename
|
||||
path = path.join(format!("{}.md", part));
|
||||
} else {
|
||||
// Other parts are directories
|
||||
path = path.join(part);
|
||||
}
|
||||
}
|
||||
path
|
||||
}
|
||||
}
|
||||
|
||||
fn get_file_creation_date(path: &Path) -> std::io::Result<DateTime<Utc>> {
|
||||
let metadata = fs::metadata(path)?;
|
||||
// Try to get creation time, fall back to modification time if not available
|
||||
match metadata.created() {
|
||||
Ok(created) => Ok(DateTime::<Utc>::from(created)),
|
||||
Err(_) => {
|
||||
// Fall back to modification time if creation time is not available
|
||||
let modified = metadata.modified()?;
|
||||
Ok(DateTime::<Utc>::from(modified))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn process_anchor_links(content: &str) -> String {
|
||||
// Replace [text](#anchor) with slugified anchor
|
||||
let re = regex::Regex::new(r"\[([^\]]+)\]\(#([^)]+)\)").unwrap();
|
||||
re.replace_all(content, |caps: ®ex::Captures| {
|
||||
let link_text = &caps[1];
|
||||
let anchor = &caps[2];
|
||||
let slugified = slugify(anchor);
|
||||
format!("[{}](#{})", link_text, slugified)
|
||||
}).to_string()
|
||||
}
|
||||
|
||||
// Helper function to strip emojis from a string
|
||||
// Necessary for the slugify function to work correctly, and for the IDs to work with the frontend.
|
||||
fn strip_emojis(s: &str) -> String {
|
||||
// Remove all characters in the Emoji Unicode ranges
|
||||
// This is a simple approach and may not cover all emojis, but works for most cases
|
||||
s.chars()
|
||||
.filter(|c| {
|
||||
let c = *c as u32;
|
||||
// Basic Emoji ranges
|
||||
!( (c >= 0x1F600 && c <= 0x1F64F) // Emoticons
|
||||
|| (c >= 0x1F300 && c <= 0x1F5FF) // Misc Symbols and Pictographs
|
||||
|| (c >= 0x1F680 && c <= 0x1F6FF) // Transport and Map
|
||||
|| (c >= 0x2600 && c <= 0x26FF) // Misc symbols
|
||||
|| (c >= 0x2700 && c <= 0x27BF) // Dingbats
|
||||
|| (c >= 0x1F900 && c <= 0x1F9FF) // Supplemental Symbols and Pictographs
|
||||
|| (c >= 0x1FA70 && c <= 0x1FAFF) // Symbols and Pictographs Extended-A
|
||||
|| (c >= 0x1F1E6 && c <= 0x1F1FF) // Regional Indicator Symbols
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
// Function to process custom tags in markdown content
|
||||
fn process_custom_tags(content: &str) -> String {
|
||||
let mut processed = content.to_string();
|
||||
|
||||
// Handle simple tags without parameters FIRST
|
||||
let simple_tags = [
|
||||
("<mytag />", "<div class=\"custom-tag mytag\">This is my custom tag content!</div>"),
|
||||
("<warning />", "<div class=\"custom-tag warning\" style=\"background: #fff3cd; border: 1px solid #ffeaa7; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">⚠️ Warning: This is a custom warning tag!</div>"),
|
||||
("<info />", "<div class=\"custom-tag info\" style=\"background: #d1ecf1; border: 1px solid #bee5eb; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">ℹ️ Info: This is a custom info tag!</div>"),
|
||||
("<success />", "<div class=\"custom-tag success\" style=\"background: #d4edda; border: 1px solid #c3e6cb; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">✅ Success: This is a custom success tag!</div>"),
|
||||
("<error />", "<div class=\"custom-tag error\" style=\"background: #f8d7da; border: 1px solid #f5c6cb; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">❌ Error: This is a custom error tag!</div>"),
|
||||
];
|
||||
|
||||
for (tag, replacement) in simple_tags.iter() {
|
||||
processed = processed.replace(tag, replacement);
|
||||
}
|
||||
|
||||
// Handle tags with parameters like <mytag param="value" />
|
||||
let tag_with_params = Regex::new(r"<(\w+)\s+([^>]*?[a-zA-Z0-9=])[^>]*/>").unwrap();
|
||||
processed = tag_with_params.replace_all(&processed, |caps: ®ex::Captures| {
|
||||
let tag_name = &caps[1];
|
||||
let params = &caps[2];
|
||||
|
||||
match tag_name {
|
||||
"mytag" => {
|
||||
// Parse parameters and generate custom HTML
|
||||
format!("<div class=\"custom-tag mytag\" data-params=\"{}\">Custom content with params: {}</div>", params, params)
|
||||
},
|
||||
"alert" => {
|
||||
// Parse alert type from params
|
||||
if params.contains("type=\"warning\"") {
|
||||
"<div class=\"custom-tag alert warning\" style=\"background: #fff3cd; border: 1px solid #ffeaa7; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">⚠️ Warning Alert!</div>".to_string()
|
||||
} else if params.contains("type=\"error\"") {
|
||||
"<div class=\"custom-tag alert error\" style=\"background: #f8d7da; border: 1px solid #f5c6cb; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">❌ Error Alert!</div>".to_string()
|
||||
} else {
|
||||
"<div class=\"custom-tag alert info\" style=\"background: #d1ecf1; border: 1px solid #bee5eb; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">ℹ️ Info Alert!</div>".to_string()
|
||||
}
|
||||
},
|
||||
_ => format!("<div class=\"custom-tag {}\">Unknown custom tag: {}</div>", tag_name, tag_name)
|
||||
}
|
||||
}).to_string();
|
||||
|
||||
processed
|
||||
}
|
||||
// Static caches
|
||||
static POST_CACHE: Lazy<RwLock<HashMap<String, Post>>> = Lazy::new(|| RwLock::new(HashMap::new()));
|
||||
static ALL_POSTS_CACHE: Lazy<RwLock<Option<Vec<Post>>>> = Lazy::new(|| RwLock::new(None));
|
||||
static POST_STATS: Lazy<RwLock<HashMap<String, PostStats>>> = Lazy::new(|| RwLock::new(HashMap::new()));
|
||||
static PARSER_LOGS: Lazy<RwLock<VecDeque<LogEntry>>> = Lazy::new(|| RwLock::new(VecDeque::new()));
|
||||
|
||||
// Ammonia HTML sanitizer configuration
|
||||
static AMMONIA: Lazy<ammonia::Builder<'static>> = Lazy::new(|| {
|
||||
let mut builder = ammonia::Builder::default();
|
||||
// All possible HTML Tags so that you can stylize via HTML
|
||||
builder.add_tag_attributes("h1", &["id", "style"]);
|
||||
builder.add_tag_attributes("h2", &["id", "style"]);
|
||||
builder.add_tag_attributes("h3", &["id", "style"]);
|
||||
builder.add_tag_attributes("h4", &["id", "style"]);
|
||||
builder.add_tag_attributes("h5", &["id", "style"]);
|
||||
builder.add_tag_attributes("h6", &["id", "style"]);
|
||||
|
||||
// Add allowed attributes for various HTML tags
|
||||
builder.add_tag_attributes("h1", &["style", "id"]);
|
||||
builder.add_tag_attributes("h2", &["style", "id"]);
|
||||
builder.add_tag_attributes("h3", &["style", "id"]);
|
||||
builder.add_tag_attributes("h4", &["style", "id"]);
|
||||
builder.add_tag_attributes("h5", &["style", "id"]);
|
||||
builder.add_tag_attributes("h6", &["style", "id"]);
|
||||
builder.add_tag_attributes("p", &["style"]);
|
||||
builder.add_tag_attributes("span", &["style"]);
|
||||
builder.add_tag_attributes("strong", &["style"]);
|
||||
@@ -290,7 +120,6 @@ static AMMONIA: Lazy<ammonia::Builder<'static>> = Lazy::new(|| {
|
||||
builder.add_tag_attributes("pre", &["style"]);
|
||||
builder.add_tag_attributes("kbd", &["style"]);
|
||||
builder.add_tag_attributes("samp", &["style"]);
|
||||
builder.add_tag_attributes("div", &["style", "class"]);
|
||||
builder.add_tag_attributes("section", &["style"]);
|
||||
builder.add_tag_attributes("article", &["style"]);
|
||||
builder.add_tag_attributes("header", &["style"]);
|
||||
@@ -335,15 +164,197 @@ static AMMONIA: Lazy<ammonia::Builder<'static>> = Lazy::new(|| {
|
||||
builder.add_tag_attributes("fieldset", &["style"]);
|
||||
builder.add_tag_attributes("legend", &["style"]);
|
||||
builder.add_tag_attributes("blockquote", &["style"]);
|
||||
builder.add_tag_attributes("font", &["style"]); // deprecated
|
||||
builder.add_tag_attributes("center", &["style"]); // deprecated
|
||||
builder.add_tag_attributes("big", &["style"]); // deprecated
|
||||
builder.add_tag_attributes("tt", &["style"]); // deprecated
|
||||
builder.add_tag_attributes("font", &["style"]);
|
||||
builder.add_tag_attributes("center", &["style"]);
|
||||
builder.add_tag_attributes("big", &["style"]);
|
||||
builder.add_tag_attributes("tt", &["style"]);
|
||||
|
||||
// Add class attribute for div
|
||||
builder.add_tag_attributes("div", &["style", "class"]);
|
||||
|
||||
builder
|
||||
});
|
||||
|
||||
// Helper functions
|
||||
/// Resolves the directory that holds the markdown posts.
///
/// A container environment is assumed when `DOCKER_CONTAINER` or
/// `KUBERNETES_SERVICE_HOST` is set, or when `/.dockerenv` exists. The
/// candidate list is ordered differently for the two cases, and the first
/// candidate that is an existing directory wins.
///
/// Returns `./posts` when no candidate exists; the returned path is then not
/// guaranteed to exist.
fn get_posts_directory() -> PathBuf {
    // Inside a container the volume mount point takes precedence.
    const DOCKER_CANDIDATES: [&str; 6] = [
        "/app/docker", // Docker volume mount point (highest priority in Docker)
        "/app/posts",  // Fallback in Docker
        "./posts",
        "../posts",
        "/posts",
        "/docker",
    ];
    // Outside a container, local directories are preferred.
    const LOCAL_CANDIDATES: [&str; 5] = [
        "./posts",
        "../posts",
        "/posts",
        "/docker",
        "/app/docker", // Lower priority for non-Docker environments
    ];

    let is_docker = std::env::var("DOCKER_CONTAINER").is_ok()
        || std::env::var("KUBERNETES_SERVICE_HOST").is_ok()
        || std::path::Path::new("/.dockerenv").exists();

    let candidates: &[&str] = if is_docker {
        &DOCKER_CANDIDATES
    } else {
        &LOCAL_CANDIDATES
    };

    candidates
        .iter()
        .map(PathBuf::from)
        // `is_dir` is already false for paths that do not exist.
        .find(|path| path.is_dir())
        .unwrap_or_else(|| PathBuf::from("./posts"))
}
|
||||
|
||||
/// Recursively collects every file with the extension `md` below `dir`.
///
/// Returns an empty list when `dir` is not a directory. The extension check
/// is case-sensitive, and the order of the result follows `fs::read_dir`,
/// which is platform-dependent.
///
/// # Errors
///
/// Propagates any I/O error raised while listing `dir` or one of its
/// subdirectories.
fn find_markdown_files(dir: &Path) -> std::io::Result<Vec<PathBuf>> {
    let mut files = Vec::new();
    if !dir.is_dir() {
        return Ok(files);
    }

    for entry in fs::read_dir(dir)? {
        let path = entry?.path();
        if path.is_dir() {
            files.extend(find_markdown_files(&path)?);
        } else if path.extension().map_or(false, |ext| ext == "md") {
            files.push(path);
        }
    }

    Ok(files)
}
|
||||
|
||||
/// Converts a post's file path into its slug.
///
/// The path is made relative to `posts_dir` (it is used unchanged when it
/// does not start with `posts_dir`), the final extension is removed, and
/// every `/` or `\` is replaced by `::` so that subdirectories become slug
/// segments, e.g. `posts/guides/intro.md` -> `guides::intro`.
fn path_to_slug(file_path: &Path, posts_dir: &Path) -> String {
    let relative_path = file_path.strip_prefix(posts_dir).unwrap_or(file_path);
    relative_path
        .with_extension("")
        .to_string_lossy()
        // Both separator styles are normalized regardless of platform;
        // `std::path::MAIN_SEPARATOR` is always one of these two characters.
        .replace(&['/', '\\'][..], "::")
}
|
||||
|
||||
/// Converts a slug back into the path of its markdown file.
///
/// The slug is split on `::`; every segment but the last becomes a
/// directory below `posts_dir`, and the last segment becomes the file name
/// with `.md` appended, e.g. `guides::intro` -> `<posts_dir>/guides/intro.md`.
///
/// NOTE(review): segments are joined as-is, so a slug containing `..` or an
/// absolute component yields a path outside `posts_dir`. Confirm that callers
/// validate slugs that originate from untrusted input.
fn slug_to_path(slug: &str, posts_dir: &Path) -> PathBuf {
    let mut path = posts_dir.to_path_buf();
    let mut segments = slug.split("::").peekable();

    while let Some(segment) = segments.next() {
        if segments.peek().is_some() {
            // Intermediate segments are directories.
            path.push(segment);
        } else {
            // The final segment is the file name.
            path.push(format!("{}.md", segment));
        }
    }

    path
}
|
||||
|
||||
fn get_file_creation_date(path: &Path) -> std::io::Result<DateTime<Utc>> {
|
||||
let metadata = fs::metadata(path)?;
|
||||
match metadata.created() {
|
||||
Ok(created) => Ok(DateTime::<Utc>::from(created)),
|
||||
Err(_) => {
|
||||
let modified = metadata.modified()?;
|
||||
Ok(DateTime::<Utc>::from(modified))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn process_anchor_links(content: &str) -> String {
|
||||
let re = regex::Regex::new(r"\[([^\]]+)\]\(#([^)]+)\)").unwrap();
|
||||
re.replace_all(content, |caps: ®ex::Captures| {
|
||||
let link_text = &caps[1];
|
||||
let anchor = &caps[2];
|
||||
let slugified = slugify(anchor);
|
||||
format!("[{}](#{})", link_text, slugified)
|
||||
}).to_string()
|
||||
}
|
||||
|
||||
/// Removes characters that fall inside a fixed set of emoji code-point ranges.
///
/// Only the ranges listed below are stripped. Other code points, including
/// the variation selector U+FE0F and the zero-width joiner, are kept, so
/// some emoji sequences leave invisible characters behind.
fn strip_emojis(s: &str) -> String {
    s.chars()
        .filter(|&c| {
            !matches!(
                c,
                '\u{1F600}'..='\u{1F64F}'     // Emoticons
                    | '\u{1F300}'..='\u{1F5FF}' // Misc Symbols and Pictographs
                    | '\u{1F680}'..='\u{1F6FF}' // Transport and Map
                    | '\u{2600}'..='\u{26FF}'   // Misc symbols
                    | '\u{2700}'..='\u{27BF}'   // Dingbats
                    | '\u{1F900}'..='\u{1F9FF}' // Supplemental Symbols and Pictographs
                    | '\u{1FA70}'..='\u{1FAFF}' // Symbols and Pictographs Extended-A
                    | '\u{1F1E6}'..='\u{1F1FF}' // Regional Indicator Symbols
            )
        })
        .collect()
}
|
||||
|
||||
fn process_custom_tags(content: &str) -> String {
|
||||
let mut processed = content.to_string();
|
||||
|
||||
// Handle simple tags without parameters
|
||||
let simple_tags = [
|
||||
("<mytag />", "<div class=\"custom-tag mytag\">This is my custom tag content!</div>"),
|
||||
("<warning />", "<div class=\"custom-tag warning\" style=\"background: #fff3cd; border: 1px solid #ffeaa7; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">⚠️ Warning: This is a custom warning tag!</div>"),
|
||||
("<info />", "<div class=\"custom-tag info\" style=\"background: #d1ecf1; border: 1px solid #bee5eb; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">ℹ️ Info: This is a custom info tag!</div>"),
|
||||
("<success />", "<div class=\"custom-tag success\" style=\"background: #d4edda; border: 1px solid #c3e6cb; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">✅ Success: This is a custom success tag!</div>"),
|
||||
("<error />", "<div class=\"custom-tag error\" style=\"background: #f8d7da; border: 1px solid #f5c6cb; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">❌ Error: This is a custom error tag!</div>"),
|
||||
];
|
||||
|
||||
for (tag, replacement) in simple_tags.iter() {
|
||||
processed = processed.replace(tag, replacement);
|
||||
}
|
||||
|
||||
// Handle tags with parameters
|
||||
let tag_with_params = Regex::new(r"<(\w+)\s+([^>]*?[a-zA-Z0-9=])[^>]*/>").unwrap();
|
||||
processed = tag_with_params.replace_all(&processed, |caps: ®ex::Captures| {
|
||||
let tag_name = &caps[1];
|
||||
let params = &caps[2];
|
||||
|
||||
match tag_name {
|
||||
"mytag" => {
|
||||
format!("<div class=\"custom-tag mytag\" data-params=\"{}\">Custom content with params: {}</div>", params, params)
|
||||
},
|
||||
"alert" => {
|
||||
if params.contains("type=\"warning\"") {
|
||||
"<div class=\"custom-tag alert warning\" style=\"background: #fff3cd; border: 1px solid #ffeaa7; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">⚠️ Warning Alert!</div>".to_string()
|
||||
} else if params.contains("type=\"error\"") {
|
||||
"<div class=\"custom-tag alert error\" style=\"background: #f8d7da; border: 1px solid #f5c6cb; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">❌ Error Alert!</div>".to_string()
|
||||
} else {
|
||||
"<div class=\"custom-tag alert info\" style=\"background: #d1ecf1; border: 1px solid #bee5eb; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">ℹ️ Info Alert!</div>".to_string()
|
||||
}
|
||||
},
|
||||
_ => format!("<div class=\"custom-tag {}\">Unknown custom tag: {}</div>", tag_name, tag_name)
|
||||
}
|
||||
}).to_string();
|
||||
|
||||
processed
|
||||
}
|
||||
|
||||
// Logging functions
|
||||
fn add_log(level: &str, message: &str, slug: Option<&str>, details: Option<&str>) {
|
||||
let timestamp = chrono::Utc::now().to_rfc3339();
|
||||
let log_entry = LogEntry {
|
||||
timestamp,
|
||||
level: level.to_string(),
|
||||
message: message.to_string(),
|
||||
slug: slug.map(|s| s.to_string()),
|
||||
details: details.map(|s| s.to_string()),
|
||||
};
|
||||
|
||||
let mut logs = PARSER_LOGS.write().unwrap();
|
||||
logs.push_back(log_entry);
|
||||
|
||||
// Keep only the last MAX_LOG_ENTRIES
|
||||
if logs.len() > MAX_LOG_ENTRIES {
|
||||
logs.pop_front();
|
||||
}
|
||||
}
|
||||
|
||||
// Main public functions
|
||||
pub fn rsparseinfo() -> String {
|
||||
// Eagerly load all posts to populate stats
|
||||
let _ = get_all_posts();
|
||||
let stats = POST_STATS.read().unwrap();
|
||||
let values: Vec<&PostStats> = stats.values().collect();
|
||||
@@ -355,16 +366,20 @@ pub fn rsparseinfo() -> String {
|
||||
}
|
||||
|
||||
pub fn get_post_by_slug(slug: &str) -> Result<Post, Box<dyn std::error::Error>> {
|
||||
add_log("info", "Starting post parsing", Some(slug), None);
|
||||
|
||||
let mut sys = System::new_with_specifics(RefreshKind::new().with_processes(ProcessRefreshKind::everything()).with_cpu(CpuRefreshKind::everything()));
|
||||
sys.refresh_processes();
|
||||
let pid = sysinfo::get_current_pid()?;
|
||||
let before_cpu = sys.process(pid).map(|p| p.cpu_usage()).unwrap_or(0.0);
|
||||
let start = Instant::now();
|
||||
|
||||
let mut stats = POST_STATS.write().unwrap();
|
||||
let entry = stats.entry(slug.to_string()).or_insert_with(|| PostStats {
|
||||
slug: slug.to_string(),
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
// Try cache first
|
||||
if let Some(post) = POST_CACHE.read().unwrap().get(slug).cloned() {
|
||||
entry.cache_hits += 1;
|
||||
@@ -373,32 +388,30 @@ pub fn get_post_by_slug(slug: &str) -> Result<Post, Box<dyn std::error::Error>>
|
||||
entry.last_cache_status = "hit".to_string();
|
||||
sys.refresh_process(pid);
|
||||
entry.last_cpu_usage_percent = sys.process(pid).map(|p| p.cpu_usage()).unwrap_or(0.0) - before_cpu;
|
||||
add_log("info", "Cache hit", Some(slug), None);
|
||||
return Ok(post);
|
||||
}
|
||||
|
||||
entry.cache_misses += 1;
|
||||
entry.last_cache_status = "miss".to_string();
|
||||
drop(stats); // Release lock before heavy work
|
||||
drop(stats);
|
||||
|
||||
let posts_dir = get_posts_directory();
|
||||
let file_path = slug_to_path(slug, &posts_dir);
|
||||
|
||||
// Add debugging for file path resolution
|
||||
eprintln!("[Rust Parser] Looking for file: {:?}", file_path);
|
||||
eprintln!("[Rust Parser] Posts directory: {:?}", posts_dir);
|
||||
eprintln!("[Rust Parser] Slug: {}", slug);
|
||||
|
||||
if !file_path.exists() {
|
||||
eprintln!("[Rust Parser] File does not exist: {:?}", file_path);
|
||||
return Err(format!("File not found: {:?}", file_path).into());
|
||||
let error_msg = format!("File not found: {:?}", file_path);
|
||||
add_log("error", &error_msg, Some(slug), None);
|
||||
return Err(error_msg.into());
|
||||
}
|
||||
|
||||
let file_content = fs::read_to_string(&file_path)?;
|
||||
eprintln!("[Rust Parser] File size: {} bytes", file_content.len());
|
||||
add_log("info", &format!("File loaded: {} bytes", file_content.len()), Some(slug), None);
|
||||
|
||||
// Check file size limit (10MB)
|
||||
const MAX_FILE_SIZE: usize = 10 * 1024 * 1024; // 10MB
|
||||
if file_content.len() > MAX_FILE_SIZE {
|
||||
eprintln!("[Rust Parser] File too large: {} bytes (max: {} bytes)", file_content.len(), MAX_FILE_SIZE);
|
||||
return Err(format!("File too large: {} bytes (max: {} bytes)", file_content.len(), MAX_FILE_SIZE).into());
|
||||
let error_msg = format!("File too large: {} bytes (max: {} bytes)", file_content.len(), MAX_FILE_SIZE);
|
||||
add_log("error", &error_msg, Some(slug), None);
|
||||
return Err(error_msg.into());
|
||||
}
|
||||
|
||||
let matter = Matter::<YAML>::new();
|
||||
@@ -408,20 +421,21 @@ pub fn get_post_by_slug(slug: &str) -> Result<Post, Box<dyn std::error::Error>>
|
||||
match data.deserialize() {
|
||||
Ok(front) => front,
|
||||
Err(e) => {
|
||||
eprintln!("[Rust Parser] Failed to deserialize frontmatter for post {}: {}", slug, e);
|
||||
return Err(format!("Failed to deserialize frontmatter: {}", e).into());
|
||||
let error_msg = format!("Failed to deserialize frontmatter: {}", e);
|
||||
add_log("error", &error_msg, Some(slug), None);
|
||||
return Err(error_msg.into());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
eprintln!("[Rust Parser] No frontmatter found for post: {}", slug);
|
||||
add_log("error", "No frontmatter found", Some(slug), None);
|
||||
return Err("No frontmatter found".into());
|
||||
};
|
||||
|
||||
let created_at = get_file_creation_date(&file_path)?;
|
||||
|
||||
let processed_markdown = process_anchor_links(&result.content);
|
||||
let processed_markdown = process_custom_tags(&processed_markdown);
|
||||
eprintln!("[Rust Parser] Processed markdown length: {} characters", processed_markdown.len());
|
||||
|
||||
add_log("info", "Starting markdown parsing", Some(slug), Some(&format!("Content length: {} chars", processed_markdown.len())));
|
||||
|
||||
let parser = Parser::new_ext(&processed_markdown, Options::all());
|
||||
let mut html_output = String::new();
|
||||
@@ -432,22 +446,19 @@ pub fn get_post_by_slug(slug: &str) -> Result<Post, Box<dyn std::error::Error>>
|
||||
let mut code_block_lang = String::new();
|
||||
let mut code_block_content = String::new();
|
||||
let mut events = Vec::new();
|
||||
let ss = SyntaxSet::load_defaults_newlines(); // SS 卐
|
||||
let ss = SyntaxSet::load_defaults_newlines();
|
||||
let ts = ThemeSet::load_defaults();
|
||||
let theme = &ts.themes["base16-ocean.dark"];
|
||||
|
||||
// Add error handling around the parsing loop
|
||||
let mut event_count = 0;
|
||||
let start_parsing = Instant::now();
|
||||
let mut event_count = 0;
|
||||
|
||||
for event in parser {
|
||||
event_count += 1;
|
||||
if event_count % 1000 == 0 {
|
||||
eprintln!("[Rust Parser] Processed {} events for slug: {}", event_count, slug);
|
||||
// Check for timeout (30 seconds)
|
||||
if start_parsing.elapsed().as_secs() > 30 {
|
||||
eprintln!("[Rust Parser] Timeout reached for slug: {}", slug);
|
||||
return Err("Parsing timeout - file too large".into());
|
||||
}
|
||||
if start_parsing.elapsed().as_secs() > PARSING_TIMEOUT_SECS {
|
||||
let error_msg = "Parsing timeout - file too large";
|
||||
add_log("error", error_msg, Some(slug), Some(&format!("Processed {} events", event_count)));
|
||||
return Err(error_msg.into());
|
||||
}
|
||||
|
||||
match &event {
|
||||
@@ -458,10 +469,8 @@ pub fn get_post_by_slug(slug: &str) -> Result<Post, Box<dyn std::error::Error>>
|
||||
},
|
||||
Event::End(Tag::Heading(_, _, _)) => {
|
||||
in_heading = false;
|
||||
// Strip emojis before slugifying for the id
|
||||
let heading_no_emoji = strip_emojis(&heading_text);
|
||||
let id = slugify(&heading_no_emoji);
|
||||
// Add basic CSS style for headings
|
||||
let style = "color: #2d3748; margin-top: 1.5em; margin-bottom: 0.5em;";
|
||||
events.push(Event::Html(CowStr::Boxed(format!("<h{lvl} id=\"{id}\" style=\"{style}\">", lvl=heading_level, id=id, style=style).into_boxed_str())));
|
||||
events.push(Event::Text(CowStr::Boxed(heading_text.clone().into_boxed_str())));
|
||||
@@ -480,7 +489,6 @@ pub fn get_post_by_slug(slug: &str) -> Result<Post, Box<dyn std::error::Error>>
|
||||
},
|
||||
Event::End(Tag::CodeBlock(_)) => {
|
||||
in_code_block = false;
|
||||
// Highlight code block
|
||||
let highlighted = if !code_block_lang.is_empty() {
|
||||
if let Some(syntax) = ss.find_syntax_by_token(&code_block_lang) {
|
||||
highlighted_html_for_string(&code_block_content, &ss, syntax, theme).unwrap_or_else(|_| format!("<pre style=\"background: #2d2d2d; color: #f8f8f2; padding: 1em; border-radius: 6px; overflow-x: auto;\"><code style=\"background: none;\">{}</code></pre>", html_escape::encode_text(&code_block_content)))
|
||||
@@ -488,7 +496,6 @@ pub fn get_post_by_slug(slug: &str) -> Result<Post, Box<dyn std::error::Error>>
|
||||
format!("<pre style=\"background: #2d2d2d; color: #f8f8f2; padding: 1em; border-radius: 6px; overflow-x: auto;\"><code style=\"background: none;\">{}</code></pre>", html_escape::encode_text(&code_block_content))
|
||||
}
|
||||
} else {
|
||||
// No language specified
|
||||
format!("<pre style=\"background: #2d2d2d; color: #f8f8f2; padding: 1em; border-radius: 6px; overflow-x: auto;\"><code style=\"background: none;\">{}</code></pre>", html_escape::encode_text(&code_block_content))
|
||||
};
|
||||
events.push(Event::Html(CowStr::Boxed(highlighted.into_boxed_str())));
|
||||
@@ -502,12 +509,11 @@ pub fn get_post_by_slug(slug: &str) -> Result<Post, Box<dyn std::error::Error>>
|
||||
_ => {},
|
||||
}
|
||||
}
|
||||
eprintln!("[Rust Parser] Total events processed: {} for slug: {}", event_count, slug);
|
||||
|
||||
add_log("info", "Markdown parsing completed", Some(slug), Some(&format!("Processed {} events", event_count)));
|
||||
|
||||
html::push_html(&mut html_output, events.into_iter());
|
||||
eprintln!("[Rust Parser] HTML output length: {} characters", html_output.len());
|
||||
|
||||
let sanitized_html = AMMONIA.clean(&html_output).to_string();
|
||||
eprintln!("[Rust Parser] Sanitized HTML length: {} characters", sanitized_html.len());
|
||||
|
||||
let interpret_time = start.elapsed();
|
||||
let compile_start = Instant::now();
|
||||
@@ -522,8 +528,10 @@ pub fn get_post_by_slug(slug: &str) -> Result<Post, Box<dyn std::error::Error>>
|
||||
author: std::env::var("BLOG_OWNER").unwrap_or_else(|_| "Anonymous".to_string()),
|
||||
};
|
||||
let compile_time = compile_start.elapsed();
|
||||
|
||||
// Insert into cache
|
||||
POST_CACHE.write().unwrap().insert(slug.to_string(), post.clone());
|
||||
|
||||
// Update stats
|
||||
let mut stats = POST_STATS.write().unwrap();
|
||||
let entry = stats.entry(slug.to_string()).or_insert_with(|| PostStats {
|
||||
@@ -534,6 +542,9 @@ pub fn get_post_by_slug(slug: &str) -> Result<Post, Box<dyn std::error::Error>>
|
||||
entry.last_compile_time_ms = compile_time.as_millis();
|
||||
sys.refresh_process(pid);
|
||||
entry.last_cpu_usage_percent = sys.process(pid).map(|p| p.cpu_usage()).unwrap_or(0.0) - before_cpu;
|
||||
|
||||
add_log("info", "Post parsing completed successfully", Some(slug), Some(&format!("Interpret: {}ms, Compile: {}ms", interpret_time.as_millis(), compile_time.as_millis())));
|
||||
|
||||
Ok(post)
|
||||
}
|
||||
|
||||
@@ -542,6 +553,7 @@ pub fn get_all_posts() -> Result<Vec<Post>, Box<dyn std::error::Error>> {
|
||||
if let Some(posts) = ALL_POSTS_CACHE.read().unwrap().clone() {
|
||||
return Ok(posts);
|
||||
}
|
||||
|
||||
let posts_dir = get_posts_directory();
|
||||
let markdown_files = find_markdown_files(&posts_dir)?;
|
||||
let mut posts = Vec::new();
|
||||
@@ -549,14 +561,12 @@ pub fn get_all_posts() -> Result<Vec<Post>, Box<dyn std::error::Error>> {
|
||||
for file_path in markdown_files {
|
||||
let slug = path_to_slug(&file_path, &posts_dir);
|
||||
if let Ok(post) = get_post_by_slug(&slug) {
|
||||
// Insert each post into the individual post cache as well
|
||||
POST_CACHE.write().unwrap().insert(slug.clone(), post.clone());
|
||||
posts.push(post);
|
||||
}
|
||||
}
|
||||
|
||||
posts.sort_by(|a, b| b.created_at.cmp(&a.created_at));
|
||||
// Cache the result
|
||||
*ALL_POSTS_CACHE.write().unwrap() = Some(posts.clone());
|
||||
Ok(posts)
|
||||
}
|
||||
@@ -570,11 +580,11 @@ pub fn watch_posts<F: Fn() + Send + 'static>(on_change: F) -> notify::Result<Rec
|
||||
let (tx, rx) = channel();
|
||||
let mut watcher = RecommendedWatcher::new(tx, Config::default())?;
|
||||
watcher.watch(get_posts_directory().as_path(), RecursiveMode::Recursive)?;
|
||||
|
||||
std::thread::spawn(move || {
|
||||
loop {
|
||||
match rx.recv() {
|
||||
Ok(_event) => {
|
||||
// Invalidate caches on any change
|
||||
POST_CACHE.write().unwrap().clear();
|
||||
*ALL_POSTS_CACHE.write().unwrap() = None;
|
||||
on_change();
|
||||
@@ -618,6 +628,7 @@ pub fn checkhealth() -> HealthReport {
|
||||
let posts_dir = get_posts_directory();
|
||||
let posts_dir_exists = posts_dir.exists() && posts_dir.is_dir();
|
||||
let mut posts_count = 0;
|
||||
|
||||
if posts_dir_exists {
|
||||
match std::fs::read_dir(&posts_dir) {
|
||||
Ok(entries) => {
|
||||
@@ -630,9 +641,11 @@ pub fn checkhealth() -> HealthReport {
|
||||
} else {
|
||||
errors.push("Posts directory does not exist".to_string());
|
||||
}
|
||||
|
||||
let cache_file_exists = Path::new(POSTS_CACHE_PATH).exists();
|
||||
let cache_stats_file_exists = Path::new(POST_STATS_PATH).exists();
|
||||
let (mut cache_readable, mut cache_post_count) = (false, None);
|
||||
|
||||
if cache_file_exists {
|
||||
match std::fs::read_to_string(POSTS_CACHE_PATH) {
|
||||
Ok(data) => {
|
||||
@@ -647,6 +660,7 @@ pub fn checkhealth() -> HealthReport {
|
||||
Err(e) => errors.push(format!("Failed to read cache file: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
let (mut cache_stats_readable, mut cache_stats_count) = (false, None);
|
||||
if cache_stats_file_exists {
|
||||
match std::fs::read_to_string(POST_STATS_PATH) {
|
||||
@@ -662,6 +676,7 @@ pub fn checkhealth() -> HealthReport {
|
||||
Err(e) => errors.push(format!("Failed to read cache stats file: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
HealthReport {
|
||||
posts_dir_exists,
|
||||
posts_count,
|
||||
@@ -674,3 +689,13 @@ pub fn checkhealth() -> HealthReport {
|
||||
errors,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_parser_logs() -> Vec<LogEntry> {
|
||||
let logs = PARSER_LOGS.read().unwrap();
|
||||
logs.iter().cloned().collect()
|
||||
}
|
||||
|
||||
pub fn clear_parser_logs() {
|
||||
let mut logs = PARSER_LOGS.write().unwrap();
|
||||
logs.clear();
|
||||
}
|
||||
Reference in New Issue
Block a user