- Added a VS Code-style editor with YAML frontmatter support and live preview. - Implemented force reparse functionality for immediate updates of posts. - Improved directory scanning with error handling and automatic directory creation. - Introduced new CLI commands for cache management: `reinterpret-all` and `reparse-post`. - Enhanced logging for better debugging and monitoring of the Rust backend. - Updated README to reflect new features and improvements.
911 lines
35 KiB
Rust
//
|
||
// src/markdown.rs
|
||
// Written by: @rattatwinko
|
||
//
|
||
|
||
use std::fs;
|
||
use std::path::{Path, PathBuf};
|
||
use std::collections::HashMap;
|
||
use std::sync::RwLock;
|
||
use std::time::Instant;
|
||
use std::sync::mpsc::channel;
|
||
use std::collections::VecDeque;
|
||
|
||
use chrono::{DateTime, Utc};
|
||
use serde::{Deserialize, Serialize};
|
||
use pulldown_cmark::{Parser, Options, html, Event, Tag, CowStr};
|
||
use gray_matter::engine::YAML;
|
||
use gray_matter::Matter;
|
||
use slug::slugify;
|
||
use notify::{RecursiveMode, RecommendedWatcher, Watcher, Config};
|
||
use syntect::highlighting::ThemeSet;
|
||
use syntect::parsing::SyntaxSet;
|
||
use syntect::html::highlighted_html_for_string;
|
||
use once_cell::sync::Lazy;
|
||
use serde_json;
|
||
use sysinfo::{System, RefreshKind, CpuRefreshKind, ProcessRefreshKind};
|
||
use regex::Regex;
|
||
|
||
// Constants
// On-disk location of the serialized post cache (HashMap<slug, Post>).
const POSTS_CACHE_PATH: &str = "./cache/posts_cache.json";
// On-disk location of the per-post parse/cache statistics map.
const POST_STATS_PATH: &str = "./cache/post_stats.json";
// Hard cap on a single markdown file's size before parsing is refused.
const MAX_FILE_SIZE: usize = 2 * 1024 * 1024; // 2 MiB
// Abort the markdown event loop if it runs longer than this many seconds.
const PARSING_TIMEOUT_SECS: u64 = 6000;
// Ring-buffer capacity for the in-memory parser log (oldest entries evicted).
const MAX_LOG_ENTRIES: usize = 1000;
// On-disk location where the parser log ring buffer is persisted.
const PARSER_LOGS_PATH: &str = "./cache/parser_logs.json";
|
||
|
||
// Data structures
// YAML frontmatter expected at the top of every post's markdown file.
// `title` and `date` are mandatory; deserialization fails without them.
#[derive(Debug, Deserialize, Clone, Serialize)]
pub struct PostFrontmatter {
    // Human-readable post title.
    pub title: String,
    // Author-supplied date string (kept verbatim; not parsed here).
    pub date: String,
    // Optional tag list; treated as empty when absent.
    pub tags: Option<Vec<String>>,
    // Optional short summary for listings.
    pub summary: Option<String>,
}
|
||
// Post Data Structures
// A fully parsed, sanitized post as served to the frontend and cached on disk.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Post {
    // Stable identifier derived from the file path (path segments joined with "::").
    pub slug: String,
    // Title taken from the frontmatter.
    pub title: String,
    // Date string taken verbatim from the frontmatter.
    pub date: String,
    // Tags from the frontmatter; empty vec when none were given.
    pub tags: Vec<String>,
    // Optional summary from the frontmatter.
    pub summary: Option<String>,
    // Sanitized HTML rendered from the markdown body.
    pub content: String,
    // RFC 3339 timestamp from file creation (falls back to modification time).
    pub created_at: String,
    // Author name from the BLOG_OWNER env var; "Anonymous" when unset.
    pub author: String,
}
|
||
|
||
// Data Structure for Posts Statistics
// Per-post cache and timing metrics, keyed by slug in POST_STATS.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct PostStats {
    // Slug of the post these stats belong to.
    pub slug: String,
    // Number of times the post was served from POST_CACHE.
    pub cache_hits: u64,
    // Number of times the post had to be parsed from disk.
    pub cache_misses: u64,
    // Duration of the last full parse (file read through sanitization), in ms.
    pub last_interpret_time_ms: u128,
    // Duration of the last Post struct assembly, in ms.
    pub last_compile_time_ms: u128,
    // CPU usage delta of this process measured across the last request.
    pub last_cpu_usage_percent: f32,
    pub last_cache_status: String, // "hit" or "miss"
}
|
||
|
||
// Data Structures for Health Reporting
// Snapshot produced by checkhealth(): existence/readability of the posts
// directory and the two cache files, plus any errors encountered.
#[derive(Debug, Serialize)]
pub struct HealthReport {
    // True when the resolved posts directory exists and is a directory.
    pub posts_dir_exists: bool,
    // Number of top-level .md files in the posts directory (non-recursive).
    pub posts_count: usize,
    // True when the posts cache file exists on disk.
    pub cache_file_exists: bool,
    // True when the stats cache file exists on disk.
    pub cache_stats_file_exists: bool,
    // True when the posts cache file parsed as valid JSON.
    pub cache_readable: bool,
    // True when the stats cache file parsed as valid JSON.
    pub cache_stats_readable: bool,
    // Number of posts in the cache file, when readable.
    pub cache_post_count: Option<usize>,
    // Number of stat entries in the stats file, when readable.
    pub cache_stats_count: Option<usize>,
    // Human-readable descriptions of every problem found.
    pub errors: Vec<String>,
}
|
||
|
||
// Log Data Structure (frontend related)
// One entry in the parser log ring buffer, exposed to the frontend.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LogEntry {
    // RFC 3339 timestamp of when the entry was recorded.
    pub timestamp: String,
    pub level: String, // "info", "warning", "error"
    // Short human-readable description of the event.
    pub message: String,
    // Slug of the post the event relates to, when applicable.
    pub slug: Option<String>,
    // Optional extra context (e.g. timings, event counts).
    pub details: Option<String>,
}
|
||
|
||
// Static caches
// Per-slug parsed posts; persisted via save_post_cache_to_disk().
static POST_CACHE: Lazy<RwLock<HashMap<String, Post>>> = Lazy::new(|| RwLock::new(HashMap::new()));
// Memoized result of get_all_posts(); None means it must be rebuilt.
static ALL_POSTS_CACHE: Lazy<RwLock<Option<Vec<Post>>>> = Lazy::new(|| RwLock::new(None));
// Per-slug cache/timing statistics; persisted alongside POST_CACHE.
static POST_STATS: Lazy<RwLock<HashMap<String, PostStats>>> = Lazy::new(|| RwLock::new(HashMap::new()));
// Bounded in-memory parser log (see MAX_LOG_ENTRIES); mirrored to disk on every write.
static PARSER_LOGS: Lazy<RwLock<VecDeque<LogEntry>>> = Lazy::new(|| RwLock::new(VecDeque::new()));
|
||
|
||
// Ammonia HTML sanitizer configuration
|
||
static AMMONIA: Lazy<ammonia::Builder<'static>> = Lazy::new(|| {
|
||
let mut builder = ammonia::Builder::default();
|
||
|
||
// Add allowed attributes for various HTML tags
|
||
builder.add_tag_attributes("h1", &["style", "id"]);
|
||
builder.add_tag_attributes("h2", &["style", "id"]);
|
||
builder.add_tag_attributes("h3", &["style", "id"]);
|
||
builder.add_tag_attributes("h4", &["style", "id"]);
|
||
builder.add_tag_attributes("h5", &["style", "id"]);
|
||
builder.add_tag_attributes("h6", &["style", "id"]);
|
||
builder.add_tag_attributes("p", &["style"]);
|
||
builder.add_tag_attributes("span", &["style"]);
|
||
builder.add_tag_attributes("strong", &["style"]);
|
||
builder.add_tag_attributes("em", &["style"]);
|
||
builder.add_tag_attributes("b", &["style"]);
|
||
builder.add_tag_attributes("i", &["style"]);
|
||
builder.add_tag_attributes("u", &["style"]);
|
||
builder.add_tag_attributes("mark", &["style"]);
|
||
builder.add_tag_attributes("small", &["style"]);
|
||
builder.add_tag_attributes("abbr", &["style"]);
|
||
builder.add_tag_attributes("cite", &["style"]);
|
||
builder.add_tag_attributes("q", &["style"]);
|
||
builder.add_tag_attributes("code", &["style"]);
|
||
builder.add_tag_attributes("pre", &["style"]);
|
||
builder.add_tag_attributes("kbd", &["style"]);
|
||
builder.add_tag_attributes("samp", &["style"]);
|
||
builder.add_tag_attributes("section", &["style"]);
|
||
builder.add_tag_attributes("article", &["style"]);
|
||
builder.add_tag_attributes("header", &["style"]);
|
||
builder.add_tag_attributes("footer", &["style"]);
|
||
builder.add_tag_attributes("main", &["style"]);
|
||
builder.add_tag_attributes("aside", &["style"]);
|
||
builder.add_tag_attributes("nav", &["style"]);
|
||
builder.add_tag_attributes("ul", &["style"]);
|
||
builder.add_tag_attributes("ol", &["style"]);
|
||
builder.add_tag_attributes("li", &["style"]);
|
||
builder.add_tag_attributes("dl", &["style"]);
|
||
builder.add_tag_attributes("dt", &["style"]);
|
||
builder.add_tag_attributes("dd", &["style"]);
|
||
builder.add_tag_attributes("table", &["style"]);
|
||
builder.add_tag_attributes("thead", &["style"]);
|
||
builder.add_tag_attributes("tbody", &["style"]);
|
||
builder.add_tag_attributes("tfoot", &["style"]);
|
||
builder.add_tag_attributes("tr", &["style"]);
|
||
builder.add_tag_attributes("td", &["style"]);
|
||
builder.add_tag_attributes("th", &["style"]);
|
||
builder.add_tag_attributes("a", &["style"]);
|
||
builder.add_tag_attributes("img", &["style"]);
|
||
builder.add_tag_attributes("video", &["style"]);
|
||
builder.add_tag_attributes("audio", &["style"]);
|
||
builder.add_tag_attributes("source", &["style"]);
|
||
builder.add_tag_attributes("iframe", &["style"]);
|
||
builder.add_tag_attributes("sup", &["style"]);
|
||
builder.add_tag_attributes("sub", &["style"]);
|
||
builder.add_tag_attributes("time", &["style"]);
|
||
builder.add_tag_attributes("var", &["style"]);
|
||
builder.add_tag_attributes("del", &["style"]);
|
||
builder.add_tag_attributes("ins", &["style"]);
|
||
builder.add_tag_attributes("br", &["style"]);
|
||
builder.add_tag_attributes("wbr", &["style"]);
|
||
builder.add_tag_attributes("form", &["style"]);
|
||
builder.add_tag_attributes("input", &["style"]);
|
||
builder.add_tag_attributes("textarea", &["style"]);
|
||
builder.add_tag_attributes("select", &["style"]);
|
||
builder.add_tag_attributes("option", &["style"]);
|
||
builder.add_tag_attributes("button", &["style"]);
|
||
builder.add_tag_attributes("label", &["style"]);
|
||
builder.add_tag_attributes("fieldset", &["style"]);
|
||
builder.add_tag_attributes("legend", &["style"]);
|
||
builder.add_tag_attributes("blockquote", &["style"]);
|
||
builder.add_tag_attributes("font", &["style"]);
|
||
builder.add_tag_attributes("center", &["style"]);
|
||
builder.add_tag_attributes("big", &["style"]);
|
||
builder.add_tag_attributes("tt", &["style"]);
|
||
|
||
// Add class attribute for div
|
||
builder.add_tag_attributes("div", &["style", "class"]);
|
||
|
||
builder
|
||
});
|
||
|
||
// Helper functions
|
||
fn ensure_cache_directory() {
|
||
let cache_dir = PathBuf::from("./cache");
|
||
if !cache_dir.exists() {
|
||
if let Err(e) = fs::create_dir_all(&cache_dir) {
|
||
eprintln!("Failed to create cache directory: {}", e);
|
||
add_log("error", &format!("Failed to create cache directory: {}", e), None, None);
|
||
} else {
|
||
add_log("info", "Created cache directory: ./cache", None, None);
|
||
}
|
||
}
|
||
}
|
||
|
||
fn get_posts_directory() -> PathBuf {
|
||
let is_docker = std::env::var("DOCKER_CONTAINER").is_ok()
|
||
|| std::env::var("KUBERNETES_SERVICE_HOST").is_ok()
|
||
|| std::path::Path::new("/.dockerenv").exists();
|
||
|
||
let candidates = if is_docker {
|
||
vec![
|
||
"/app/docker", // Docker volume mount point (highest priority in Docker)
|
||
"/app/posts", // Fallback in Docker
|
||
"./posts",
|
||
"../posts",
|
||
"/posts",
|
||
"/docker"
|
||
]
|
||
} else {
|
||
vec![
|
||
"./posts",
|
||
"../posts",
|
||
"/posts",
|
||
"/docker",
|
||
"/app/docker" // Lower priority for non-Docker environments
|
||
]
|
||
};
|
||
|
||
for candidate in candidates.iter() {
|
||
let path = PathBuf::from(candidate);
|
||
if path.exists() && path.is_dir() {
|
||
add_log("info", &format!("Using posts directory: {:?}", path), None, None);
|
||
return path;
|
||
}
|
||
}
|
||
|
||
// Fallback: create ./posts if it doesn't exist
|
||
let fallback_path = PathBuf::from("./posts");
|
||
if !fallback_path.exists() {
|
||
if let Err(e) = fs::create_dir_all(&fallback_path) {
|
||
add_log("error", &format!("Failed to create posts directory: {}", e), None, None);
|
||
} else {
|
||
add_log("info", "Created posts directory: ./posts", None, None);
|
||
}
|
||
}
|
||
fallback_path
|
||
}
|
||
|
||
// Function to find Markdown files with improved reliability.
//
// Recursively collects every readable `.md` file under `dir`, skipping hidden
// entries (names starting with '.'). Errors on individual entries or
// subdirectories are logged and accumulated but do not abort the scan; only a
// missing/invalid root or an unreadable root directory returns Err.
fn find_markdown_files(dir: &Path) -> std::io::Result<Vec<PathBuf>> {
    let mut files = Vec::new();
    // Non-fatal problems encountered during the walk, reported in the summary log.
    let mut errors = Vec::new();

    if !dir.exists() {
        let error_msg = format!("Directory does not exist: {:?}", dir);
        add_log("error", &error_msg, None, None);
        return Err(std::io::Error::new(std::io::ErrorKind::NotFound, error_msg));
    }

    if !dir.is_dir() {
        let error_msg = format!("Path is not a directory: {:?}", dir);
        add_log("error", &error_msg, None, None);
        return Err(std::io::Error::new(std::io::ErrorKind::InvalidInput, error_msg));
    }

    // An unreadable root is fatal; deeper read errors are merely logged below.
    let entries = match fs::read_dir(dir) {
        Ok(entries) => entries,
        Err(e) => {
            add_log("error", &format!("Failed to read directory {:?}: {}", dir, e), None, None);
            return Err(e);
        }
    };

    for entry_result in entries {
        match entry_result {
            Ok(entry) => {
                let path = entry.path();

                // Skip hidden files and directories (dotfiles).
                if let Some(name) = path.file_name() {
                    if name.to_string_lossy().starts_with('.') {
                        continue;
                    }
                }

                if path.is_dir() {
                    // Recursively scan subdirectories; a failing subtree is
                    // logged but does not abort the overall scan.
                    match find_markdown_files(&path) {
                        Ok(subfiles) => files.extend(subfiles),
                        Err(e) => {
                            let error_msg = format!("Error scanning subdirectory {:?}: {}", path, e);
                            add_log("warning", &error_msg, None, None);
                            errors.push(error_msg);
                        }
                    }
                } else if path.extension().map(|e| e == "md").unwrap_or(false) {
                    // Verify the file is accessible and a regular file before
                    // promising it to the caller.
                    match fs::metadata(&path) {
                        Ok(metadata) => {
                            if metadata.is_file() {
                                files.push(path);
                            }
                        }
                        Err(e) => {
                            let error_msg = format!("Cannot access file {:?}: {}", path, e);
                            add_log("warning", &error_msg, None, None);
                            errors.push(error_msg);
                        }
                    }
                }
            }
            Err(e) => {
                let error_msg = format!("Error reading directory entry: {}", e);
                add_log("warning", &error_msg, None, None);
                errors.push(error_msg);
            }
        }
    }

    // Log summary of the scan, including how many entries failed.
    add_log("info", &format!("Found {} markdown files in {:?}", files.len(), dir), None, None);
    if !errors.is_empty() {
        add_log("warning", &format!("Encountered {} errors during directory scan", errors.len()), None, None);
    }

    Ok(files)
}
|
||
|
||
// Generate a slug from a markdown file path.
//
// Strips the posts-directory prefix and the `.md` extension, then joins the
// remaining path segments with "::" (e.g. `posts/a/b.md` -> "a::b"). A path
// outside `posts_dir` is slugified as-is.
fn path_to_slug(file_path: &Path, posts_dir: &Path) -> String {
    let relative = file_path.strip_prefix(posts_dir).unwrap_or(file_path);
    let stem = relative.with_extension("");
    let mut slug = stem.to_string_lossy().into_owned();
    // Normalize every separator flavor to "::" so slugs are platform-independent.
    slug = slug.replace(std::path::MAIN_SEPARATOR, "::");
    slug = slug.replace('/', "::");
    slug.replace('\\', "::")
}
|
||
|
||
// Convert a "::"-joined slug back into a file path under `posts_dir`.
//
// Inverse of path_to_slug: every segment but the last becomes a directory,
// and the final segment gets the `.md` extension re-attached.
fn slug_to_path(slug: &str, posts_dir: &Path) -> PathBuf {
    let segments: Vec<&str> = slug.split("::").collect();
    let mut path = posts_dir.to_path_buf();
    let last_index = segments.len() - 1;
    for (index, segment) in segments.iter().enumerate() {
        if index == last_index {
            path.push(format!("{}.md", segment));
        } else {
            path.push(segment);
        }
    }
    path
}
|
||
|
||
// Look at the Markdown File and generate a Creation Date based upon gathered things.
|
||
fn get_file_creation_date(path: &Path) -> std::io::Result<DateTime<Utc>> {
|
||
let metadata = fs::metadata(path)?;
|
||
match metadata.created() {
|
||
Ok(created) => Ok(DateTime::<Utc>::from(created)),
|
||
Err(_) => {
|
||
let modified = metadata.modified()?;
|
||
Ok(DateTime::<Utc>::from(modified))
|
||
}
|
||
}
|
||
}
|
||
|
||
// The Frontend expects a plain old string that will be used for the anchor
|
||
// something like this -> #i-am-a-heading
|
||
// This creates a crossreference for Links that scroll to said heading
|
||
fn process_anchor_links(content: &str) -> String {
|
||
let re = regex::Regex::new(r"\[([^\]]+)\]\(#([^)]+)\)").unwrap();
|
||
re.replace_all(content, |caps: ®ex::Captures| {
|
||
let link_text = &caps[1];
|
||
let anchor = &caps[2];
|
||
let slugified = slugify(anchor);
|
||
format!("[{}](#{})", link_text, slugified)
|
||
}).to_string()
|
||
}
|
||
|
||
// Remove emoji characters so heading slugs stay clean.
// Example: "🏳️🌈 Hi!" loses its emoji code points before slugification.
// Note: only the listed Unicode blocks are filtered; joiners and variation
// selectors that accompany composed emoji are left untouched.
fn strip_emojis(s: &str) -> String {
    // True when the code point falls in one of the emoji-bearing blocks below.
    fn is_emoji(cp: u32) -> bool {
        matches!(cp,
            0x1F600..=0x1F64F        // Emoticons
            | 0x1F300..=0x1F5FF      // Misc Symbols and Pictographs
            | 0x1F680..=0x1F6FF      // Transport and Map
            | 0x2600..=0x26FF        // Misc symbols
            | 0x2700..=0x27BF        // Dingbats
            | 0x1F900..=0x1F9FF      // Supplemental Symbols and Pictographs
            | 0x1FA70..=0x1FAFF      // Symbols and Pictographs Extended-A
            | 0x1F1E6..=0x1F1FF      // Regional Indicator Symbols
        )
    }

    s.chars().filter(|c| !is_emoji(*c as u32)).collect()
}
|
||
|
||
// This is a obsolete Function for Custom Tags for HTML
|
||
// Example usage in Text: <warning />
|
||
fn process_custom_tags(content: &str) -> String {
|
||
let mut processed = content.to_string();
|
||
|
||
// Handle simple tags without parameters
|
||
let simple_tags = [
|
||
("<mytag />", "<div class=\"custom-tag mytag\">This is my custom tag content!</div>"),
|
||
("<warning />", "<div class=\"custom-tag warning\" style=\"background: #fff3cd; border: 1px solid #ffeaa7; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">⚠️ Warning: This is a custom warning tag!</div>"),
|
||
("<info />", "<div class=\"custom-tag info\" style=\"background: #d1ecf1; border: 1px solid #bee5eb; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">ℹ️ Info: This is a custom info tag!</div>"),
|
||
("<success />", "<div class=\"custom-tag success\" style=\"background: #d4edda; border: 1px solid #c3e6cb; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">✅ Success: This is a custom success tag!</div>"),
|
||
("<error />", "<div class=\"custom-tag error\" style=\"background: #f8d7da; border: 1px solid #f5c6cb; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">❌ Error: This is a custom error tag!</div>"),
|
||
];
|
||
|
||
for (tag, replacement) in simple_tags.iter() {
|
||
processed = processed.replace(tag, replacement);
|
||
}
|
||
|
||
// Handle tags with parameters
|
||
let tag_with_params = Regex::new(r"<(\w+)\s+([^>]*?[a-zA-Z0-9=])[^>]*/>").unwrap();
|
||
processed = tag_with_params.replace_all(&processed, |caps: ®ex::Captures| {
|
||
let tag_name = &caps[1];
|
||
let params = &caps[2];
|
||
|
||
match tag_name {
|
||
"mytag" => {
|
||
format!("<div class=\"custom-tag mytag\" data-params=\"{}\">Custom content with params: {}</div>", params, params)
|
||
},
|
||
"alert" => {
|
||
if params.contains("type=\"warning\"") {
|
||
"<div class=\"custom-tag alert warning\" style=\"background: #fff3cd; border: 1px solid #ffeaa7; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">⚠️ Warning Alert!</div>".to_string()
|
||
} else if params.contains("type=\"error\"") {
|
||
"<div class=\"custom-tag alert error\" style=\"background: #f8d7da; border: 1px solid #f5c6cb; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">❌ Error Alert!</div>".to_string()
|
||
} else {
|
||
"<div class=\"custom-tag alert info\" style=\"background: #d1ecf1; border: 1px solid #bee5eb; padding: 1rem; border-radius: 4px; margin: 1rem 0;\">ℹ️ Info Alert!</div>".to_string()
|
||
}
|
||
},
|
||
_ => format!("<div class=\"custom-tag {}\">Unknown custom tag: {}</div>", tag_name, tag_name)
|
||
}
|
||
}).to_string();
|
||
|
||
processed
|
||
}
|
||
|
||
// Logging functions
|
||
fn add_log(level: &str, message: &str, slug: Option<&str>, details: Option<&str>) {
|
||
let timestamp = chrono::Utc::now().to_rfc3339();
|
||
let log_entry = LogEntry {
|
||
timestamp,
|
||
level: level.to_string(),
|
||
message: message.to_string(),
|
||
slug: slug.map(|s| s.to_string()),
|
||
details: details.map(|s| s.to_string()),
|
||
};
|
||
{
|
||
let mut logs = PARSER_LOGS.write().unwrap();
|
||
logs.push_back(log_entry.clone());
|
||
// Keep only the last MAX_LOG_ENTRIES
|
||
while logs.len() > MAX_LOG_ENTRIES {
|
||
logs.pop_front();
|
||
}
|
||
// Write logs to disk
|
||
let _ = save_parser_logs_to_disk_inner(&logs);
|
||
}
|
||
}
|
||
|
||
fn save_parser_logs_to_disk_inner(logs: &VecDeque<LogEntry>) -> std::io::Result<()> {
|
||
ensure_cache_directory();
|
||
let logs_vec: Vec<_> = logs.iter().cloned().collect();
|
||
let json = serde_json::to_string(&logs_vec)?;
|
||
std::fs::write(PARSER_LOGS_PATH, json)?;
|
||
Ok(())
|
||
}
|
||
|
||
pub fn load_parser_logs_from_disk() {
|
||
if let Ok(data) = std::fs::read_to_string(PARSER_LOGS_PATH) {
|
||
if let Ok(logs_vec) = serde_json::from_str::<Vec<LogEntry>>(&data) {
|
||
let mut logs = PARSER_LOGS.write().unwrap();
|
||
logs.clear();
|
||
for entry in logs_vec {
|
||
logs.push_back(entry);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// Main public functions
// Return the per-post parser statistics as a JSON array string.
// Triggers a full post load first so stats exist for every post; returns "[]"
// when there are no stats or serialization fails.
pub fn rsparseinfo() -> String {
    // Ensure stats are populated; the posts themselves are not needed here.
    let _ = get_all_posts();
    let stats = POST_STATS.read().unwrap();
    let values: Vec<&PostStats> = stats.values().collect();
    if values.is_empty() {
        "[]".to_string()
    } else {
        serde_json::to_string(&values).unwrap_or_else(|_| "[]".to_string())
    }
}
|
||
|
||
// Load a post by its slug, serving from POST_CACHE when possible.
//
// On a cache miss this reads the markdown file, parses YAML frontmatter,
// rewrites anchor links and custom tags, renders markdown to HTML (with
// syntax-highlighted code blocks and slugified heading ids), sanitizes the
// HTML, caches the result, and records timing/CPU statistics in POST_STATS.
//
// Errors: missing file, oversized file (> MAX_FILE_SIZE), absent or invalid
// frontmatter, parsing timeout, or I/O failures.
pub fn get_post_by_slug(slug: &str) -> Result<Post, Box<dyn std::error::Error>> {
    add_log("info", "Starting post parsing", Some(slug), None);

    // Sample this process's CPU usage before the work so the stats entry can
    // record the delta afterwards.
    let mut sys = System::new_with_specifics(RefreshKind::new().with_processes(ProcessRefreshKind::everything()).with_cpu(CpuRefreshKind::everything()));
    sys.refresh_processes();
    let pid = sysinfo::get_current_pid()?;
    let before_cpu = sys.process(pid).map(|p| p.cpu_usage()).unwrap_or(0.0);
    let start = Instant::now();

    // Create-or-fetch the stats entry for this slug. NOTE: the POST_STATS
    // write lock is held through the cache probe below and explicitly dropped
    // before the expensive parse.
    let mut stats = POST_STATS.write().unwrap();
    let entry = stats.entry(slug.to_string()).or_insert_with(|| PostStats {
        slug: slug.to_string(),
        ..Default::default()
    });

    // Try cache first
    if let Some(post) = POST_CACHE.read().unwrap().get(slug).cloned() {
        entry.cache_hits += 1;
        entry.last_interpret_time_ms = 0;
        entry.last_compile_time_ms = 0;
        entry.last_cache_status = "hit".to_string();
        sys.refresh_process(pid);
        entry.last_cpu_usage_percent = sys.process(pid).map(|p| p.cpu_usage()).unwrap_or(0.0) - before_cpu;
        add_log("info", "Cache hit", Some(slug), None);
        return Ok(post);
    }

    entry.cache_misses += 1;
    entry.last_cache_status = "miss".to_string();
    // Release the stats lock before file I/O and parsing.
    drop(stats);

    let posts_dir = get_posts_directory();
    let file_path = slug_to_path(slug, &posts_dir);

    if !file_path.exists() {
        let error_msg = format!("File not found: {:?}", file_path);
        add_log("error", &error_msg, Some(slug), None);
        return Err(error_msg.into());
    }

    let file_content = fs::read_to_string(&file_path)?;
    add_log("info", &format!("File loaded: {} bytes", file_content.len()), Some(slug), None);

    // Refuse oversized files up front rather than risk a runaway parse.
    if file_content.len() > MAX_FILE_SIZE {
        let error_msg = format!("File too large: {} bytes (max: {} bytes)", file_content.len(), MAX_FILE_SIZE);
        add_log("error", &error_msg, Some(slug), None);
        return Err(error_msg.into());
    }

    // Split YAML frontmatter from the markdown body.
    let matter = Matter::<YAML>::new();
    let result = matter.parse(&file_content);

    // Frontmatter is mandatory; a post without it is rejected.
    let front: PostFrontmatter = if let Some(data) = result.data {
        match data.deserialize() {
            Ok(front) => front,
            Err(e) => {
                let error_msg = format!("Failed to deserialize frontmatter: {}", e);
                add_log("error", &error_msg, Some(slug), None);
                return Err(error_msg.into());
            }
        }
    } else {
        add_log("error", "No frontmatter found", Some(slug), None);
        return Err("No frontmatter found".into());
    };

    let created_at = get_file_creation_date(&file_path)?;
    // Pre-render transforms on the raw markdown: slugified anchor fragments,
    // then custom-tag expansion.
    let processed_markdown = process_anchor_links(&result.content);
    let processed_markdown = process_custom_tags(&processed_markdown);

    add_log("info", "Starting markdown parsing", Some(slug), Some(&format!("Content length: {} chars", processed_markdown.len())));

    // Event-level rendering state: headings are buffered so they can be
    // re-emitted with slugified ids, and code blocks are buffered so they can
    // be syntax-highlighted as a unit.
    let parser = Parser::new_ext(&processed_markdown, Options::all());
    let mut html_output = String::new();
    let mut heading_text = String::new();
    let mut in_heading = false;
    let mut heading_level = 0;
    let mut in_code_block = false;
    let mut code_block_lang = String::new();
    let mut code_block_content = String::new();
    let mut events = Vec::new();
    let ss = SyntaxSet::load_defaults_newlines();
    let ts = ThemeSet::load_defaults();
    let theme = &ts.themes["base16-ocean.dark"];

    let start_parsing = Instant::now();
    let mut event_count = 0;

    for event in parser {
        event_count += 1;
        // Guard against pathological inputs that never finish parsing.
        if start_parsing.elapsed().as_secs() > PARSING_TIMEOUT_SECS {
            let error_msg = "Parsing timeout - file too large";
            add_log("error", error_msg, Some(slug), Some(&format!("Processed {} events", event_count)));
            return Err(error_msg.into());
        }

        match &event {
            Event::Start(Tag::Heading(level, _, _)) => {
                // Begin buffering the heading's text.
                in_heading = true;
                heading_level = *level as usize;
                heading_text.clear();
            },
            Event::End(Tag::Heading(_, _, _)) => {
                // Emit the heading as raw HTML with a slugified, emoji-free id
                // so anchor links (see process_anchor_links) can target it.
                in_heading = false;
                let heading_no_emoji = strip_emojis(&heading_text);
                let id = slugify(&heading_no_emoji);
                let style = "color: #2d3748; margin-top: 1.5em; margin-bottom: 0.5em;";
                events.push(Event::Html(CowStr::Boxed(format!("<h{lvl} id=\"{id}\" style=\"{style}\">", lvl=heading_level, id=id, style=style).into_boxed_str())));
                events.push(Event::Text(CowStr::Boxed(heading_text.clone().into_boxed_str())));
                events.push(Event::Html(CowStr::Boxed(format!("</h{lvl}>", lvl=heading_level).into_boxed_str())));
            },
            Event::Text(text) if in_heading => {
                heading_text.push_str(text);
            },
            Event::Start(Tag::CodeBlock(kind)) => {
                // Begin buffering the code block; fenced blocks carry a language token.
                in_code_block = true;
                code_block_content.clear();
                code_block_lang = match kind {
                    pulldown_cmark::CodeBlockKind::Fenced(lang) => lang.to_string(),
                    pulldown_cmark::CodeBlockKind::Indented => String::new(),
                };
            },
            Event::End(Tag::CodeBlock(_)) => {
                // Highlight with syntect when the language is recognized;
                // otherwise fall back to an escaped plain <pre><code> block.
                in_code_block = false;
                let highlighted = if !code_block_lang.is_empty() {
                    if let Some(syntax) = ss.find_syntax_by_token(&code_block_lang) {
                        highlighted_html_for_string(&code_block_content, &ss, syntax, theme).unwrap_or_else(|_| format!("<pre style=\"background: #2d2d2d; color: #f8f8f2; padding: 1em; border-radius: 6px; overflow-x: auto;\"><code style=\"background: none;\">{}</code></pre>", html_escape::encode_text(&code_block_content)))
                    } else {
                        format!("<pre style=\"background: #2d2d2d; color: #f8f8f2; padding: 1em; border-radius: 6px; overflow-x: auto;\"><code style=\"background: none;\">{}</code></pre>", html_escape::encode_text(&code_block_content))
                    }
                } else {
                    format!("<pre style=\"background: #2d2d2d; color: #f8f8f2; padding: 1em; border-radius: 6px; overflow-x: auto;\"><code style=\"background: none;\">{}</code></pre>", html_escape::encode_text(&code_block_content))
                };
                events.push(Event::Html(CowStr::Boxed(highlighted.into_boxed_str())));
            },
            Event::Text(text) if in_code_block => {
                code_block_content.push_str(text);
            },
            // Pass every other event through unchanged, but drop events that
            // occur inside a buffered heading or code block (they were, or
            // will be, re-emitted above).
            _ if !in_heading && !in_code_block => {
                events.push(event);
            },
            _ => {},
        }
    }

    add_log("info", "Markdown parsing completed", Some(slug), Some(&format!("Processed {} events", event_count)));

    html::push_html(&mut html_output, events.into_iter());
    // Sanitize through the shared ammonia whitelist before serving.
    let sanitized_html = AMMONIA.clean(&html_output).to_string();

    let interpret_time = start.elapsed();
    let compile_time_start = Instant::now();
    let post = Post {
        slug: slug.to_string(),
        title: front.title,
        date: front.date,
        tags: front.tags.unwrap_or_default(),
        summary: front.summary,
        content: sanitized_html,
        created_at: created_at.to_rfc3339(),
        author: std::env::var("BLOG_OWNER").unwrap_or_else(|_| "Anonymous".to_string()),
    };
    let compile_time = compile_time_start.elapsed();

    // Insert into cache so subsequent requests hit POST_CACHE directly.
    // If posts keep re-parsing unexpectedly, check the parser logs.
    POST_CACHE.write().unwrap().insert(slug.to_string(), post.clone());

    // Update stats with the timings and CPU delta for this parse.
    let mut stats = POST_STATS.write().unwrap();
    let entry = stats.entry(slug.to_string()).or_insert_with(|| PostStats {
        slug: slug.to_string(),
        ..Default::default()
    });
    entry.last_interpret_time_ms = interpret_time.as_millis();
    entry.last_compile_time_ms = compile_time.as_millis();
    sys.refresh_process(pid);
    entry.last_cpu_usage_percent = sys.process(pid).map(|p| p.cpu_usage()).unwrap_or(0.0) - before_cpu;

    add_log("info", "Post parsing completed successfully", Some(slug), Some(&format!("Interpret: {}ms, Compile: {}ms", interpret_time.as_millis(), compile_time.as_millis())));

    Ok(post)
}
|
||
|
||
pub fn get_all_posts() -> Result<Vec<Post>, Box<dyn std::error::Error>> {
|
||
// Try cache first
|
||
if let Some(posts) = ALL_POSTS_CACHE.read().unwrap().clone() {
|
||
return Ok(posts);
|
||
}
|
||
|
||
let posts_dir = get_posts_directory();
|
||
let markdown_files = find_markdown_files(&posts_dir)?;
|
||
let mut posts = Vec::new();
|
||
|
||
for file_path in markdown_files {
|
||
let slug = path_to_slug(&file_path, &posts_dir);
|
||
if let Ok(post) = get_post_by_slug(&slug) {
|
||
POST_CACHE.write().unwrap().insert(slug.clone(), post.clone());
|
||
posts.push(post);
|
||
}
|
||
}
|
||
|
||
posts.sort_by(|a, b| b.created_at.cmp(&a.created_at));
|
||
*ALL_POSTS_CACHE.write().unwrap() = Some(posts.clone());
|
||
Ok(posts)
|
||
}
|
||
|
||
pub fn get_posts_by_tag(tag: &str) -> Result<Vec<Post>, Box<dyn std::error::Error>> {
|
||
let all_posts = get_all_posts()?;
|
||
Ok(all_posts.into_iter().filter(|p| p.tags.contains(&tag.to_string())).collect())
|
||
}
|
||
|
||
pub fn watch_posts<F: Fn() + Send + 'static>(on_change: F) -> notify::Result<RecommendedWatcher> {
|
||
let (tx, rx) = channel();
|
||
let mut watcher = RecommendedWatcher::new(tx, Config::default())?;
|
||
watcher.watch(get_posts_directory().as_path(), RecursiveMode::Recursive)?;
|
||
|
||
std::thread::spawn(move || {
|
||
loop {
|
||
match rx.recv() {
|
||
Ok(_event) => {
|
||
POST_CACHE.write().unwrap().clear();
|
||
*ALL_POSTS_CACHE.write().unwrap() = None;
|
||
on_change();
|
||
},
|
||
Err(e) => {
|
||
eprintln!("watch error: {:?}", e);
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
});
|
||
Ok(watcher)
|
||
}
|
||
|
||
pub fn load_post_cache_from_disk() {
|
||
if let Ok(data) = fs::read_to_string(POSTS_CACHE_PATH) {
|
||
if let Ok(map) = serde_json::from_str::<HashMap<String, Post>>(&data) {
|
||
*POST_CACHE.write().unwrap() = map;
|
||
}
|
||
}
|
||
if let Ok(data) = fs::read_to_string(POST_STATS_PATH) {
|
||
if let Ok(map) = serde_json::from_str::<HashMap<String, PostStats>>(&data) {
|
||
*POST_STATS.write().unwrap() = map;
|
||
}
|
||
}
|
||
}
|
||
|
||
pub fn save_post_cache_to_disk() {
|
||
ensure_cache_directory();
|
||
if let Ok(map) = serde_json::to_string(&*POST_CACHE.read().unwrap()) {
|
||
let _ = fs::write(POSTS_CACHE_PATH, map);
|
||
}
|
||
if let Ok(map) = serde_json::to_string(&*POST_STATS.read().unwrap()) {
|
||
let _ = fs::write(POST_STATS_PATH, map);
|
||
}
|
||
}
|
||
|
||
// Build a HealthReport describing the posts directory and both cache files.
// Never fails: every problem is collected into the report's `errors` field.
pub fn checkhealth() -> HealthReport {
    let mut errors = Vec::new();
    let posts_dir = get_posts_directory();
    let posts_dir_exists = posts_dir.exists() && posts_dir.is_dir();
    let mut posts_count = 0;

    // Count .md files at the top level only (no recursion here, unlike
    // find_markdown_files).
    if posts_dir_exists {
        match std::fs::read_dir(&posts_dir) {
            Ok(entries) => {
                posts_count = entries.filter_map(|e| e.ok())
                    .filter(|e| e.path().extension().map(|ext| ext == "md").unwrap_or(false))
                    .count();
            },
            Err(e) => errors.push(format!("Failed to read posts dir: {}", e)),
        }
    } else {
        errors.push("Posts directory does not exist".to_string());
    }

    let cache_file_exists = Path::new(POSTS_CACHE_PATH).exists();
    let cache_stats_file_exists = Path::new(POST_STATS_PATH).exists();
    let (mut cache_readable, mut cache_post_count) = (false, None);

    // Validate the posts cache: it must read as a slug -> Post JSON map.
    if cache_file_exists {
        match std::fs::read_to_string(POSTS_CACHE_PATH) {
            Ok(data) => {
                match serde_json::from_str::<HashMap<String, Post>>(&data) {
                    Ok(map) => {
                        cache_readable = true;
                        cache_post_count = Some(map.len());
                    },
                    Err(e) => errors.push(format!("Cache file not valid JSON: {}", e)),
                }
            },
            Err(e) => errors.push(format!("Failed to read cache file: {}", e)),
        }
    }

    // Validate the stats cache the same way (slug -> PostStats).
    let (mut cache_stats_readable, mut cache_stats_count) = (false, None);
    if cache_stats_file_exists {
        match std::fs::read_to_string(POST_STATS_PATH) {
            Ok(data) => {
                match serde_json::from_str::<HashMap<String, PostStats>>(&data) {
                    Ok(map) => {
                        cache_stats_readable = true;
                        cache_stats_count = Some(map.len());
                    },
                    Err(e) => errors.push(format!("Cache stats file not valid JSON: {}", e)),
                }
            },
            Err(e) => errors.push(format!("Failed to read cache stats file: {}", e)),
        }
    }

    HealthReport {
        posts_dir_exists,
        posts_count,
        cache_file_exists,
        cache_stats_file_exists,
        cache_readable,
        cache_stats_readable,
        cache_post_count,
        cache_stats_count,
        errors,
    }
}
|
||
|
||
pub fn get_parser_logs() -> Vec<LogEntry> {
|
||
// Always reload from disk to ensure up-to-date logs
|
||
load_parser_logs_from_disk();
|
||
let logs = PARSER_LOGS.read().unwrap();
|
||
logs.iter().cloned().collect()
|
||
}
|
||
|
||
pub fn clear_parser_logs() {
|
||
PARSER_LOGS.write().unwrap().clear();
|
||
if let Err(e) = save_parser_logs_to_disk_inner(&VecDeque::new()) {
|
||
eprintln!("Failed to save empty logs to disk: {}", e);
|
||
}
|
||
}
|
||
|
||
// Force reinterpret all posts by clearing every cache and re-parsing from disk.
// Returns the freshly parsed posts; per-post failures are logged and counted
// but do not abort the run. The rebuilt caches are persisted at the end.
pub fn force_reinterpret_all_posts() -> Result<Vec<Post>, Box<dyn std::error::Error>> {
    add_log("info", "Starting force reinterpret of all posts", None, None);

    // Clear all caches so every post hits the miss path in get_post_by_slug.
    POST_CACHE.write().unwrap().clear();
    ALL_POSTS_CACHE.write().unwrap().take();
    POST_STATS.write().unwrap().clear();

    add_log("info", "Cleared all caches", None, None);

    // Get posts directory and find all markdown files
    let posts_dir = get_posts_directory();
    let markdown_files = find_markdown_files(&posts_dir)?;

    add_log("info", &format!("Found {} markdown files to reinterpret", markdown_files.len()), None, None);

    let mut posts = Vec::new();
    let mut success_count = 0;
    let mut error_count = 0;

    // Re-parse every file; collect successes, log (but tolerate) failures.
    for file_path in markdown_files {
        let slug = path_to_slug(&file_path, &posts_dir);
        match get_post_by_slug(&slug) {
            Ok(post) => {
                posts.push(post);
                success_count += 1;
                add_log("info", &format!("Successfully reinterpreted: {}", slug), Some(&slug), None);
            }
            Err(e) => {
                error_count += 1;
                add_log("error", &format!("Failed to reinterpret {}: {}", slug, e), Some(&slug), None);
            }
        }
    }

    // Update the all-posts cache with the freshly parsed list.
    // NOTE: unlike get_all_posts(), this list is not re-sorted here.
    ALL_POSTS_CACHE.write().unwrap().replace(posts.clone());

    // Save cache to disk
    save_post_cache_to_disk();

    add_log("info", &format!("Force reinterpret completed. Success: {}, Errors: {}", success_count, error_count), None, None);

    Ok(posts)
}
|
||
|
||
// Force reparse a single post by clearing its cache and re-parsing
|
||
pub fn force_reparse_single_post(slug: &str) -> Result<Post, Box<dyn std::error::Error>> {
|
||
add_log("info", &format!("Starting force reparse of post: {}", slug), Some(slug), None);
|
||
|
||
// Clear this specific post from all caches
|
||
POST_CACHE.write().unwrap().remove(slug);
|
||
POST_STATS.write().unwrap().remove(slug);
|
||
|
||
// Clear the all posts cache since it might contain this post
|
||
ALL_POSTS_CACHE.write().unwrap().take();
|
||
|
||
add_log("info", &format!("Cleared cache for post: {}", slug), Some(slug), None);
|
||
|
||
// Re-parse the post
|
||
let post = get_post_by_slug(slug)?;
|
||
|
||
// Update the all posts cache with the new post
|
||
let mut all_posts_cache = ALL_POSTS_CACHE.write().unwrap();
|
||
if let Some(ref mut posts) = *all_posts_cache {
|
||
// Remove old version if it exists
|
||
posts.retain(|p| p.slug != slug);
|
||
// Add new version
|
||
posts.push(post.clone());
|
||
// Sort by creation date
|
||
posts.sort_by(|a, b| b.created_at.cmp(&a.created_at));
|
||
}
|
||
|
||
// Save cache to disk
|
||
save_post_cache_to_disk();
|
||
|
||
add_log("info", &format!("Successfully reparsed post: {}", slug), Some(slug), None);
|
||
|
||
Ok(post)
|
||
} |