my-website-v2/backend/task/src/tasks/import_posts.rs
Wyatt J. Miller 1503db9509 working import task
wip: not complete, got to fix some of markdown options
2025-05-17 21:04:56 -04:00

147 lines
4.1 KiB
Rust

use std::fs;
use std::io::Read;
use crate::utils::task_log;
use serde::{Deserialize, Deserializer};
/// Spawns the post import as a detached background Tokio task.
///
/// The pool handle is cloned so the spawned task owns its own handle and the
/// caller's borrow ends when this function returns. Posts are read from the
/// hard-coded `app/` directory.
///
/// Nothing awaits the spawned task, so a failure cannot be returned to the
/// caller; it is written to stderr instead of being discarded.
pub fn register(pool: &sqlx::Pool<sqlx::Postgres>) {
    let task_pool = pool.clone();
    tokio::spawn(async move {
        // The error value is only used after the await completes, so it is
        // never held across a suspension point.
        if let Err(err) = import_posts("app/", &task_pool).await {
            eprintln!("Post import task failed: {}", err);
        }
    });
}
/// Imports every markdown post in `dir_path` that is not yet in the `posts` table.
///
/// For each regular file directly inside `dir_path`:
/// 1. files whose name is not valid UTF-8 are skipped with a message on stderr;
/// 2. files whose name already appears in `posts.filename` are skipped;
/// 3. otherwise the file is read, its YAML front matter is parsed into
///    [`MarkdownMetadata`], the remaining content is rendered to HTML with the
///    markdown crate's default options, and a new `posts` row is inserted.
///
/// A task-log entry is started before the scan and updated to `"Completed"`
/// once every file has been handled.
///
/// # Errors
///
/// Returns the first failure encountered: task-log calls, directory or file
/// I/O, database queries, front matter parsing, or markdown rendering. On such
/// an early return the task-log entry is NOT updated to `"Completed"`.
async fn import_posts(
    dir_path: &str,
    pool: &sqlx::Pool<sqlx::Postgres>,
) -> Result<(), Box<dyn std::error::Error>> {
    // Author assigned to every imported post; consider making this a parameter.
    const DEFAULT_AUTHOR_ID: i32 = 1;

    println!("Beginning post import process");

    // Start task logging.
    // NOTE(review): `1` is presumably the id of this import task — confirm
    // against `task_log::start`.
    let task = task_log::start(1, pool).await?;

    for entry_result in fs::read_dir(dir_path)? {
        let file = entry_result?;
        let file_path = file.path();

        // Only regular files are candidates; directories and the like are ignored.
        if !file_path.is_file() {
            continue;
        }

        let file_name = file.file_name();
        let file_name_str = match file_name.to_str() {
            Some(name) => name,
            None => {
                eprintln!("Skipping file with non-UTF8 filename: {:?}", file_path);
                continue;
            }
        };
        println!("Processing file: {}", file_name_str);

        // The filename is the identity of a post: a matching row means it was
        // imported by an earlier run.
        let exists_query = sqlx::query_as!(
            FilenameExists,
            "SELECT EXISTS(SELECT 1 FROM posts p WHERE p.filename = $1) as filename",
            file_name_str
        )
        .fetch_one(pool)
        .await?;

        // A NULL result is treated the same as "not present".
        if exists_query.filename.unwrap_or(false) {
            println!("Skipping existing file: {}", file_name_str);
            continue;
        }

        println!("Importing new file: {}", file_name_str);

        let file_md_contents = process_read_file(&file_path)?;

        // Split the front matter metadata from the markdown content.
        let document = crate::utils::front_matter::YamlFrontMatter::parse::<MarkdownMetadata>(
            &file_md_contents,
        )?;

        let content =
            markdown::to_html_with_options(&document.content, &markdown::Options::default());
        println!("{:?}", content);

        let title = document.metadata.title;
        // A render failure is returned to the caller rather than panicking
        // inside the spawned task.
        let content_final = content.map_err(|e| e.to_string())?;

        let results = sqlx::query_as::<_, InsertPosts>(
            "INSERT INTO posts (title, body, filename, author_id) VALUES ($1, $2, $3, $4) RETURNING title, body, filename, author_id"
        )
        .bind(title)
        .bind(content_final)
        .bind(file_name_str)
        .bind(DEFAULT_AUTHOR_ID)
        .fetch_one(pool)
        .await?;
        println!("{:?}", results);
        println!("Successfully imported: {}", file_name_str);
    }

    // Mark task as completed
    task_log::update(task.task_id, String::from("Completed"), pool).await?;
    Ok(())
}
/// Reads the whole file at `file_path` into a `String`.
///
/// # Errors
///
/// Returns the underlying `std::io::Error` when the file cannot be opened or
/// read, or when its contents are not valid UTF-8.
fn process_read_file(file_path: &std::path::Path) -> Result<String, std::io::Error> {
    std::fs::read_to_string(file_path)
}
/// Row shape for the `SELECT EXISTS(...) as filename` lookup issued by
/// `import_posts` to find out whether a post file was already imported.
#[derive(Debug, sqlx::FromRow)]
struct FilenameExists {
// Result of the EXISTS check; `import_posts` treats `None` as `false`.
filename: Option<bool>,
}
/// Row returned by the `INSERT INTO posts ... RETURNING title, body, filename,
/// author_id` statement in `import_posts`; the fields mirror the RETURNING
/// columns. In the visible code the value is only printed with `{:?}`.
#[derive(Debug, sqlx::FromRow)]
struct InsertPosts {
// Post title, bound from the front matter `title`.
title: String,
// Post body, bound from the rendered HTML.
body: String,
// Name of the source file the post was imported from.
filename: String,
// Id of the author the post was inserted with.
author_id: i32,
}
/// Wrapper around a set of markdown syntax constructs.
///
/// NOTE(review): nothing visible in this file reads `options`; rendering in
/// `import_posts` passes `markdown::Options::default()` instead. Presumably
/// this is meant to carry the constructs to the renderer — confirm before use.
struct MarkdownOptions {
// The markdown constructs this wrapper carries.
options: markdown::Constructs,
}
/// Front matter metadata of a post file, deserialized by
/// `YamlFrontMatter::parse::<MarkdownMetadata>` in `import_posts`.
///
/// All four fields are required by the derived `Deserialize` impl. Of these,
/// `import_posts` only reads `title`.
#[derive(Deserialize, Debug)]
struct MarkdownMetadata {
// NOTE(review): not read anywhere in the visible code; meaning unconfirmed.
layout: String,
// Stored as the `title` column of the inserted post.
title: String,
// Parsed from an RFC 3339 string and converted to UTC by `deserialize_datetime`.
#[serde(deserialize_with = "deserialize_datetime")]
date: chrono::DateTime<chrono::Utc>,
// NOTE(review): not consulted by the visible import code — unpublished
// posts are presumably imported as well; confirm this is intended.
published: bool,
}
fn deserialize_datetime<'de, D>(deserializer: D) -> Result<chrono::DateTime<chrono::Utc>, D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
chrono::DateTime::parse_from_rfc3339(&s)
.map(|dt| dt.with_timezone(&chrono::Utc))
.map_err(serde::de::Error::custom)
}