Added cache and S3 client to TaskManager; tasks now ask the cache whether the result is unchanged before uploading, among other changes

This commit is contained in:
2025-07-14 23:30:29 -04:00
parent 57952ec41d
commit d53f3da4c6
11 changed files with 241 additions and 53 deletions

View File

@@ -1,6 +1,6 @@
pub use fred::{ pub use fred::{
clients::Pool, clients::Pool,
interfaces::KeysInterface, interfaces::{ClientLike, KeysInterface},
prelude::*, prelude::*,
types::{Expiration, SetOptions}, types::{Expiration, SetOptions},
}; };

View File

@@ -1,6 +1,6 @@
use axum::Router; use axum::Router;
use cache::ClientLike;
use config::config; use config::config;
use fred::prelude::*;
use sqlx::postgres::PgPoolOptions; use sqlx::postgres::PgPoolOptions;
use std::fs::File; use std::fs::File;
use std::sync::Arc; use std::sync::Arc;
@@ -8,7 +8,7 @@ use std::time::Duration;
use tokio::net::TcpListener; use tokio::net::TcpListener;
use tokio::signal; use tokio::signal;
use tokio::sync::Mutex; use tokio::sync::Mutex;
use tower_governor::{governor::GovernorConfigBuilder, GovernorLayer}; // use tower_governor::{governor::GovernorConfigBuilder, GovernorLayer};
use tower_http::{ use tower_http::{
cors::{Any, CorsLayer}, cors::{Any, CorsLayer},
trace::{self, TraceLayer}, trace::{self, TraceLayer},
@@ -101,13 +101,13 @@ async fn main() {
.expect("Failed to connect to database"); .expect("Failed to connect to database");
let pool_size = 8; let pool_size = 8;
let config = Config::from_url(&redis_url).unwrap(); // TODO: fix the unwrap <<< let config = cache::Config::from_url(&redis_url).unwrap(); // TODO: fix the unwrap <<<
let redis_pool = Builder::from_config(config) let redis_pool = cache::Builder::from_config(config)
.with_performance_config(|config| { .with_performance_config(|config| {
config.default_command_timeout = Duration::from_secs(60); config.default_command_timeout = Duration::from_secs(60);
}) })
.set_policy(ReconnectPolicy::new_exponential(0, 100, 30_000, 2)) .set_policy(cache::ReconnectPolicy::new_exponential(0, 100, 30_000, 2))
.build_pool(pool_size) .build_pool(pool_size)
.expect("Failed to create cache pool"); .expect("Failed to create cache pool");

View File

@@ -73,17 +73,18 @@ pub fn generate_rss(
format!( format!(
r#"<?xml version="1.0" encoding="UTF-8"?> r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"> <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel> <channel>
<title>{safe_title}</title> <title>{safe_title}</title>
<description>{safe_description}</description> <description>{safe_description}</description>
<link>{link}</link> <link>{link}</link>
<language>en-us</language> <language>en-us</language>
<ttl>60</ttl> <ttl>60</ttl>
<generator>Kyouma 1.0.0-SE</generator> <generator>Kyouma 1.0.0-SE</generator>
<atom:link href="https://wyattjmiller.com/posts.xml" rel="self" type="application/rss+xml" /> <atom:link href="https://wyattjmiller.com/posts.xml" rel="self" type="application/rss+xml" />
{} {}
</channel> </channel>
</rss>"#, </rss>
"#,
rss_entries rss_entries
) )
} }

View File

@@ -23,7 +23,6 @@ impl SitemapEntry {
pub fn generate_sitemap(entries: &HashMap<String, SitemapEntry>) -> String { pub fn generate_sitemap(entries: &HashMap<String, SitemapEntry>) -> String {
let urls = entries let urls = entries
.values() .values()
.into_iter()
.map(|entry| entry.to_item()) .map(|entry| entry.to_item())
.collect::<String>(); .collect::<String>();
format!( format!(
@@ -39,21 +38,21 @@ pub fn generate_sitemap(entries: &HashMap<String, SitemapEntry>) -> String {
pub fn get_static_pages(entries: &mut HashMap<String, SitemapEntry>, web_url: &String) { pub fn get_static_pages(entries: &mut HashMap<String, SitemapEntry>, web_url: &String) {
entries.insert( entries.insert(
(entries.len() + 1).to_string(), "10000".to_string(),
SitemapEntry { SitemapEntry {
location: web_url.clone(), location: web_url.clone(),
lastmod: chrono::Utc::now(), lastmod: chrono::Utc::now(),
}, },
); );
entries.insert( entries.insert(
(entries.len() + 1).to_string(), "10001".to_string(),
SitemapEntry { SitemapEntry {
location: format!("{}/posts", web_url), location: format!("{}/posts", web_url),
lastmod: chrono::Utc::now(), lastmod: chrono::Utc::now(),
}, },
); );
entries.insert( entries.insert(
(entries.len() + 1).to_string(), "10002".to_string(),
SitemapEntry { SitemapEntry {
location: format!("{}/projects", web_url), location: format!("{}/projects", web_url),
lastmod: chrono::Utc::now(), lastmod: chrono::Utc::now(),

View File

@@ -4,7 +4,7 @@ use aws_config::{BehaviorVersion, Region};
use aws_sdk_s3::{Client, Config, config::Credentials}; use aws_sdk_s3::{Client, Config, config::Credentials};
use std::env; use std::env;
#[derive(Debug)] #[derive(Clone, Debug)]
pub struct S3ClientConfig { pub struct S3ClientConfig {
pub access_key: String, pub access_key: String,
secret_key: String, secret_key: String,
@@ -13,8 +13,10 @@ pub struct S3ClientConfig {
region: String, region: String,
} }
#[derive(Clone)]
pub struct S3Client { pub struct S3Client {
client: Client, client: Client,
pub client_config: S3ClientConfig,
} }
impl S3ClientConfig { impl S3ClientConfig {
@@ -68,6 +70,7 @@ impl S3Client {
Self { Self {
client: Client::from_conf(s3_config), client: Client::from_conf(s3_config),
client_config: config.clone(),
} }
} }
} }

100
backend/task/Cargo.lock generated
View File

@@ -71,6 +71,12 @@ version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
[[package]]
name = "arc-swap"
version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
[[package]] [[package]]
name = "async-channel" name = "async-channel"
version = "1.9.0" version = "1.9.0"
@@ -796,6 +802,15 @@ dependencies = [
"either", "either",
] ]
[[package]]
name = "cache"
version = "0.1.0"
dependencies = [
"fred",
"serde",
"serde_json",
]
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.1.21" version = "1.1.21"
@@ -877,6 +892,12 @@ version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
[[package]]
name = "cookie-factory"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "396de984970346b0d9e93d1415082923c679e5ae5c3ee3dcbd104f5610af126b"
[[package]] [[package]]
name = "core-foundation" name = "core-foundation"
version = "0.9.4" version = "0.9.4"
@@ -940,6 +961,12 @@ dependencies = [
"regex", "regex",
] ]
[[package]]
name = "crc16"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "338089f42c427b86394a5ee60ff321da23a5c89c9d89514c829687b26359fcff"
[[package]] [[package]]
name = "crc32fast" name = "crc32fast"
version = "1.4.2" version = "1.4.2"
@@ -1227,6 +1254,15 @@ dependencies = [
"miniz_oxide", "miniz_oxide",
] ]
[[package]]
name = "float-cmp"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8"
dependencies = [
"num-traits",
]
[[package]] [[package]]
name = "flume" name = "flume"
version = "0.11.0" version = "0.11.0"
@@ -1274,6 +1310,43 @@ dependencies = [
"percent-encoding", "percent-encoding",
] ]
[[package]]
name = "fred"
version = "10.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a7b2fd0f08b23315c13b6156f971aeedb6f75fb16a29ac1872d2eabccc1490e"
dependencies = [
"arc-swap",
"async-trait",
"bytes",
"bytes-utils",
"float-cmp",
"fred-macros",
"futures",
"log",
"parking_lot",
"rand 0.8.5",
"redis-protocol",
"semver",
"socket2",
"tokio",
"tokio-stream",
"tokio-util",
"url",
"urlencoding",
]
[[package]]
name = "fred-macros"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1458c6e22d36d61507034d5afecc64f105c1d39712b7ac6ec3b352c423f715cc"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "fs_extra" name = "fs_extra"
version = "1.3.0" version = "1.3.0"
@@ -2695,6 +2768,20 @@ dependencies = [
"rand_core 0.5.1", "rand_core 0.5.1",
] ]
[[package]]
name = "redis-protocol"
version = "6.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9cdba59219406899220fc4cdfd17a95191ba9c9afb719b5fa5a083d63109a9f1"
dependencies = [
"bytes",
"bytes-utils",
"cookie-factory",
"crc16",
"log",
"nom 7.1.3",
]
[[package]] [[package]]
name = "redox_syscall" name = "redox_syscall"
version = "0.5.4" version = "0.5.4"
@@ -3085,18 +3172,18 @@ checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.210" version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
dependencies = [ dependencies = [
"serde_derive", "serde_derive",
] ]
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.210" version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@@ -3105,9 +3192,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_json" name = "serde_json"
version = "1.0.128" version = "1.0.140"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
dependencies = [ dependencies = [
"itoa", "itoa",
"memchr", "memchr",
@@ -3580,6 +3667,7 @@ dependencies = [
name = "task" name = "task"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"cache",
"chrono", "chrono",
"dotenvy", "dotenvy",
"futures", "futures",

View File

@@ -7,6 +7,7 @@ edition = "2021"
[dependencies] [dependencies]
storage = { version = "0.1.0", path = "../storage" } storage = { version = "0.1.0", path = "../storage" }
cache = { version = "0.1.0", path = "../cache" }
tokio = { version = "1.19.2", features = ["full"] } tokio = { version = "1.19.2", features = ["full"] }
reqwest = { version = "0.12.20", features = ["json", "rustls-tls"] } reqwest = { version = "0.12.20", features = ["json", "rustls-tls"] }
job_scheduler = "1.2.1" job_scheduler = "1.2.1"

View File

@@ -12,4 +12,9 @@ This is a task runner/scheduler program that will fire off various tasks. These
For `task` to work properly, please make sure to first create the `.env` file, then fill out the following environment variables: For `task` to work properly, please make sure to first create the `.env` file, then fill out the following environment variables:
- `BASE_URI_API` - needed for communicating with `public`
- `DATABASE_URL` - needed for communicating with Postgres - `DATABASE_URL` - needed for communicating with Postgres
- `REDIS_URL` - needed for communicating with the cache (Redis or Valkey)
- `S3_ACCESS_KEY` - needed for Amazon S3 (or compatible services) storage
- `S3_SECRET_KEY` - needed for Amazon S3 (or compatible services) storage
- `S3_BUCKET` - needed for Amazon S3 (or compatible services) storage

View File

@@ -1,8 +1,10 @@
use cache::ClientLike;
use chrono::Utc; use chrono::Utc;
use sqlx::{postgres::PgPoolOptions, Pool, Postgres}; use sqlx::{postgres::PgPoolOptions, Pool, Postgres};
use std::env; use std::env;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use storage::services::aws;
use tasks::*; use tasks::*;
//mod config; //mod config;
@@ -11,6 +13,8 @@ mod utils;
pub struct TaskManager<'a> { pub struct TaskManager<'a> {
pool: Pool<Postgres>, pool: Pool<Postgres>,
cache: cache::Pool,
s3_client: aws::S3Client,
jobs: Vec<TaskJob>, jobs: Vec<TaskJob>,
last_activated: Option<chrono::DateTime<Utc>>, last_activated: Option<chrono::DateTime<Utc>>,
last_job: Option<TaskJob>, last_job: Option<TaskJob>,
@@ -49,6 +53,8 @@ async fn main() {
println!("Hello, world!"); println!("Hello, world!");
dotenvy::dotenv().unwrap(); dotenvy::dotenv().unwrap();
// setup database
let database_url = let database_url =
env::var("DATABASE_URL").expect("Environment variable DATABASE_URL is not found"); env::var("DATABASE_URL").expect("Environment variable DATABASE_URL is not found");
let pool = PgPoolOptions::new() let pool = PgPoolOptions::new()
@@ -58,7 +64,35 @@ async fn main() {
.await .await
.expect("Failed to connect to the database"); .expect("Failed to connect to the database");
let mut manager = TaskManager::new(pool); // setup redis/valkey
let redis_url = match std::env::var("REDIS_URL").unwrap().as_str() {
// TODO: fix the unwrap ^
"" => "redis://localhost:6379".to_string(),
x => x.to_string(),
};
let pool_size = 8;
let config = cache::Config::from_url(&redis_url).unwrap(); // TODO: fix the unwrap <<<
let redis_pool = cache::Builder::from_config(config)
.with_performance_config(|config| {
config.default_command_timeout = Duration::from_secs(60);
})
.set_policy(cache::ReconnectPolicy::new_exponential(0, 100, 30_000, 2))
.build_pool(pool_size)
.expect("Failed to create cache pool");
if std::env::var("REDIS_URL").unwrap() != "" {
// TODO: fix the unwrap ^
redis_pool.init().await.expect("Failed to connect to cache");
let _ = redis_pool.flushall::<i32>(false).await;
}
// setup storage
let s3_client_config = aws::S3ClientConfig::from_env().unwrap();
let s3_client = aws::S3Client::new(&s3_client_config);
let mut manager = TaskManager::new(pool, redis_pool, s3_client);
manager.register_jobs().await.unwrap(); manager.register_jobs().await.unwrap();
loop { loop {
@@ -68,9 +102,11 @@ async fn main() {
} }
impl<'a> TaskManager<'a> { impl<'a> TaskManager<'a> {
fn new(pool: Pool<Postgres>) -> Self { fn new(pool: Pool<Postgres>, cache: cache::Pool, s3_client: aws::S3Client) -> Self {
TaskManager { TaskManager {
pool, pool,
cache,
s3_client,
jobs: Vec::new(), jobs: Vec::new(),
last_activated: None, last_activated: None,
last_job: None, last_job: None,
@@ -100,11 +136,15 @@ impl<'a> TaskManager<'a> {
} }
2 => { 2 => {
let pool = Arc::new(self.pool.clone()); let pool = Arc::new(self.pool.clone());
Box::new(move || upload_rss::register(&pool)) let cache = Arc::new(self.cache.clone());
let s3_client = Arc::new(self.s3_client.clone());
Box::new(move || upload_rss::register(&pool, &cache, &s3_client))
} }
3 => { 3 => {
let pool = Arc::new(self.pool.clone()); let pool = Arc::new(self.pool.clone());
Box::new(move || upload_sitemap::register(&pool)) let cache = Arc::new(self.cache.clone());
let s3_client = Arc::new(self.s3_client.clone());
Box::new(move || upload_sitemap::register(&pool, &cache, &s3_client))
} }
id => return Err(format!("Unknown task_id: {}", id).into()), id => return Err(format!("Unknown task_id: {}", id).into()),
}; };

View File

@@ -2,19 +2,24 @@ use crate::utils::{
request::{Request, Response}, request::{Request, Response},
task_log, task_log,
}; };
use storage::services::{ use cache::KeysInterface;
aws::{S3Client, S3ClientConfig}, use storage::services::{aws::S3Client, ObjectStorageClient};
ObjectStorageClient,
};
pub fn register(pool: &sqlx::Pool<sqlx::Postgres>) { pub fn register(pool: &sqlx::Pool<sqlx::Postgres>, cache: &cache::Pool, s3_client: &S3Client) {
let p = pool.clone(); let p = pool.clone();
let c = cache.clone();
let s3 = s3_client.to_owned();
tokio::spawn(async move { tokio::spawn(async move {
let _ = upload_rss(&p).await; let _ = upload_rss(&p, &c, s3).await;
}); });
} }
async fn upload_rss(pool: &sqlx::Pool<sqlx::Postgres>) -> Result<(), Box<dyn std::error::Error>> { async fn upload_rss(
pool: &sqlx::Pool<sqlx::Postgres>,
cache: &cache::Pool,
s3_client: S3Client,
) -> Result<(), Box<dyn std::error::Error>> {
// start task logging // start task logging
task_log::start(2, pool).await?; task_log::start(2, pool).await?;
@@ -25,15 +30,37 @@ async fn upload_rss(pool: &sqlx::Pool<sqlx::Postgres>) -> Result<(), Box<dyn std
// upload the sucker to obj storage // upload the sucker to obj storage
if let Response::Xml(rss) = rss_result { if let Response::Xml(rss) = rss_result {
let client_config = S3ClientConfig::from_env().unwrap(); let cached: &Option<String> = &cache.get(String::from("rss")).await.unwrap_or(None);
let s3_client = S3Client::new(&client_config); let cache_clone = cache.clone();
if let Some(cached_value) = cached {
if *cached_value == rss {
println!("Response is the same in the cache, exiting");
return Ok(());
}
}
let r = rss.clone();
let _ = s3_client let _ = s3_client
.put_object( .put_object(
client_config.bucket.as_str(), s3_client.client_config.bucket.as_str(),
"feed.xml", "feed.xml",
rss.as_bytes().to_vec(), rss.as_bytes().to_vec(),
) )
.await; .await?;
tokio::spawn(async move {
cache_clone
.set::<String, String, &String>(
String::from("rss"),
&r,
Some(cache::Expiration::EX(3600)),
None,
false,
)
.await
.unwrap();
});
println!("Finished uploading RSS feed"); println!("Finished uploading RSS feed");
} }

View File

@@ -2,20 +2,23 @@ use crate::utils::{
request::{Request, Response}, request::{Request, Response},
task_log, task_log,
}; };
use storage::services::{ use cache::KeysInterface;
aws::{S3Client, S3ClientConfig}, use storage::services::{aws::S3Client, ObjectStorageClient};
ObjectStorageClient,
};
pub fn register(pool: &sqlx::Pool<sqlx::Postgres>) { pub fn register(pool: &sqlx::Pool<sqlx::Postgres>, cache: &cache::Pool, s3_client: &S3Client) {
let p = pool.clone(); let p = pool.clone();
let c = cache.clone();
let s3 = s3_client.to_owned();
tokio::spawn(async move { tokio::spawn(async move {
let _ = upload_sitemap(&p).await; let _ = upload_sitemap(&p, &c, s3).await;
}); });
} }
async fn upload_sitemap( async fn upload_sitemap(
pool: &sqlx::Pool<sqlx::Postgres>, pool: &sqlx::Pool<sqlx::Postgres>,
cache: &cache::Pool,
s3_client: S3Client,
) -> Result<(), Box<dyn std::error::Error>> { ) -> Result<(), Box<dyn std::error::Error>> {
// start task logging // start task logging
task_log::start(3, pool).await?; task_log::start(3, pool).await?;
@@ -27,15 +30,36 @@ async fn upload_sitemap(
// upload the sucker to obj storage // upload the sucker to obj storage
if let Response::Xml(sitemap) = sitemap_result { if let Response::Xml(sitemap) = sitemap_result {
let client_config = S3ClientConfig::from_env().unwrap(); let cached: &Option<String> = &cache.get(String::from("sitemap")).await.unwrap_or(None);
let s3_client = S3Client::new(&client_config); let cache_clone = cache.clone();
if let Some(cached_value) = cached {
if *cached_value == sitemap {
println!("Response is the same in the cache, exiting");
return Ok(());
}
}
let s = sitemap.clone();
let _ = s3_client let _ = s3_client
.put_object( .put_object(
client_config.bucket.as_str(), s3_client.client_config.bucket.as_str(),
"sitemap.xml", "sitemap.xml",
sitemap.as_bytes().to_vec(), sitemap.as_bytes().to_vec(),
) )
.await; .await?;
tokio::spawn(async move {
cache_clone
.set::<String, String, &String>(
String::from("sitemap"),
&s,
Some(cache::Expiration::EX(3600)),
None,
false,
)
.await
.unwrap();
});
println!("Finished uploading sitemap!"); println!("Finished uploading sitemap!");
} }