This commit is contained in:
Kasper Juul Hermansen 2023-04-03 02:01:16 +02:00
commit 7f0ab1fc9a
Signed by: kjuulh
GPG Key ID: 57B6E1465221F912
5 changed files with 1378 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
/target
.env
target/
dest/

1146
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

23
Cargo.toml Normal file
View File

@ -0,0 +1,23 @@
[package]
name = "pull-articles"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
git2 = { version = "0.16.1" }
clap = "4.2.1"
color-eyre = "0.6.2"
dotenv = "0.15.0"
eyre = "0.6.8"
serde = { version = "1.0.159", features = ["derive"] }
serde_json = "1.0.95"
serde_yaml = "0.9.19"
tempdir = "0.3.7"
tokio = { version = "1.27.0", features = ["full"] }
tracing = { version = "0.1.37", features = ["log"] }
tracing-subscriber = "0.3.16"
serde_toml = "0.0.1"
toml = { version = "0.7.3", features = ["preserve_order"] }
tokio-scoped = "0.2.0"

202
src/main.rs Normal file
View File

@ -0,0 +1,202 @@
use std::path::PathBuf;
use git2::build::{CheckoutBuilder, RepoBuilder};
use git2::{Cred, FetchOptions, RemoteCallbacks};
use serde::{Deserialize, Serialize};
use tempdir::TempDir;
#[tokio::main]
async fn main() -> eyre::Result<()> {
let _ = dotenv::dotenv();
color_eyre::install().unwrap();
tracing_subscriber::fmt().pretty().init();
let matches = clap::Command::new("pull-articles")
.arg(clap::Arg::new("repo").long("repo").required(true))
.arg(clap::Arg::new("path").long("path").required(true))
.arg(clap::Arg::new("out").long("out").required(true))
.get_matches();
let repo = matches.get_one::<String>("repo").unwrap();
let path = matches.get_one::<String>("path").unwrap();
let out = matches.get_one::<String>("out").unwrap();
tracing::info!(repo = repo, path = path, out = out, "pulling articles");
let tmpdir = TempDir::new("pull-articles")?;
let tmpdir = tmpdir.path();
tracing::info!(
repo = repo,
dest_dir = tmpdir.display().to_string(),
"clone repo"
);
let mut cb = RemoteCallbacks::new();
cb.credentials(|_, _, _| {
let username = std::env::var("GIT_USERNAME").expect("GIT_USERNAME to be set");
let password = std::env::var("GIT_PASSWORD").expect("GIT_PASSWORD to be set");
Cred::userpass_plaintext(&username, &password)
});
let co = CheckoutBuilder::new();
let mut fo = FetchOptions::new();
fo.remote_callbacks(cb);
let mut repo_dir = tmpdir.to_path_buf();
repo_dir.push("repo");
RepoBuilder::new()
.fetch_options(fo)
.with_checkout(co)
.clone(repo, &repo_dir)?;
let mut repo_dir = repo_dir.clone();
repo_dir.push(path);
tracing::info!(repo_dir = repo_dir.display().to_string(), "reading files");
let out_dir = PathBuf::from(out);
tokio::fs::create_dir_all(&out_dir).await?;
let mut dir = tokio::fs::read_dir(repo_dir).await?;
while let Some(file) = dir.next_entry().await? {
if let Ok(ft) = file.file_type().await {
if ft.is_file() {
let file_content = tokio::fs::read(file.path()).await?;
let file_str = std::str::from_utf8(file_content.as_slice())?;
let (frontmatter, content) = extract_frontmatter(file_str)?;
let transformed_frontmatter = transform_frontmatter(frontmatter)?;
let new_article = format!("{}\n{}", transformed_frontmatter, content);
let mut out_file = out_dir.clone();
out_file.push(file.file_name());
tokio::fs::write(out_file, new_article).await?;
}
}
}
Ok(())
}
/// Splits an article into its front matter and its body.
///
/// The front matter is the text between the first two `---` markers. A single
/// line break (`\n` or `\r\n`) directly after each marker is dropped, so the
/// returned front matter starts on the first line after the opening marker and
/// the body starts on the first line after the closing marker.
///
/// Anything before the opening marker is ignored. The closing marker is the
/// first `---` found after the opening one, so a `---` inside a front matter
/// value ends the front matter early.
///
/// # Errors
///
/// Returns an error if the opening or the closing `---` marker is missing.
fn extract_frontmatter(content: impl Into<String>) -> eyre::Result<(String, String)> {
    const MARKER: &str = "---";

    let content: String = content.into();

    let (_, after_start) = content
        .split_once(MARKER)
        .ok_or_else(|| eyre::anyhow!("could not find start ---"))?;
    let after_start = strip_leading_line_break(after_start);

    // Offsets are now relative to `after_start`, so the front matter is
    // everything up to the closing marker regardless of where the opening
    // marker was found in the original text.
    let (frontmatter, rest) = after_start
        .split_once(MARKER)
        .ok_or_else(|| eyre::anyhow!("could not find end ---"))?;
    let rest = strip_leading_line_break(rest);

    Ok((frontmatter.to_string(), rest.to_string()))
}

/// Removes one leading `\r\n` or `\n` from `text`, if present.
fn strip_leading_line_break(text: &str) -> &str {
    text.strip_prefix("\r\n")
        .or_else(|| text.strip_prefix('\n'))
        .unwrap_or(text)
}
fn transform_frontmatter(frontmatter: String) -> eyre::Result<String> {
let obsidian_post: ObsidianPost = serde_yaml::from_str(&frontmatter)?;
let zola_post = ZolaPost {
title: obsidian_post.title,
description: Some(obsidian_post.description),
date: obsidian_post.created,
updated: obsidian_post
.updates
.map(|u| u.last().map(|u| u.time.clone()))
.flatten(),
draft: obsidian_post.hidden,
slug: obsidian_post.slug,
authors: Some(vec!["kjuulh".into()]),
};
let transformed_frontmatter = toml::to_string(&zola_post)?;
Ok(format!("+++\n{transformed_frontmatter}+++"))
}
/// One entry of the `updates` list in an Obsidian post's front matter.
#[derive(Clone, Debug, Deserialize, Serialize)]
struct ObsidianPostChange {
/// Timestamp of the change, kept as the raw string from the front matter.
time: String,
/// Optional free-text note about the change.
description: Option<String>,
}
/// Front matter of a source article, deserialized from YAML in
/// `transform_frontmatter`.
#[derive(Clone, Debug, Deserialize, Serialize)]
struct ObsidianPost {
/// Stored under the key `type` in the front matter (`type` is a Rust keyword).
#[serde(rename(serialize = "type", deserialize = "type"))]
blog_type: String,
/// Post title; copied to `ZolaPost::title`.
title: String,
/// Post description; copied to `ZolaPost::description`.
description: String,
/// Optional flag; copied to `ZolaPost::draft`.
hidden: Option<bool>,
/// Creation date string; copied to `ZolaPost::date`.
created: String,
/// Optional change list; the `time` of its last entry becomes `ZolaPost::updated`.
updates: Option<Vec<ObsidianPostChange>>,
/// Required list of tags; not carried over to `ZolaPost`.
tags: Vec<String>,
/// Optional slug; copied to `ZolaPost::slug`.
slug: Option<String>,
}
/// Front matter of a generated article, serialized to TOML in
/// `transform_frontmatter`.
///
/// NOTE(review): the serialized key order presumably follows the field
/// declaration order — keep the order stable unless the output may change.
#[derive(Clone, Debug, Deserialize, Serialize)]
struct ZolaPost {
/// Post title.
title: String,
/// Post description.
description: Option<String>,
/// Publication date string, taken from the source post's `created` field.
date: String,
/// Time of the last recorded update, if any.
updated: Option<String>,
/// Draft flag, taken from the source post's `hidden` field.
draft: Option<bool>,
/// Optional slug.
slug: Option<String>,
/// Author names.
authors: Option<Vec<String>>,
}
// Unit tests for the front matter helpers.
//
// NOTE(review): the raw-string fixtures below are whitespace-sensitive (YAML
// indentation, blank lines, trailing spaces) — confirm they still line up with
// the expected strings in the assertions before relying on these tests.
#[cfg(test)]
mod test {
use crate::{extract_frontmatter, transform_frontmatter};
// Checks that an article is split into (front matter, body) at the `---` markers.
#[test]
fn can_extract_frontmatter_from_article() -> eyre::Result<()> {
let article = r#"---
type: "blog-post"
title: "Advancing career"
description: "2023-04-01-advanding-career"
hidden: true
created: "2023-04-01"
updates:
- time: "2023-04-01"
description: "first iteration"
tags:
- '#blog'
---
# Introduction
My current conundrum is figuring out how I want to advance my career, what directions to take, and what options to look out for.
In the start of my career and during my studies I wanted to figure out what kind of work I enjoyed and gave me energy, and also what I am most suited for. None of these are of course set in stone, and I don't believe there is a perfect choice, but that is neither here nor there.
"#;
let actual = extract_frontmatter(article)?;
// actual.0 is the front matter, actual.1 is the article body.
assert_eq!(actual.0, "type: \"blog-post\"\ntitle: \"Advancing career\"\ndescription: \"2023-04-01-advanding-career\"\nhidden: true\ncreated: \"2023-04-01\"\nupdates:\n- time: \"2023-04-01\"\n description: \"first iteration\"\ntags:\n- '#blog'\n");
assert_eq!(actual.1, "\n# Introduction\n\nMy current conundrum is figuring out how I want to advance my career, what directions to take, and what options to look out for. \n\nIn the start of my career and during my studies I wanted to figure out what kind of work I enjoyed and gave me energy, and also what I am most suited for. None of these are of course set in stone, and I don't believe there is a perfect choice, but that is neither here nor there.\n");
Ok(())
}
// Checks that YAML front matter is converted into a `+++`-fenced TOML block.
#[test]
fn can_transform_frontmatter() -> eyre::Result<()> {
let frontmatter = r#"type: "blog-post"
title: "Advancing career"
description: "2023-04-01-advanding-career"
hidden: true
created: "2023-04-01"
updates:
- time: "2023-04-01"
description: "first iteration"
tags:
- '#blog'"#;
let res = transform_frontmatter(frontmatter.into())?;
assert_eq!(&res, "+++\ntitle = \"Advancing career\"\ndescription = \"2023-04-01-advanding-career\"\ndate = \"2023-04-01\"\nupdated = \"2023-04-01\"\ndraft = true\nauthors = [\"kjuulh\"]\n\n+++\n");
Ok(())
}
}

3
test.sh Executable file
View File

@ -0,0 +1,3 @@
#!/bin/bash
# Runs pull-articles against the obsidian repository: reads the posts under
# areas/blog/posts and writes the converted articles to dest/posts.
cargo run -- --repo https://git.front.kjuulh.io/kjuulh/obsidian.git --path areas/blog/posts --out dest/posts