feat: add basic redpanda risingwave setup

Signed-off-by: kjuulh <contact@kjuulh.io>
This commit is contained in:
Kasper Juul Hermansen 2024-08-05 21:41:30 +02:00
commit f8ef7701ea
Signed by: kjuulh
GPG Key ID: D85D7535F18F35FA
17 changed files with 3043 additions and 0 deletions

2
.drone.yml Normal file
View File

@ -0,0 +1,2 @@
kind: template
load: cuddle-rust-cli-plan.yaml

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
target/
.cuddle/

2593
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

14
Cargo.toml Normal file
View File

@ -0,0 +1,14 @@
[workspace]
members = ["crates/*"]
resolver = "2"
[workspace.dependencies]
kafka-ingest = { path = "crates/kafka-ingest" }
anyhow = { version = "1" }
tokio = { version = "1", features = ["full"] }
tracing = { version = "0.1", features = ["log"] }
tracing-subscriber = { version = "0.3.18" }
clap = { version = "4", features = ["derive", "env"] }
dotenv = { version = "0.15" }
axum = { version = "0.7" }

2
README.md Normal file
View File

@ -0,0 +1,2 @@
# kafka-ingest

1
crates/kafka-ingest/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

View File

@ -0,0 +1,31 @@
[package]
name = "kafka-ingest"
version = "0.1.0"
edition = "2021"
[dependencies]
anyhow.workspace = true
tokio.workspace = true
tracing.workspace = true
tracing-subscriber.workspace = true
clap.workspace = true
dotenv.workspace = true
axum.workspace = true
serde = { version = "1.0.197", features = ["derive"] }
sqlx = { version = "0.7.3", features = [
"runtime-tokio",
"tls-rustls",
"postgres",
"uuid",
"time",
] }
uuid = { version = "1.7.0", features = ["v4"] }
tower-http = { version = "0.5.2", features = ["cors", "trace"] }
kafka = "0.10.0"
drift = { git = "https://github.com/kjuulh/drift", branch = "main" }
num = { version = "0.4.3", features = ["num-bigint", "rand", "serde"] }
chrono = { version = "0.4.38", features = ["serde"] }
rand = "0.8.5"
serde_json = "1.0.122"

View File

@ -0,0 +1,84 @@
use std::time::Duration;
use anyhow::Context;
use chrono::{TimeDelta, Utc};
use clap::{Parser, Subcommand};
use kafka::producer::Record;
use rand::Rng;
use serde::Serialize;
/// Top-level CLI definition parsed by clap.
#[derive(Parser)]
#[command(author, version, about, long_about = None, subcommand_required = true)]
struct Command {
/// Selected subcommand. `subcommand_required = true` means clap rejects an
/// invocation without one, so this is always `Some` after a successful parse.
#[command(subcommand)]
command: Option<Commands>,
}
/// Subcommands supported by the `kafka-ingest` CLI.
#[derive(Subcommand)]
enum Commands {
/// Smoke-test command: prints a greeting and exits.
Hello {},
/// Continuously publish random ad-click events to Kafka until ctrl-c.
StartStreaming {},
}
/// Event payload serialized to JSON and published to the `ad_clicks` topic.
/// Field names/types mirror the `ad_source` source schema consumed downstream.
#[derive(Clone, Serialize, Debug)]
struct AdSource {
// Randomly generated id (0..64) — see the producer loop in `main`.
user_id: i64,
// Randomly generated id (0..64).
ad_id: i64,
// Timestamps carried as RFC 3339 strings (produced via `to_rfc3339`),
// not as native timestamp types.
click_timestamp: String,
impression_timestamp: String,
}
/// Entry point: loads `.env`, initializes tracing, parses the CLI, and
/// dispatches the chosen subcommand.
///
/// `StartStreaming` schedules a recurring job (every 50ms) that publishes a
/// random `AdSource` event as JSON to the `ad_clicks` Kafka topic on
/// `localhost:9092`, and cancels the job when ctrl-c is received.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    dotenv::dotenv().ok();
    tracing_subscriber::fmt::init();
    let cli = Command::parse();
    tracing::debug!("Starting cli");
    // `subcommand_required = true` on the parser guarantees a subcommand is
    // present after a successful parse, so a missing one here is a programming
    // error rather than bad user input — state that invariant explicitly.
    match cli
        .command
        .expect("subcommand is required by clap (subcommand_required = true)")
    {
        Commands::Hello {} => println!("Hello!"),
        Commands::StartStreaming {} => {
            let send_event = drift::schedule(std::time::Duration::from_millis(50), || async {
                tracing::debug!("sending event");
                let mut rng = rand::thread_rng();
                // NOTE(review): a fresh producer (and broker connection) is
                // created on every 50ms tick; consider hoisting it out of the
                // closure if throughput ever matters.
                let mut producer =
                    kafka::producer::Producer::from_hosts(vec!["localhost:9092".into()])
                        .with_ack_timeout(Duration::from_secs(1))
                        .with_required_acks(kafka::client::RequiredAcks::One)
                        .create()
                        .map_err(|e| drift::DriftError::JobError(e.into()))?;
                // Simulate a click landing 500ms after the impression.
                let msg = AdSource {
                    user_id: rng.gen_range(0..64),
                    ad_id: rng.gen_range(0..64),
                    click_timestamp: Utc::now()
                        .checked_add_signed(TimeDelta::milliseconds(500))
                        .expect("now + 500ms cannot overflow chrono's date range")
                        .to_rfc3339(),
                    impression_timestamp: Utc::now().to_rfc3339(),
                };
                producer
                    .send(&Record::from_value(
                        "ad_clicks",
                        serde_json::to_string(&msg)
                            .context("failed to serialize type")
                            .map_err(drift::DriftError::JobError)?,
                    ))
                    .map_err(|e| drift::DriftError::JobError(e.into()))?;
                Ok(())
            });
            println!("waiting for closure press ctrl-c to cancel");
            // Block until ctrl-c, then cancel the scheduled job so the
            // producer loop stops cleanly.
            if let Ok(()) = tokio::signal::ctrl_c().await {
                send_event.cancel();
            }
        }
    }
    Ok(())
}

21
cuddle.yaml Normal file
View File

@ -0,0 +1,21 @@
# yaml-language-server: $schema=https://git.front.kjuulh.io/kjuulh/cuddle/raw/branch/main/schemas/base.json
base: "git@git.front.kjuulh.io:kjuulh/cuddle-rust-cli-plan.git"
vars:
service: "kafka-ingest"
registry: kasperhermansen
clusters:
clank-prod:
replicas: "3"
namespace: prod
deployment:
registry: git@git.front.kjuulh.io:kjuulh/clank-clusters
env:
prod:
clusters:
- clank-prod

3
renovate.json Normal file
View File

@ -0,0 +1,3 @@
{
"$schema": "https://docs.renovatebot.com/renovate-schema.json"
}

View File

@ -0,0 +1 @@

13
templates/create_mvs.sql Normal file
View File

@ -0,0 +1,13 @@
-- The number of clicks on the ad within one minute after the ad was shown.
-- Reads the streaming `ad_source` source and maintains a per-ad click count.
create materialized view m_click_statistic as
select
count(user_id) as clicks_count,
ad_id
from
ad_source
where
-- only impressions that were actually clicked
click_timestamp is not null
-- the click must happen after the impression...
and impression_timestamp < click_timestamp
-- ...and no more than one minute later
and impression_timestamp + interval '1' minute >= click_timestamp
group by
ad_id;

View File

@ -0,0 +1,13 @@
-- Streaming source backed by the `ad_clicks` Kafka (Redpanda) topic,
-- decoding each message as a plain JSON object.
-- impression_timestamp: The time when the ad was shown.
-- click_timestamp: The time when the ad was clicked.
create source ad_source (
user_id bigint,
ad_id bigint,
click_timestamp timestamptz,
impression_timestamp timestamptz
) with (
connector = 'kafka',
topic = 'ad_clicks',
-- internal listener of the `message_queue` service in docker-compose
properties.bootstrap.server = 'message_queue:29092',
-- replay the topic from the beginning on (re)start
scan.startup.mode = 'earliest'
) FORMAT PLAIN ENCODE JSON;

View File

@ -0,0 +1,219 @@
x-image: &image
image: risingwavelabs/risingwave:v1.10.0
services:
risingwave:
<<: *image
command: "standalone --meta-opts=\" \
--listen-addr 0.0.0.0:5690 \
--advertise-addr 0.0.0.0:5690 \
--dashboard-host 0.0.0.0:5691 \
--prometheus-host 0.0.0.0:1250 \
--prometheus-endpoint http://prometheus-0:9500 \
--backend sql \
--sql-endpoint postgres://postgres:@postgres-0:5432/metadata \
--state-store hummock+minio://hummockadmin:hummockadmin@minio-0:9301/hummock001 \
--data-directory hummock_001 \
--config-path /risingwave.toml\" \
--compute-opts=\" \
--config-path /risingwave.toml \
--listen-addr 0.0.0.0:5688 \
--prometheus-listener-addr 0.0.0.0:1250 \
--advertise-addr 0.0.0.0:5688 \
--async-stack-trace verbose \
#--parallelism 4 \
#--total-memory-bytes 8589934592 \
--role both \
--meta-address http://0.0.0.0:5690\" \
--frontend-opts=\" \
--config-path /risingwave.toml \
--listen-addr 0.0.0.0:4566 \
--advertise-addr 0.0.0.0:4566 \
--prometheus-listener-addr 0.0.0.0:1250 \
--health-check-listener-addr 0.0.0.0:6786 \
--meta-addr http://0.0.0.0:5690\" \
--compactor-opts=\" \
--listen-addr 0.0.0.0:6660 \
--prometheus-listener-addr 0.0.0.0:1250 \
--advertise-addr 0.0.0.0:6660 \
--meta-address http://0.0.0.0:5690\""
expose:
- "6660"
- "4566"
- "5688"
- "5690"
- "1250"
- "5691"
ports:
- "4566:4566"
- "5690:5690"
- "5691:5691"
- "1250:1250"
# networks:
# - redpanda_network
depends_on:
- postgres-0
- minio-0
configs:
- source: risingwave.toml
target: /risingwave.toml
environment:
RUST_BACKTRACE: "1"
# If ENABLE_TELEMETRY is not set, telemetry will start by default
ENABLE_TELEMETRY: true
RW_TELEMETRY_TYPE: "docker-compose"
RW_SECRET_STORE_PRIVATE_KEY_HEX: 0123456789abcdef
container_name: risingwave
healthcheck:
test:
- CMD-SHELL
- bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/6660; exit $$?;'
- bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/5688; exit $$?;'
- bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/4566; exit $$?;'
- bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/5690; exit $$?;'
interval: 1s
timeout: 5s
restart: always
deploy:
resources:
limits:
memory: 28G
reservations:
memory: 28G
postgres-0:
image: "postgres:15-alpine"
environment:
- POSTGRES_HOST_AUTH_METHOD=trust
- POSTGRES_USER=postgres
- POSTGRES_DB=metadata
- POSTGRES_INITDB_ARGS=--encoding=UTF-8 --lc-collate=C --lc-ctype=C
expose:
- "5432"
ports:
- "8432:5432"
volumes:
- "postgres-0:/var/lib/postgresql/data"
healthcheck:
test: [ "CMD-SHELL", "pg_isready -U postgres" ]
interval: 2s
timeout: 5s
retries: 5
restart: always
minio-0:
image: "quay.io/minio/minio:latest"
command:
- server
- "--address"
- "0.0.0.0:9301"
- "--console-address"
- "0.0.0.0:9400"
- /data
expose:
- "9301"
- "9400"
ports:
- "9301:9301"
- "9400:9400"
depends_on: [ ]
volumes:
- "minio-0:/data"
entrypoint: "
/bin/sh -c '
set -e
mkdir -p \"/data/hummock001\"
/usr/bin/docker-entrypoint.sh \"$$0\" \"$$@\"
'"
environment:
MINIO_CI_CD: "1"
MINIO_PROMETHEUS_AUTH_TYPE: public
MINIO_PROMETHEUS_URL: "http://prometheus-0:9500"
MINIO_ROOT_PASSWORD: hummockadmin
MINIO_ROOT_USER: hummockadmin
MINIO_DOMAIN: "minio-0"
container_name: minio-0
healthcheck:
test:
- CMD-SHELL
- bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/9301; exit $$?;'
interval: 1s
timeout: 5s
retries: 5
restart: always
message_queue:
image: "docker.redpanda.com/redpandadata/redpanda:v24.2.1"
command:
- redpanda
- start
- --smp 1
- --kafka-addr internal://0.0.0.0:29092,external://0.0.0.0:9092
- --advertise-kafka-addr internal://message_queue:29092,external://localhost:9092
- --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082
- --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081
- --rpc-addr message_queue:33145
- --advertise-rpc-addr message_queue:33145
- --mode dev-container
- --default-log-level=info
expose:
- "29092"
- "9092"
- "9644"
ports:
- "29092:29092"
- "9092:9092"
- "9644:9644"
- "8081:8081"
depends_on: [ ]
volumes:
- "message_queue:/var/lib/redpanda/data"
environment: { }
container_name: message_queue
healthcheck:
test: curl -f localhost:9644/v1/status/ready
interval: 1s
timeout: 5s
retries: 5
restart: always
console:
container_name: redpanda-console
image: docker.redpanda.com/redpandadata/console:v2.7.0
entrypoint: /bin/sh
command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml; /app/console'
environment:
CONFIG_FILEPATH: /tmp/config.yml
CONSOLE_CONFIG_FILE: |
kafka:
brokers: ["message_queue:29092"]
schemaRegistry:
enabled: true
urls: ["http://message_queue:8081"]
redpanda:
adminApi:
enabled: true
urls: ["http://message_queue:9644"]
ports:
- 8080:8080
depends_on:
- message_queue
networks:
redpanda_network:
driver: bridge
volumes:
postgres-0:
external: false
minio-0:
external: false
message_queue:
external: false
configs:
risingwave.toml:
content: |
# Empty

View File

@ -0,0 +1,37 @@
name: kafka-ingest
services: {}
# redpanda-0:
# command:
# - redpanda
# - start
# - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092
# # Address the broker advertises to clients that connect to the Kafka API.
# # Use the internal addresses to connect to the Redpanda brokers'
# # from inside the same Docker network.
# # Use the external addresses to connect to the Redpanda brokers'
# # from outside the Docker network.
# - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092
# - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082
# # Address the broker advertises to clients that connect to the HTTP Proxy.
# - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082
# - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081
# # Redpanda brokers use the RPC API to communicate with each other internally.
# - --rpc-addr redpanda-0:33145
# - --advertise-rpc-addr redpanda-0:33145
# # Mode dev-container uses well-known configuration properties for development in containers.
# - --mode dev-container
# # Tells Seastar (the framework Redpanda uses under the hood) to use 1 core on the system.
# - --smp 1
# - --default-log-level=info
# image: docker.redpanda.com/redpandadata/redpanda:v24.2.1
# container_name: redpanda-0
# volumes:
# - redpanda-0:/var/lib/redpanda/data
# networks:
# - redpanda_network
# ports:
# - 18081:18081
# - 18082:18082
# - 19092:19092
# - 19644:9644

6
templates/query.sql Normal file
View File

@ -0,0 +1,6 @@
-- Ad-hoc inspection query: show up to 10 rows of the per-ad click counts
-- maintained by the `m_click_statistic` materialized view.
select
*
from
m_click_statistic
limit
10;

View File

@ -0,0 +1 @@