diff --git a/.gitignore b/.gitignore index 75d1871..2f2aa51 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ .cuddle/ +target/ diff --git a/client-application/Cargo.lock b/client-application/Cargo.lock new file mode 100644 index 0000000..1fe086b --- /dev/null +++ b/client-application/Cargo.lock @@ -0,0 +1,1237 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" +dependencies = [ + "anstyle", + "once_cell", + "windows-sys 0.59.0", +] + +[[package]] +name = "anyhow" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" + +[[package]] +name = "async-trait" +version = "0.1.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "backtrace" +version = "0.3.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + +[[package]] +name = "bitflags" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" + +[[package]] +name = "bumpalo" +version = "3.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" + +[[package]] +name = "cc" +version = "1.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4730490333d58093109dc02c23174c3f4d490998c3fed3cc8e82d57afedb9cf" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-targets", +] + +[[package]] +name = "clap" +version = "4.5.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "769b0145982b4b48713e01ec42d61614425f27b7058bda7180a3a41f30104796" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b26884eb4b57140e4d2d93652abfa49498b938b3c9179f9fc487b0acc3edad7" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54b755194d6389280185988721fffba69495eed5ee9feeee9a599b53db80318c" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + +[[package]] +name = "client-application" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "clap", + "dotenvy", + "kafka", + "nodrift", + "rand 0.9.0", + "serde", + "serde_json", + "tokio", + "tracing", + "tracing-subscriber", + "uuid", +] + +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "crc" +version = "3.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "dotenvy" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" + +[[package]] +name = "flate2" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.13.3+wasi-0.2.2", + "windows-targets", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "iana-time-zone" +version = "0.1.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itoa" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "kafka" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2054ba4edcb4dcda4209e138c7e88caf26d4a325b3db76fbdb6ca5eecc23e426" +dependencies = [ + "byteorder", + "crc", + "flate2", + "fnv", + "openssl", + "openssl-sys", + "ref_slice", + "snap", + "thiserror", + "tracing", + "twox-hash", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.169" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "miniz_oxide" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" +dependencies = [ + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", + "windows-sys 0.52.0", +] + +[[package]] +name = "nodrift" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78273a5b0f2fcc2e64f0d026bcdb15b378055229810036f95c36ffe91dccbc1e" +dependencies = [ + "anyhow", + "async-trait", + "thiserror", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "openssl" +version = "0.10.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5e534d133a060a3c19daec1eb3e98ec6f4685978834f2dbadfe2ec215bab64e" +dependencies = [ + "bitflags", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "openssl-sys" +version = "0.9.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pkg-config" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" + +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy 0.7.35", +] + +[[package]] +name = "proc-macro2" +version = "1.0.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.0", + "zerocopy 0.8.14", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.0", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.15", +] + +[[package]] +name = "rand_core" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b08f3c9802962f7e1b25113931d94f43ed9725bebc59db9d0c3e9a23b67e15ff" +dependencies = [ + "getrandom 0.3.1", + "zerocopy 0.8.14", +] + +[[package]] +name = "redox_syscall" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" +dependencies = [ + "bitflags", +] + +[[package]] +name = "ref_slice" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4ed1d73fb92eba9b841ba2aef69533a060ccc0d3ec71c90aeda5996d4afb7a9" + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "rustversion" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" + +[[package]] +name = "ryu" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.217" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.217" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.138" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" +dependencies = [ + "libc", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + +[[package]] +name = "socket2" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.96" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "tokio" +version = "1.43.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.52.0", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-util" +version = "0.7.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "log", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +dependencies = [ + "nu-ansi-term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "rand 0.8.5", + "static_assertions", +] + +[[package]] +name = "unicode-ident" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uuid" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3758f5e68192bb96cc8f9b7e2c2cfdabb435499a28499a42f8f984092adad4b" +dependencies = [ + "serde", +] + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasi" +version = "0.13.3+wasi-0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +dependencies = [ + "wit-bindgen-rt", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "wit-bindgen-rt" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +dependencies = [ + "bitflags", +] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive 0.7.35", +] + +[[package]] +name = "zerocopy" +version = "0.8.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a367f292d93d4eab890745e75a778da40909cab4d6ff8173693812f79c4a2468" +dependencies = [ + "zerocopy-derive 0.8.14", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3931cb58c62c13adec22e38686b559c86a30565e16ad6e8510a337cedc611e1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/client-application/Cargo.toml b/client-application/Cargo.toml new file mode 100644 index 0000000..d622049 --- /dev/null +++ b/client-application/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "client-application" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = "1.0.95" +chrono = { version = "0.4.39", features = ["serde"] } +clap = { version = "4.5.27", features = ["derive", "env"] } +dotenvy = "0.15.7" +kafka = "0.10.0" +nodrift = "0.3.0" +rand = "0.9.0" +serde = { version = "1.0.217", features = ["derive"] } +serde_json = "1.0.138" +tokio = { version = "1.43.0", features = ["full"] } +tracing = { version = "0.1.41", features = ["log"] } +tracing-subscriber = "0.3.19" +uuid = { version = "1.12.1", features = ["serde"] } diff --git a/client-application/Dockerfile b/client-application/Dockerfile new file mode 100644 index 0000000..5a65dc1 --- /dev/null +++ b/client-application/Dockerfile @@ -0,0 +1,17 @@ +FROM rustlang/rust:nightly AS builder + +WORKDIR /mnt/src + +COPY Cargo.toml Cargo.toml +COPY Cargo.lock Cargo.lock +COPY src/ src/ + +RUN cargo build --release + +FROM debian:bookworm AS production + +RUN apt update && apt upgrade -y && apt install libssl-dev -y + +COPY --from=builder /mnt/src/target/release/client-application /usr/local/bin/client-application + +ENTRYPOINT ["/usr/local/bin/client-application"] diff --git a/client-application/src/main.rs b/client-application/src/main.rs new file mode 100644 index 0000000..65e4b4e --- /dev/null +++ b/client-application/src/main.rs @@ -0,0 +1,111 @@ +use std::time::Duration; + +use anyhow::Context; +use chrono::{TimeDelta, Utc}; +use clap::{Parser, Subcommand}; +use kafka::producer::Record; +use rand::Rng; +use serde::Serialize; + +#[derive(Parser)] +#[command(author, version, about, long_about = None, subcommand_required = true)] +struct Command { + #[command(subcommand)] + command: Option, +} + +#[derive(Subcommand)] +enum Commands { + Produce { + #[arg(long)] + host: String, + + #[arg(long)] + topic: String, + + #[arg(long = "delay-ms")] + delay_ms: u64, + }, +} + +#[derive(Clone, Serialize, Debug)] +struct AdSource { + user_id: i64, + ad_id: i64, + click_timestamp: String, + impression_timestamp: String, +} + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + dotenvy::dotenv().ok(); + tracing_subscriber::fmt::init(); + + let cli = Command::parse(); + tracing::debug!("Starting cli"); + + match cli.command.unwrap() { + Commands::Produce { + topic, + delay_ms, + host, + } => { + let send_event = + nodrift::schedule(std::time::Duration::from_millis(delay_ms), move || { + let host = host.clone(); + let topic = topic.clone(); + + async move { + tracing::info!("sending event"); + let mut rng = rand::rng(); + + let mut producer = kafka::producer::Producer::from_hosts(vec![host]) + .with_ack_timeout(Duration::from_secs(1)) + .with_required_acks(kafka::client::RequiredAcks::One) + .create() + .map_err(|e| nodrift::DriftError::JobError(e.into()))?; + + let msg = AdSource { + user_id: rng.random_range(0..64), + ad_id: rng.random_range(0..64), + click_timestamp: format!( + "{}", + Utc::now() + .checked_add_signed(TimeDelta::milliseconds(500)) + .unwrap() + .format("%Y-%m-%dT%H:%M:%S") + ), + impression_timestamp: format!( + "{}", + Utc::now().to_utc().format("%Y-%m-%dT%H:%M:%S") + ), + }; + + producer + .send(&Record::from_value( + &topic, + serde_json::to_string(&msg) + .context("failed to serialize type") + .map_err(nodrift::DriftError::JobError)?, + )) + .map_err(|e| nodrift::DriftError::JobError(e.into()))?; + + Ok(()) + } + }); + + println!("waiting for closure press ctrl-c to cancel"); + + tokio::select! { + _ = send_event.cancelled() => { + tokio::time::sleep(Duration::from_secs(5)).await; + return Ok(()) + } + _ = tokio::signal::ctrl_c() => { + send_event.cancel(); + return Ok(()) + } + } + } + } +} diff --git a/docker-compose.yml b/docker-compose.yml index 2dbf4de..24b1983 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -27,6 +27,21 @@ services: - PLAINTEXT://redpanda:29092,OUTSIDE://localhost:9092 - --check=false + client_application: + container_name: client_application + build: + context: ./client-application + restart: unless-stopped + environment: + RUST_LOG: info + command: + - produce + - --host=redpanda:29092 + - --topic=event-stream + - --delay-ms=500 + depends_on: + - connect + connect: image: confluentinc/cp-kafka-connect-base:7.8.0 depends_on: @@ -85,7 +100,7 @@ services: -H 'Accept: application/json' http://localhost:8083/connectors/IcebergSinkConnector/config \ -d '{ "tasks.max": "1", - "topics": "payments", + "topics": "event-stream", "connector.class": "io.tabular.iceberg.connect.IcebergSinkConnector", "iceberg.catalog.s3.endpoint": "http://minio:9000", "iceberg.catalog.s3.secret-access-key": "minioadmin", @@ -96,7 +111,7 @@ services: "iceberg.catalog.client.region": "eu-west-1", "iceberg.catalog.type": "rest", "iceberg.control.commitIntervalMs": "1000", - "iceberg.tables": "orders.payments", + "iceberg.tables": "marketing.ad_clicks", "value.converter.schemas.enable": "false", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "key.converter": "org.apache.kafka.connect.storage.StringConverter", @@ -164,7 +179,7 @@ services: # Batch & Iceberg manipulation spark-iceberg: - image: tabulario/spark-iceberg + image: tabulario/spark-iceberg:3.5.1_1.5.0 hostname: spark-iceberg container_name: spark-iceberg build: spark/ diff --git a/notebooks/iceberg.ipynb b/notebooks/iceberg.ipynb index b754616..c907124 100644 --- a/notebooks/iceberg.ipynb +++ b/notebooks/iceberg.ipynb @@ -10,7 +10,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "25/01/31 22:57:42 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.\n" + "25/02/01 00:22:25 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.\n" ] }, { @@ -24,7 +24,7 @@ " \n", " \n", " \n", - " orders\n", + " marketing\n", " \n", " \n", "" @@ -33,7 +33,7 @@ "+-----------+\n", "| namespace |\n", "+-----------+\n", - "| orders |\n", + "| marketing |\n", "+-----------+" ] }, @@ -50,10 +50,17 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "70349765-e5f1-43a5-a141-cc2d54c69a58", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "25/02/01 00:37:06 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.\n" + ] + }, { "data": { "text/html": [ @@ -67,8 +74,8 @@ " \n", " \n", " \n", - " orders\n", - " payments\n", + " marketing\n", + " ad_clicks\n", " False\n", " \n", " \n", @@ -78,11 +85,11 @@ "+-----------+-----------+-------------+\n", "| namespace | tableName | isTemporary |\n", "+-----------+-----------+-------------+\n", - "| orders | payments | False |\n", + "| marketing | ad_clicks | False |\n", "+-----------+-----------+-------------+" ] }, - "execution_count": 2, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -90,12 +97,12 @@ "source": [ "%%sql\n", "\n", - "SHOW TABLES FROM orders" + "SHOW TABLES FROM marketing" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 31, "id": "fabaed9c-9049-4996-9d26-b20f66303911", "metadata": {}, "outputs": [ @@ -112,7 +119,7 @@ " \n", " \n", " current-snapshot-id\n", - " none\n", + " 6641965456052712871\n", " \n", " \n", " format\n", @@ -130,17 +137,17 @@ "" ], "text/plain": [ - "+---------------------------------+-----------------+\n", - "| key | value |\n", - "+---------------------------------+-----------------+\n", - "| current-snapshot-id | none |\n", - "| format | iceberg/parquet |\n", - "| format-version | 2 |\n", - "| write.parquet.compression-codec | zstd |\n", - "+---------------------------------+-----------------+" + "+---------------------------------+---------------------+\n", + "| key | value |\n", + "+---------------------------------+---------------------+\n", + "| current-snapshot-id | 6641965456052712871 |\n", + "| format | iceberg/parquet |\n", + "| format-version | 2 |\n", + "| write.parquet.compression-codec | zstd |\n", + "+---------------------------------+---------------------+" ] }, - "execution_count": 3, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -148,12 +155,12 @@ "source": [ "%%sql\n", "\n", - "SHOW TBLPROPERTIES orders.payments" + "SHOW TBLPROPERTIES marketing.ad_clicks" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 33, "id": "6317d9c6-140e-4a63-890e-2173fbb9503e", "metadata": {}, "outputs": [ @@ -168,7 +175,7 @@ " \n", " \n", " \n", - " 0\n", + " 637\n", " \n", " \n", "" @@ -177,11 +184,11 @@ "+----------+\n", "| count(1) |\n", "+----------+\n", - "| 0 |\n", + "| 637 |\n", "+----------+" ] }, - "execution_count": 4, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -190,12 +197,12 @@ "%%sql\n", "\n", "SELECT COUNT(*)\n", - "FROM orders.payments" + "FROM marketing.ad_clicks" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 22, "id": "2a1ff132-dc65-4943-a9be-416ba5a13c26", "metadata": {}, "outputs": [ @@ -205,26 +212,94 @@ "\n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", "
idtypecreated_atdocumentpayeramountuser_idad_idclick_timestampimpression_timestamp
21512025-02-01 00:42:092025-02-01 00:42:09
17552025-02-01 00:42:102025-02-01 00:42:09
31322025-02-01 00:42:102025-02-01 00:42:10
63592025-02-01 00:42:042025-02-01 00:42:03
60292025-02-01 00:42:042025-02-01 00:42:04
6312025-02-01 00:42:032025-02-01 00:42:03
52182025-02-01 00:41:252025-02-01 00:41:25
32272025-02-01 00:41:522025-02-01 00:41:51
1062025-02-01 00:41:462025-02-01 00:41:46
17132025-02-01 00:40:302025-02-01 00:40:29
" ], "text/plain": [ - "+----+------+------------+----------+-------+--------+\n", - "| id | type | created_at | document | payer | amount |\n", - "+----+------+------------+----------+-------+--------+\n", - "+----+------+------------+----------+-------+--------+" + "+---------+-------+---------------------+----------------------+\n", + "| user_id | ad_id | click_timestamp | impression_timestamp |\n", + "+---------+-------+---------------------+----------------------+\n", + "| 21 | 51 | 2025-02-01 00:42:09 | 2025-02-01 00:42:09 |\n", + "| 17 | 55 | 2025-02-01 00:42:10 | 2025-02-01 00:42:09 |\n", + "| 31 | 32 | 2025-02-01 00:42:10 | 2025-02-01 00:42:10 |\n", + "| 63 | 59 | 2025-02-01 00:42:04 | 2025-02-01 00:42:03 |\n", + "| 60 | 29 | 2025-02-01 00:42:04 | 2025-02-01 00:42:04 |\n", + "| 6 | 31 | 2025-02-01 00:42:03 | 2025-02-01 00:42:03 |\n", + "| 52 | 18 | 2025-02-01 00:41:25 | 2025-02-01 00:41:25 |\n", + "| 32 | 27 | 2025-02-01 00:41:52 | 2025-02-01 00:41:51 |\n", + "| 10 | 6 | 2025-02-01 00:41:46 | 2025-02-01 00:41:46 |\n", + "| 17 | 13 | 2025-02-01 00:40:30 | 2025-02-01 00:40:29 |\n", + "+---------+-------+---------------------+----------------------+" ] }, - "execution_count": 5, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -233,13 +308,13 @@ "%%sql\n", "\n", "SELECT *\n", - "FROM orders.payments\n", + "FROM marketing.ad_clicks\n", "LIMIT 10" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 19, "id": "a2688a95-594c-45ad-9d49-70a1bcd59a1b", "metadata": {}, "outputs": [ @@ -263,17 +338,157 @@ " \n", " \n", " \n", + " \n", + " Row(ad_id=42)\n", + " 0\n", + " 19\n", + " 4\n", + " 5429\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 2025-02-01 00:41:49.202000\n", + " 7965471739473975852\n", + " \n", + " \n", + " Row(ad_id=38)\n", + " 0\n", + " 17\n", + " 1\n", + " 1582\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 2025-02-01 00:42:13.249000\n", + " 6641965456052712871\n", + " \n", + " \n", + " Row(ad_id=10)\n", + " 0\n", + " 16\n", + " 4\n", + " 5286\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 2025-02-01 00:41:31.587000\n", + " 4059346813755015811\n", + " \n", + " \n", + " Row(ad_id=3)\n", + " 0\n", + " 15\n", + " 1\n", + " 1543\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 2025-02-01 00:42:13.249000\n", + " 6641965456052712871\n", + " \n", + " \n", + " Row(ad_id=49)\n", + " 0\n", + " 15\n", + " 4\n", + " 5359\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 2025-02-01 00:41:43.138000\n", + " 1865904111199103577\n", + " \n", + " \n", + " Row(ad_id=5)\n", + " 0\n", + " 14\n", + " 1\n", + " 1526\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 2025-02-01 00:41:08.813000\n", + " 2155865929954566188\n", + " \n", + " \n", + " Row(ad_id=2)\n", + " 0\n", + " 14\n", + " 3\n", + " 4105\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 2025-02-01 00:42:06.311000\n", + " 827301497454031138\n", + " \n", + " \n", + " Row(ad_id=41)\n", + " 0\n", + " 13\n", + " 4\n", + " 5253\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 2025-02-01 00:41:41.144000\n", + " 1472536140048912459\n", + " \n", + " \n", + " Row(ad_id=34)\n", + " 0\n", + " 13\n", + " 1\n", + " 1508\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 2025-02-01 00:41:08.813000\n", + " 2155865929954566188\n", + " \n", + " \n", + " Row(ad_id=60)\n", + " 0\n", + " 13\n", + " 3\n", + " 4007\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 2025-02-01 00:41:17.518000\n", + " 3047889973353044630\n", + " \n", " \n", "" ], "text/plain": [ - "+-----------+---------+--------------+------------+-------------------------------+------------------------------+----------------------------+------------------------------+----------------------------+-----------------+--------------------------+\n", - "| partition | spec_id | record_count | file_count | total_data_file_size_in_bytes | position_delete_record_count | position_delete_file_count | equality_delete_record_count | equality_delete_file_count | last_updated_at | last_updated_snapshot_id |\n", - "+-----------+---------+--------------+------------+-------------------------------+------------------------------+----------------------------+------------------------------+----------------------------+-----------------+--------------------------+\n", - "+-----------+---------+--------------+------------+-------------------------------+------------------------------+----------------------------+------------------------------+----------------------------+-----------------+--------------------------+" + "+---------------+---------+--------------+------------+-------------------------------+------------------------------+----------------------------+------------------------------+----------------------------+----------------------------+--------------------------+\n", + "| partition | spec_id | record_count | file_count | total_data_file_size_in_bytes | position_delete_record_count | position_delete_file_count | equality_delete_record_count | equality_delete_file_count | last_updated_at | last_updated_snapshot_id |\n", + "+---------------+---------+--------------+------------+-------------------------------+------------------------------+----------------------------+------------------------------+----------------------------+----------------------------+--------------------------+\n", + "| Row(ad_id=42) | 0 | 19 | 4 | 5429 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:49.202000 | 7965471739473975852 |\n", + "| Row(ad_id=38) | 0 | 17 | 1 | 1582 | 0 | 0 | 0 | 0 | 2025-02-01 00:42:13.249000 | 6641965456052712871 |\n", + "| Row(ad_id=10) | 0 | 16 | 4 | 5286 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:31.587000 | 4059346813755015811 |\n", + "| Row(ad_id=3) | 0 | 15 | 1 | 1543 | 0 | 0 | 0 | 0 | 2025-02-01 00:42:13.249000 | 6641965456052712871 |\n", + "| Row(ad_id=49) | 0 | 15 | 4 | 5359 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:43.138000 | 1865904111199103577 |\n", + "| Row(ad_id=5) | 0 | 14 | 1 | 1526 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:08.813000 | 2155865929954566188 |\n", + "| Row(ad_id=2) | 0 | 14 | 3 | 4105 | 0 | 0 | 0 | 0 | 2025-02-01 00:42:06.311000 | 827301497454031138 |\n", + "| Row(ad_id=41) | 0 | 13 | 4 | 5253 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:41.144000 | 1472536140048912459 |\n", + "| Row(ad_id=34) | 0 | 13 | 1 | 1508 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:08.813000 | 2155865929954566188 |\n", + "| Row(ad_id=60) | 0 | 13 | 3 | 4007 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:17.518000 | 3047889973353044630 |\n", + "+---------------+---------+--------------+------------+-------------------------------+------------------------------+----------------------------+------------------------------+----------------------------+----------------------------+--------------------------+" ] }, - "execution_count": 6, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -282,11 +497,58 @@ "%%sql\n", "\n", "SELECT * \n", - "FROM orders.payments.partitions\n", + "FROM marketing.ad_clicks.partitions\n", "ORDER BY record_count DESC\n", "LIMIT 10" ] }, + { + "cell_type": "code", + "execution_count": 13, + "id": "89c67f0b-6bed-44fe-9c3d-99dda30477a9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rewritten_data_files_countadded_data_files_countrewritten_bytes_countfailed_data_files_count
6111784090
" + ], + "text/plain": [ + "+----------------------------+------------------------+-----------------------+-------------------------+\n", + "| rewritten_data_files_count | added_data_files_count | rewritten_bytes_count | failed_data_files_count |\n", + "+----------------------------+------------------------+-----------------------+-------------------------+\n", + "| 61 | 11 | 78409 | 0 |\n", + "+----------------------------+------------------------+-----------------------+-------------------------+" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql\n", + "\n", + "CALL system.rewrite_data_files(table => 'marketing.ad_clicks')\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -294,6 +556,14 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80a37e69-713f-418a-9f19-f154f00408aa", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/spark/create_table.py b/spark/create_table.py index 9252574..5f9d1cc 100644 --- a/spark/create_table.py +++ b/spark/create_table.py @@ -3,18 +3,16 @@ from pyspark.sql import SparkSession spark = SparkSession.builder.appName("").getOrCreate() print("creating database") -spark.sql('CREATE DATABASE IF NOT EXISTS orders') +spark.sql('CREATE DATABASE IF NOT EXISTS marketing') print("creating table") spark.sql(''' - CREATE TABLE IF NOT EXISTS orders.payments ( - id STRING, - type STRING, - created_at TIMESTAMP, - document STRING, - payer STRING, - amount INT + CREATE TABLE IF NOT EXISTS marketing.ad_clicks ( + user_id INT, + ad_id INT, + click_timestamp TIMESTAMP, + impression_timestamp TIMESTAMP ) USING iceberg - PARTITIONED BY (document) + PARTITIONED BY (ad_id) ''')