# kafka-ingest-concept/templates/docker-compose.iceberg.yaml
---
# Shared build definition for all Spark containers (master and workers).
# Reused below via the YAML merge key `<<: *spark-air`.
x-spark-common: &spark-air
  build:
    context: .
    dockerfile: spark.Dockerfile
    # Multi-stage build: only the `spark` stage of spark.Dockerfile is built.
    target: spark
services:
  # Spark master: exposes the cluster scheduler on 7077 and mounts the
  # SQL driver script plus the two SQL files from the `configs` section.
  spark:
    <<: *spark-air
    environment:
      - SPARK_MODE=master
    ports:
      - '7077:7077'
    configs:
      # NOTE: modes use YAML 1.2 octal (0o755). A bare `0755` is parsed as
      # DECIMAL 755 by YAML 1.2 parsers (e.g. go-yaml used by Compose),
      # which silently yields the wrong file permissions.
      - source: run_sql
        target: /spark-script/run_sql.sh
        mode: 0o755
      - source: create_table
        target: /spark-script/create-table.sql
        mode: 0o755
      - source: query_table
        target: /spark-script/query-table.sql
        mode: 0o755
  # Spark worker: connects back to the master service by its Compose DNS
  # name (`spark`), deliberately capped at 1 GiB / 1 core for the demo.
  spark-worker:
    <<: *spark-air
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark:7077
      - SPARK_WORKER_MEMORY=1G
      - SPARK_WORKER_CORES=1
configs:
  # Driver script: runs one of the SQL files below against the Iceberg
  # catalog (`demo`, Hadoop type) backed by MinIO at s3a://hummock001.
  # Usage inside the container: /spark-script/run_sql.sh <sql-file-basename>
  run_sql:
    content: |
      #!/bin/sh
      set -ex
      # BUGFIX: the original had `...hadoop-aws:3.3.2\` with no space before
      # the continuation, so the shell fused the package list with the next
      # word (`--master`) into a single token.
      spark-sql --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:1.1.0,org.apache.hadoop:hadoop-aws:3.3.2 \
        --master spark://spark:7077 \
        --conf spark.sql.catalog.demo=org.apache.iceberg.spark.SparkCatalog \
        --conf spark.sql.catalog.demo.type=hadoop \
        --conf spark.sql.catalog.demo.warehouse=s3a://hummock001/iceberg-data \
        --conf spark.sql.catalog.demo.hadoop.fs.s3a.endpoint=http://minio-0:9301 \
        --conf spark.sql.catalog.demo.hadoop.fs.s3a.path.style.access=true \
        --conf spark.sql.catalog.demo.hadoop.fs.s3a.access.key=hummockadmin \
        --conf spark.sql.catalog.demo.hadoop.fs.s3a.secret.key=hummockadmin \
        --conf spark.sql.defaultCatalog=demo \
        -f /spark-script/$1.sql
  # Ad-hoc check that rows landed in the demo table.
  query_table:
    content: |
      SELECT * FROM demo.demo_db.demo_table;
  # (Re)creates the Iceberg v2 demo table; format-version 2 enables
  # row-level deletes/updates needed for streaming upserts.
  create_table:
    content: |
      DROP TABLE IF EXISTS demo.demo_db.demo_table;
      CREATE TABLE demo.demo_db.demo_table
      (
        user_id bigint,
        ad_id bigint,
        click_timestamp timestamp,
        impression_timestamp timestamp
      ) TBLPROPERTIES ('format-version'='2');
# Compose project name; prefixes container, network, and volume names.
name: iceberg