services:
  # --- Ingest -------------------------------------------------------------
  redpanda:
    image: redpandadata/redpanda:v24.3.4
    container_name: redpanda
    ports:
      - "9092:9092"
      - "8081:8081"
      - "8082:8082"
      - "29092:29092"
    command:
      - redpanda
      - start
      - --overprovisioned
      - --smp
      - "1"
      - --memory
      - "1G"
      - --reserve-memory
      - "0M"
      - --node-id
      - "0"
      - --kafka-addr
      # Internal listener for other containers, OUTSIDE for the host.
      - PLAINTEXT://0.0.0.0:29092,OUTSIDE://0.0.0.0:9092
      - --advertise-kafka-addr
      - PLAINTEXT://redpanda:29092,OUTSIDE://localhost:9092
      - --check=false

  # Sample producer writing JSON events into the `event-stream` topic.
  client_application:
    container_name: client_application
    build:
      context: ./client-application
    restart: unless-stopped
    environment:
      RUST_LOG: info
    command:
      - produce
      - --host=redpanda:29092
      - --topic=event-stream
      - --delay-ms=500
    depends_on:
      - connect

  # Kafka Connect worker; installs the Iceberg sink plugin at startup and
  # registers the IcebergSinkConnector once the REST API is up.
  connect:
    image: confluentinc/cp-kafka-connect-base:7.8.0
    depends_on:
      - redpanda
    hostname: connect
    container_name: connect
    ports:
      - "8083:8083"
    environment:
      CONNECT_BOOTSTRAP_SERVERS: 'redpanda:29092'
      CONNECT_REST_ADVERTISED_HOST_NAME: "connect"
      CONNECT_REST_PORT: 8083
      CONNECT_GROUP_ID: connect-cluster-group
      CONNECT_CONFIG_STORAGE_TOPIC: _kafka-connect-configs
      CONNECT_OFFSET_STORAGE_TOPIC: _kafka-connect-offsets
      CONNECT_STATUS_STORAGE_TOPIC: _kafka-connect-status
      CONNECT_KEY_CONVERTER: org.apache.kafka.connect.storage.StringConverter
      CONNECT_KEY_CONVERTER_SCHEMAS_ENABLE: "false"
      CONNECT_VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter
      CONNECT_VALUE_CONVERTER_SCHEMAS_ENABLE: "false"
      # FIX: StringConverter lives in org.apache.kafka.connect.storage, not
      # .json — the original `.json.StringConverter` class does not exist
      # (matches CONNECT_KEY_CONVERTER above).
      CONNECT_INTERNAL_KEY_CONVERTER: "org.apache.kafka.connect.storage.StringConverter"
      CONNECT_INTERNAL_KEY_CONVERTER_SCHEMAS_ENABLE: "false"
      CONNECT_INTERNAL_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter"
      CONNECT_INTERNAL_VALUE_CONVERTER_SCHEMAS_ENABLE: "false"
      CONNECT_LOG4J_ROOT_LOGLEVEL: "INFO"
      CONNECT_LOG4J_LOGGERS: "org.apache.kafka.connect.runtime.rest=WARN,org.reflections=ERROR"
      CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: "1"
      CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: "1"
      CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: "1"
      CONNECT_PLUGIN_PATH: '/usr/share/java,/usr/share/confluent-hub-components/,/connectors/'
      AWS_ACCESS_KEY_ID: "minioadmin"
      AWS_SECRET_ACCESS_KEY: "minioadmin"
    command:
      - bash
      - -c
      - |
        # echo "Installing connector plugins"
        confluent-hub install --no-prompt tabular/iceberg-kafka-connect:0.4.11
        # echo "Launching Kafka Connect worker"
        /etc/confluent/docker/run &
        # echo "Waiting for Kafka Connect to start listening on localhost ⏳"
        while : ; do
          curl_status=$$(curl -s -o /dev/null -w %{http_code} http://localhost:8083/connectors)
          echo -e $$(date) " Kafka Connect listener HTTP state: " $$curl_status " (waiting for 200)"
          if [ $$curl_status -eq 200 ] ; then
            break
          fi
          sleep 5
        done
        echo -e "\n--\n+> Creating connector"
        curl -X PUT \
          -H 'Content-Type: application/json' \
          -H 'Accept: application/json' http://localhost:8083/connectors/IcebergSinkConnector/config \
          -d '{
            "tasks.max": "1",
            "topics": "event-stream",
            "connector.class": "io.tabular.iceberg.connect.IcebergSinkConnector",
            "iceberg.catalog.s3.endpoint": "http://minio:9000",
            "iceberg.catalog.s3.secret-access-key": "minioadmin",
            "iceberg.catalog.s3.access-key-id": "minioadmin",
            "iceberg.catalog.s3.path-style-access": "true",
            "iceberg.catalog.uri": "http://rest:8181",
            "iceberg.catalog.warehouse": "s3://warehouse/",
            "iceberg.catalog.client.region": "eu-west-1",
            "iceberg.catalog.type": "rest",
            "iceberg.control.commitIntervalMs": "1000",
            "iceberg.tables": "marketing.ad_clicks",
            "value.converter.schemas.enable": "false",
            "value.converter": "org.apache.kafka.connect.json.JsonConverter",
            "key.converter": "org.apache.kafka.connect.storage.StringConverter",
            "schemas.enable": "false"
          }'
        sleep infinity

  # Redpanda Console UI; config is templated from the env var into a file.
  console:
    image: redpandadata/console:v2.1.1
    entrypoint: /bin/sh
    command: -c "echo \"$$CONSOLE_CONFIG_FILE\" > /tmp/config.yml; /app/console"
    environment:
      CONFIG_FILEPATH: /tmp/config.yml
      CONSOLE_CONFIG_FILE: |
        kafka:
          brokers: ["redpanda:29092"]
          schemaRegistry:
            enabled: true
            urls: ["http://redpanda:8081"]
        connect:
          enabled: true
          clusters:
            - name: local-connect-cluster
              url: http://connect:8083
        redpanda:
          adminApi:
            enabled: true
            urls: ["http://redpanda:9644"]
    ports:
      - "18080:8080"
    depends_on:
      - redpanda
      - connect

  # --- Buckets ------------------------------------------------------------
  minio:
    image: minio/minio
    hostname: minio
    container_name: minio
    environment:
      - MINIO_ROOT_USER=minioadmin
      - MINIO_ROOT_PASSWORD=minioadmin
      - MINIO_DOMAIN=minio
    networks:
      default:
        aliases:
          # Virtual-host-style bucket access (warehouse.minio).
          - warehouse.minio
    ports:
      - "9001:9001"
      - "9000:9000"
    command: ["server", "/data", "--console-address", ":9001"]

  # One-shot job that creates the `warehouse` bucket in MinIO.
  aws:
    image: amazon/aws-cli
    container_name: aws-cli
    command: |
      -c "sleep 2 && \
      aws --endpoint-url http://minio:9000 s3 mb s3://warehouse --region eu-west-1 || exit 0"
    entrypoint: [/bin/bash]
    environment:
      AWS_ACCESS_KEY_ID: "minioadmin"
      AWS_SECRET_ACCESS_KEY: "minioadmin"
    depends_on:
      - minio

  # --- Batch & Iceberg manipulation ---------------------------------------
  spark-iceberg:
    image: tabulario/spark-iceberg:3.5.1_1.5.0
    hostname: spark-iceberg
    container_name: spark-iceberg
    build: spark/
    depends_on:
      - rest
      - minio
    environment:
      AWS_ACCESS_KEY_ID: minioadmin
      AWS_SECRET_ACCESS_KEY: minioadmin
      AWS_REGION: eu-west-1
      SPARK_DEFAULTS: |
        spark.sql.extensions org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
        spark.sql.catalog.iceberg org.apache.iceberg.spark.SparkCatalog
        spark.sql.catalog.iceberg.catalog-impl org.apache.iceberg.rest.RESTCatalog
        spark.sql.catalog.iceberg.uri http://rest:8181
        spark.sql.catalog.iceberg.io-impl org.apache.iceberg.aws.s3.S3FileIO
        spark.sql.catalog.iceberg.warehouse s3://warehouse/wh/
        spark.sql.catalog.iceberg.s3.endpoint http://minio:9000
        spark.sql.catalog.iceberg.s3.path-style-access true
        spark.sql.defaultCatalog iceberg
        spark.sql.catalogImplementation in-memory
        spark.eventLog.enabled true
        spark.eventLog.dir /home/iceberg/spark-events
        spark.history.fs.logDirectory /home/iceberg/spark-events
        spark.jars.packages org.apache.hadoop:hadoop-aws:3.2.0
    ports:
      - "8888:8888"
      - "8080:8080"
      - "10000:10000"
      - "10001:10001"
    volumes:
      - ./spark:/home/iceberg/scripts
      - ./notebooks:/home/iceberg/notebooks/notebooks
    # NOTE(review): exec-form command containing shell operators (`>`; `&&`)
    # only works if the image's entrypoint is a shell that interprets the
    # single string argument — TODO confirm against the tabulario image.
    command: ["echo \"$$SPARK_DEFAULTS\" > /opt/spark/conf/spark-defaults.conf && spark-submit /home/iceberg/scripts/create_table.py && notebook"]

  # --- Catalog ------------------------------------------------------------
  rest:
    image: tabulario/iceberg-rest
    hostname: rest
    container_name: rest
    ports:
      - "8181:8181"
    environment:
      - AWS_ACCESS_KEY_ID=minioadmin
      - AWS_SECRET_ACCESS_KEY=minioadmin
      - AWS_REGION=eu-west-1
      - CATALOG_WAREHOUSE=s3://warehouse/
      - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
      - CATALOG_S3_ENDPOINT=http://minio:9000
      - CATALOG_S3_PATH__STYLE__ACCESS=True

  # --- Query layer ---------------------------------------------------------
  starrocks-fe:
    image: starrocks/fe-ubuntu:3.3.5
    hostname: starrocks-fe
    container_name: starrocks-fe
    restart: unless-stopped
    user: root
    command: |
      bash /opt/starrocks/fe/bin/start_fe.sh --host_type FQDN
    ports:
      - "8030:8030"
      - "9020:9020"
      - "9030:9030"
    environment:
      - AWS_ACCESS_KEY_ID=minioadmin
      - AWS_SECRET_ACCESS_KEY=minioadmin
      - AWS_REGION=eu-west-1
    healthcheck:
      test: 'mysql -u root -h starrocks-fe -P 9030 -e "SHOW FRONTENDS\G" |grep "Alive: true"'
      interval: 10s
      timeout: 5s
      retries: 3

  # One-shot job: registers the Iceberg REST catalog in StarRocks (idempotent).
  starrocks-init-tables:
    image: starrocks/fe-ubuntu:3.3.5
    hostname: starrocks-init-tables
    container_name: starrocks-init-tables
    user: root
    # FIX: must be the quoted string "no" — an unquoted `no` is parsed as
    # YAML boolean false, which Compose rejects (restart must be a string).
    restart: "no"
    environment:
      SETUP_SQL: |
        CREATE EXTERNAL CATALOG 'iceberg'
        COMMENT "Iceberg table"
        PROPERTIES (
          "type"="iceberg",
          "iceberg.catalog.type"="rest",
          "iceberg.catalog.uri"="http://rest:8181",
          "iceberg.catalog.warehouse"="warehouse",
          "aws.s3.access_key"="minioadmin",
          "aws.s3.secret_key"="minioadmin",
          "aws.s3.endpoint"="http://minio:9000",
          "aws.s3.enable_path_style_access"="true",
          "client.factory"="com.starrocks.connector.iceberg.IcebergAwsClientFactory"
        );
    depends_on:
      starrocks-fe:
        condition: service_healthy
    command: |
      bash -c '
      until mysql -P 9030 -h starrocks-fe -u root -e "SELECT 1" >/dev/null 2>&1; do
        echo "Waiting for StarRocks FE to be ready..."
        sleep 5
      done
      if ! mysql -P 9030 -h starrocks-fe -u root -e "SHOW CATALOGS" | grep -q iceberg; then
        echo "Creating Iceberg catalog..."
        mysql -P 9030 -h starrocks-fe -u root -e "$$SETUP_SQL"
        echo "Iceberg catalog created successfully"
      else
        echo "Iceberg catalog already exists"
      fi
      '

  starrocks-be:
    image: starrocks/be-ubuntu:3.3.5
    command:
      - /bin/bash
      - -c
      - |
        ulimit -u 65535;
        ulimit -n 65535;
        echo "# Enable data cache" >> /opt/starrocks/be/conf/be.conf
        echo "block_cache_enable = true" >> /opt/starrocks/be/conf/be.conf
        echo "block_cache_mem_size = 536870912" >> /opt/starrocks/be/conf/be.conf
        echo "block_cache_disk_size = 1073741824" >> /opt/starrocks/be/conf/be.conf
        sleep 15s
        mysql --connect-timeout 2 -h starrocks-fe -P 9030 -u root -e "ALTER SYSTEM ADD BACKEND \"starrocks-be:9050\";"
        bash /opt/starrocks/be/bin/start_be.sh
    ports:
      - "8040:8040"
    hostname: starrocks-be
    container_name: starrocks-be
    user: root
    restart: unless-stopped
    depends_on:
      - starrocks-fe
    healthcheck:
      test: 'mysql -u root -h starrocks-fe -P 9030 -e "SHOW BACKENDS\G" |grep "Alive: true"'
      interval: 10s
      timeout: 5s
      retries: 3
    environment:
      - HOST_TYPE=FQDN