feat: added starrocks

This commit is contained in:
Kasper Juul Hermansen 2025-02-01 00:28:53 +01:00
parent d27096896b
commit aa1cec9986
Signed by: kjuulh
SSH Key Fingerprint: SHA256:RjXh0p7U6opxnfd3ga/Y9TCo18FYlHFdSpRIV72S/QM
2 changed files with 413 additions and 95 deletions

View File

@ -1,4 +1,6 @@
services:
# Ingest
redpanda:
image: redpandadata/redpanda:v24.3.4
container_name: redpanda
@ -129,6 +131,7 @@ services:
- redpanda
- connect
# Buckets
minio:
image: minio/minio
hostname: minio
@ -159,6 +162,7 @@ services:
depends_on:
- minio
# Batch & Iceberg manipulation
spark-iceberg:
image: tabulario/spark-iceberg
hostname: spark-iceberg
@ -196,6 +200,7 @@ services:
- ./notebooks:/home/iceberg/notebooks/notebooks
command: ["echo \"$$SPARK_DEFAULTS\" > /opt/spark/conf/spark-defaults.conf && spark-submit /home/iceberg/scripts/create_table.py && notebook"]
# Catalog
rest:
image: tabulario/iceberg-rest
hostname: rest
@ -210,3 +215,99 @@ services:
- CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
- CATALOG_S3_ENDPOINT=http://minio:9000
- CATALOG_S3_PATH__STYLE__ACCESS=True
# Query layer
starrocks-fe:
  # StarRocks frontend (FE) node. The other StarRocks services in this file
  # reach it with the MySQL client at starrocks-fe:9030.
  image: starrocks/fe-ubuntu:3.3.5
  hostname: starrocks-fe
  container_name: starrocks-fe
  restart: unless-stopped
  user: root
  # --host_type FQDN: start the FE in FQDN host mode (the hostname above is
  # what the other services use to address it).
  command: |
    bash /opt/starrocks/fe/bin/start_fe.sh --host_type FQDN
  # Port mappings are quoted so YAML always reads HOST:CONTAINER as a string.
  ports:
    - "8030:8030"
    - "9020:9020"
    - "9030:9030"
  # Same access key / secret / region style used for the S3-compatible store
  # elsewhere in this file.
  # NOTE(review): presumably the local MinIO defaults; do not reuse outside
  # a development setup.
  environment:
    - AWS_ACCESS_KEY_ID=minioadmin
    - AWS_SECRET_ACCESS_KEY=minioadmin
    - AWS_REGION=eu-west-1
  healthcheck:
    # Healthy once the FE answers on the MySQL port and reports itself alive.
    test: 'mysql -u root -h starrocks-fe -P 9030 -e "SHOW FRONTENDS\G" |grep "Alive: true"'
    interval: 10s
    timeout: 5s
    retries: 3
    # Failures during the boot window do not count towards `retries`, so a
    # slow start does not flip the service to unhealthy and break services
    # that wait on `condition: service_healthy`.
    start_period: 30s
starrocks-init-tables:
  # One-shot job: once the FE is healthy, create the external `iceberg`
  # catalog (REST catalog at http://rest:8181, data in the S3 endpoint at
  # http://minio:9000) unless a catalog matching "iceberg" already exists.
  image: starrocks/fe-ubuntu:3.3.5
  hostname: starrocks-init-tables
  container_name: starrocks-init-tables
  user: root
  # Must be the quoted string "no"; a bare `no` is a boolean under YAML 1.1.
  restart: "no"
  environment:
    # SQL executed by the command below; referenced there as $$SETUP_SQL.
    SETUP_SQL: |
      CREATE EXTERNAL CATALOG 'iceberg'
      COMMENT "Iceberg table"
      PROPERTIES
      (
        "type"="iceberg",
        "iceberg.catalog.type"="rest",
        "iceberg.catalog.uri"="http://rest:8181",
        "iceberg.catalog.warehouse"="warehouse",
        "aws.s3.access_key"="minioadmin",
        "aws.s3.secret_key"="minioadmin",
        "aws.s3.endpoint"="http://minio:9000",
        "aws.s3.enable_path_style_access"="true",
        "client.factory"="com.starrocks.connector.iceberg.IcebergAwsClientFactory"
      );
  depends_on:
    starrocks-fe:
      condition: service_healthy
  # `$$` is Compose's escape for a literal `$`, so bash (not Compose) expands
  # SETUP_SQL. The script polls until the FE accepts queries, then creates the
  # catalog only when SHOW CATALOGS does not already list it. A failed CREATE
  # is reported on stderr and exits non-zero instead of printing a success
  # message.
  command: |
    bash -c '
    until mysql -P 9030 -h starrocks-fe -u root -e "SELECT 1" >/dev/null 2>&1; do
      echo "Waiting for StarRocks FE to be ready..."
      sleep 5
    done
    if mysql -P 9030 -h starrocks-fe -u root -e "SHOW CATALOGS" | grep -q iceberg; then
      echo "Iceberg catalog already exists"
      exit 0
    fi
    echo "Creating Iceberg catalog..."
    if mysql -P 9030 -h starrocks-fe -u root -e "$$SETUP_SQL"; then
      echo "Iceberg catalog created successfully"
    else
      echo "Failed to create Iceberg catalog" >&2
      exit 1
    fi
    '
starrocks-be:
  # StarRocks backend (BE) node. On start it appends block-cache settings to
  # be.conf, registers itself with the FE as starrocks-be:9050, then launches
  # the BE process.
  image: starrocks/be-ubuntu:3.3.5
  # Startup script: raise the process and open-file limits, append the cache
  # settings, register with the FE, start the BE. The registration step is
  # not fatal: if it fails (for example the backend was already added on an
  # earlier start), the script still goes on to start the BE.
  command:
    - /bin/bash
    - -c
    - |
      ulimit -u 65535;
      ulimit -n 65535;
      echo "# Enable data cache" >> /opt/starrocks/be/conf/be.conf
      echo "block_cache_enable = true" >> /opt/starrocks/be/conf/be.conf
      echo "block_cache_mem_size = 536870912" >> /opt/starrocks/be/conf/be.conf
      echo "block_cache_disk_size = 1073741824" >> /opt/starrocks/be/conf/be.conf
      mysql --connect-timeout 2 -h starrocks-fe -P 9030 -u root -e "ALTER SYSTEM ADD BACKEND \"starrocks-be:9050\";"
      bash /opt/starrocks/be/bin/start_be.sh
  # Quoted so YAML always reads HOST:CONTAINER as a string.
  ports:
    - "8040:8040"
  hostname: starrocks-be
  container_name: starrocks-be
  user: root
  restart: unless-stopped
  # Wait for the FE healthcheck to pass before starting, so the
  # ALTER SYSTEM ADD BACKEND call above has a live FE to talk to. This
  # replaces the previous fixed 15-second sleep.
  depends_on:
    starrocks-fe:
      condition: service_healthy
  healthcheck:
    # Asks the FE whether a registered backend reports "Alive: true".
    test: 'mysql -u root -h starrocks-fe -P 9030 -e "SHOW BACKENDS\G" |grep "Alive: true"'
    interval: 10s
    timeout: 5s
    retries: 3
  environment:
    - HOST_TYPE=FQDN

View File

@ -2,10 +2,46 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "29f8d24e-e4bf-484d-afd4-cb82ff6cd50d",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"25/01/31 22:57:42 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>namespace</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>orders</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+-----------+\n",
"| namespace |\n",
"+-----------+\n",
"| orders |\n",
"+-----------+"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
@ -14,10 +50,43 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "70349765-e5f1-43a5-a141-cc2d54c69a58",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>namespace</th>\n",
" <th>tableName</th>\n",
" <th>isTemporary</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>orders</td>\n",
" <td>payments</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+-----------+-----------+-------------+\n",
"| namespace | tableName | isTemporary |\n",
"+-----------+-----------+-------------+\n",
"| orders | payments | False |\n",
"+-----------+-----------+-------------+"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
@ -26,10 +95,56 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "fabaed9c-9049-4996-9d26-b20f66303911",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>key</th>\n",
" <th>value</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>current-snapshot-id</td>\n",
" <td>none</td>\n",
" </tr>\n",
" <tr>\n",
" <td>format</td>\n",
" <td>iceberg/parquet</td>\n",
" </tr>\n",
" <tr>\n",
" <td>format-version</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <td>write.parquet.compression-codec</td>\n",
" <td>zstd</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+---------------------------------+-----------------+\n",
"| key | value |\n",
"+---------------------------------+-----------------+\n",
"| current-snapshot-id | none |\n",
"| format | iceberg/parquet |\n",
"| format-version | 2 |\n",
"| write.parquet.compression-codec | zstd |\n",
"+---------------------------------+-----------------+"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
@ -38,10 +153,39 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"id": "6317d9c6-140e-4a63-890e-2173fbb9503e",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>count(1)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+----------+\n",
"| count(1) |\n",
"+----------+\n",
"| 0 |\n",
"+----------+"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
@ -51,10 +195,40 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"id": "2a1ff132-dc65-4943-a9be-416ba5a13c26",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>id</th>\n",
" <th>type</th>\n",
" <th>created_at</th>\n",
" <th>document</th>\n",
" <th>payer</th>\n",
" <th>amount</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+----+------+------------+----------+-------+--------+\n",
"| id | type | created_at | document | payer | amount |\n",
"+----+------+------------+----------+-------+--------+\n",
"+----+------+------------+----------+-------+--------+"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
@ -65,10 +239,45 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"id": "a2688a95-594c-45ad-9d49-70a1bcd59a1b",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>partition</th>\n",
" <th>spec_id</th>\n",
" <th>record_count</th>\n",
" <th>file_count</th>\n",
" <th>total_data_file_size_in_bytes</th>\n",
" <th>position_delete_record_count</th>\n",
" <th>position_delete_file_count</th>\n",
" <th>equality_delete_record_count</th>\n",
" <th>equality_delete_file_count</th>\n",
" <th>last_updated_at</th>\n",
" <th>last_updated_snapshot_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+-----------+---------+--------------+------------+-------------------------------+------------------------------+----------------------------+------------------------------+----------------------------+-----------------+--------------------------+\n",
"| partition | spec_id | record_count | file_count | total_data_file_size_in_bytes | position_delete_record_count | position_delete_file_count | equality_delete_record_count | equality_delete_file_count | last_updated_at | last_updated_snapshot_id |\n",
"+-----------+---------+--------------+------------+-------------------------------+------------------------------+----------------------------+------------------------------+----------------------------+-----------------+--------------------------+\n",
"+-----------+---------+--------------+------------+-------------------------------+------------------------------+----------------------------+------------------------------+----------------------------+-----------------+--------------------------+"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
@ -77,6 +286,14 @@
"ORDER BY record_count DESC\n",
"LIMIT 10"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "72b17cc4-6c92-47b7-9b86-ae6a7f73af4d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@ -95,7 +312,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.17"
"version": "3.9.18"
}
},
"nbformat": 4,