{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "29f8d24e-e4bf-484d-afd4-cb82ff6cd50d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"25/02/01 00:22:25 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
"    <thead>\n",
"        <tr>\n",
"            <th>namespace</th>\n",
"        </tr>\n",
"    </thead>\n",
"    <tbody>\n",
"        <tr>\n",
"            <td>marketing</td>\n",
"        </tr>\n",
"    </tbody>\n",
"</table>"
],
"text/plain": [
"+-----------+\n",
"| namespace |\n",
"+-----------+\n",
"| marketing |\n",
"+-----------+"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"-- List the namespaces visible to the current Spark session.\n",
"SHOW NAMESPACES"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "70349765-e5f1-43a5-a141-cc2d54c69a58",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"25/02/01 00:37:06 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
"    <thead>\n",
"        <tr>\n",
"            <th>namespace</th>\n",
"            <th>tableName</th>\n",
"            <th>isTemporary</th>\n",
"        </tr>\n",
"    </thead>\n",
"    <tbody>\n",
"        <tr>\n",
"            <td>marketing</td>\n",
"            <td>ad_clicks</td>\n",
"            <td>False</td>\n",
"        </tr>\n",
"    </tbody>\n",
"</table>"
],
"text/plain": [
"+-----------+-----------+-------------+\n",
"| namespace | tableName | isTemporary |\n",
"+-----------+-----------+-------------+\n",
"| marketing | ad_clicks | False |\n",
"+-----------+-----------+-------------+"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"-- List the tables registered under the marketing namespace.\n",
"SHOW TABLES IN marketing"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "fabaed9c-9049-4996-9d26-b20f66303911",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
"    <thead>\n",
"        <tr>\n",
"            <th>key</th>\n",
"            <th>value</th>\n",
"        </tr>\n",
"    </thead>\n",
"    <tbody>\n",
"        <tr>\n",
"            <td>current-snapshot-id</td>\n",
"            <td>6641965456052712871</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>format</td>\n",
"            <td>iceberg/parquet</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>format-version</td>\n",
"            <td>2</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>write.parquet.compression-codec</td>\n",
"            <td>zstd</td>\n",
"        </tr>\n",
"    </tbody>\n",
"</table>"
],
"text/plain": [
"+---------------------------------+---------------------+\n",
"| key | value |\n",
"+---------------------------------+---------------------+\n",
"| current-snapshot-id | 6641965456052712871 |\n",
"| format | iceberg/parquet |\n",
"| format-version | 2 |\n",
"| write.parquet.compression-codec | zstd |\n",
"+---------------------------------+---------------------+"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"-- Show the table properties recorded for marketing.ad_clicks.\n",
"SHOW TBLPROPERTIES marketing.ad_clicks"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "6317d9c6-140e-4a63-890e-2173fbb9503e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
"    <thead>\n",
"        <tr>\n",
"            <th>count(1)</th>\n",
"        </tr>\n",
"    </thead>\n",
"    <tbody>\n",
"        <tr>\n",
"            <td>637</td>\n",
"        </tr>\n",
"    </tbody>\n",
"</table>"
],
"text/plain": [
"+----------+\n",
"| count(1) |\n",
"+----------+\n",
"| 637 |\n",
"+----------+"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"-- Total number of rows currently in the click table.\n",
"SELECT COUNT(*) FROM marketing.ad_clicks"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "2a1ff132-dc65-4943-a9be-416ba5a13c26",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
"    <thead>\n",
"        <tr>\n",
"            <th>user_id</th>\n",
"            <th>ad_id</th>\n",
"            <th>click_timestamp</th>\n",
"            <th>impression_timestamp</th>\n",
"        </tr>\n",
"    </thead>\n",
"    <tbody>\n",
"        <tr>\n",
"            <td>21</td>\n",
"            <td>51</td>\n",
"            <td>2025-02-01 00:42:09</td>\n",
"            <td>2025-02-01 00:42:09</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>17</td>\n",
"            <td>55</td>\n",
"            <td>2025-02-01 00:42:10</td>\n",
"            <td>2025-02-01 00:42:09</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>31</td>\n",
"            <td>32</td>\n",
"            <td>2025-02-01 00:42:10</td>\n",
"            <td>2025-02-01 00:42:10</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>63</td>\n",
"            <td>59</td>\n",
"            <td>2025-02-01 00:42:04</td>\n",
"            <td>2025-02-01 00:42:03</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>60</td>\n",
"            <td>29</td>\n",
"            <td>2025-02-01 00:42:04</td>\n",
"            <td>2025-02-01 00:42:04</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>6</td>\n",
"            <td>31</td>\n",
"            <td>2025-02-01 00:42:03</td>\n",
"            <td>2025-02-01 00:42:03</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>52</td>\n",
"            <td>18</td>\n",
"            <td>2025-02-01 00:41:25</td>\n",
"            <td>2025-02-01 00:41:25</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>32</td>\n",
"            <td>27</td>\n",
"            <td>2025-02-01 00:41:52</td>\n",
"            <td>2025-02-01 00:41:51</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>10</td>\n",
"            <td>6</td>\n",
"            <td>2025-02-01 00:41:46</td>\n",
"            <td>2025-02-01 00:41:46</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>17</td>\n",
"            <td>13</td>\n",
"            <td>2025-02-01 00:40:30</td>\n",
"            <td>2025-02-01 00:40:29</td>\n",
"        </tr>\n",
"    </tbody>\n",
"</table>"
],
"text/plain": [
"+---------+-------+---------------------+----------------------+\n",
"| user_id | ad_id | click_timestamp | impression_timestamp |\n",
"+---------+-------+---------------------+----------------------+\n",
"| 21 | 51 | 2025-02-01 00:42:09 | 2025-02-01 00:42:09 |\n",
"| 17 | 55 | 2025-02-01 00:42:10 | 2025-02-01 00:42:09 |\n",
"| 31 | 32 | 2025-02-01 00:42:10 | 2025-02-01 00:42:10 |\n",
"| 63 | 59 | 2025-02-01 00:42:04 | 2025-02-01 00:42:03 |\n",
"| 60 | 29 | 2025-02-01 00:42:04 | 2025-02-01 00:42:04 |\n",
"| 6 | 31 | 2025-02-01 00:42:03 | 2025-02-01 00:42:03 |\n",
"| 52 | 18 | 2025-02-01 00:41:25 | 2025-02-01 00:41:25 |\n",
"| 32 | 27 | 2025-02-01 00:41:52 | 2025-02-01 00:41:51 |\n",
"| 10 | 6 | 2025-02-01 00:41:46 | 2025-02-01 00:41:46 |\n",
"| 17 | 13 | 2025-02-01 00:40:30 | 2025-02-01 00:40:29 |\n",
"+---------+-------+---------------------+----------------------+"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"-- Show the ten most recent clicks.\n",
"-- LIMIT without ORDER BY returns an arbitrary set of rows, so the sample\n",
"-- could differ between runs; an explicit ordering makes it reproducible.\n",
"SELECT *\n",
"FROM marketing.ad_clicks\n",
"ORDER BY click_timestamp DESC, user_id, ad_id\n",
"LIMIT 10"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "a2688a95-594c-45ad-9d49-70a1bcd59a1b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
"    <thead>\n",
"        <tr>\n",
"            <th>partition</th>\n",
"            <th>spec_id</th>\n",
"            <th>record_count</th>\n",
"            <th>file_count</th>\n",
"            <th>total_data_file_size_in_bytes</th>\n",
"            <th>position_delete_record_count</th>\n",
"            <th>position_delete_file_count</th>\n",
"            <th>equality_delete_record_count</th>\n",
"            <th>equality_delete_file_count</th>\n",
"            <th>last_updated_at</th>\n",
"            <th>last_updated_snapshot_id</th>\n",
"        </tr>\n",
"    </thead>\n",
"    <tbody>\n",
"        <tr>\n",
"            <td>Row(ad_id=42)</td>\n",
"            <td>0</td>\n",
"            <td>19</td>\n",
"            <td>4</td>\n",
"            <td>5429</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>2025-02-01 00:41:49.202000</td>\n",
"            <td>7965471739473975852</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>Row(ad_id=38)</td>\n",
"            <td>0</td>\n",
"            <td>17</td>\n",
"            <td>1</td>\n",
"            <td>1582</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>2025-02-01 00:42:13.249000</td>\n",
"            <td>6641965456052712871</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>Row(ad_id=10)</td>\n",
"            <td>0</td>\n",
"            <td>16</td>\n",
"            <td>4</td>\n",
"            <td>5286</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>2025-02-01 00:41:31.587000</td>\n",
"            <td>4059346813755015811</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>Row(ad_id=3)</td>\n",
"            <td>0</td>\n",
"            <td>15</td>\n",
"            <td>1</td>\n",
"            <td>1543</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>2025-02-01 00:42:13.249000</td>\n",
"            <td>6641965456052712871</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>Row(ad_id=49)</td>\n",
"            <td>0</td>\n",
"            <td>15</td>\n",
"            <td>4</td>\n",
"            <td>5359</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>2025-02-01 00:41:43.138000</td>\n",
"            <td>1865904111199103577</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>Row(ad_id=5)</td>\n",
"            <td>0</td>\n",
"            <td>14</td>\n",
"            <td>1</td>\n",
"            <td>1526</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>2025-02-01 00:41:08.813000</td>\n",
"            <td>2155865929954566188</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>Row(ad_id=2)</td>\n",
"            <td>0</td>\n",
"            <td>14</td>\n",
"            <td>3</td>\n",
"            <td>4105</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>2025-02-01 00:42:06.311000</td>\n",
"            <td>827301497454031138</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>Row(ad_id=41)</td>\n",
"            <td>0</td>\n",
"            <td>13</td>\n",
"            <td>4</td>\n",
"            <td>5253</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>2025-02-01 00:41:41.144000</td>\n",
"            <td>1472536140048912459</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>Row(ad_id=34)</td>\n",
"            <td>0</td>\n",
"            <td>13</td>\n",
"            <td>1</td>\n",
"            <td>1508</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>2025-02-01 00:41:08.813000</td>\n",
"            <td>2155865929954566188</td>\n",
"        </tr>\n",
"        <tr>\n",
"            <td>Row(ad_id=60)</td>\n",
"            <td>0</td>\n",
"            <td>13</td>\n",
"            <td>3</td>\n",
"            <td>4007</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>0</td>\n",
"            <td>2025-02-01 00:41:17.518000</td>\n",
"            <td>3047889973353044630</td>\n",
"        </tr>\n",
"    </tbody>\n",
"</table>"
],
"text/plain": [
"+---------------+---------+--------------+------------+-------------------------------+------------------------------+----------------------------+------------------------------+----------------------------+----------------------------+--------------------------+\n",
"| partition | spec_id | record_count | file_count | total_data_file_size_in_bytes | position_delete_record_count | position_delete_file_count | equality_delete_record_count | equality_delete_file_count | last_updated_at | last_updated_snapshot_id |\n",
"+---------------+---------+--------------+------------+-------------------------------+------------------------------+----------------------------+------------------------------+----------------------------+----------------------------+--------------------------+\n",
"| Row(ad_id=42) | 0 | 19 | 4 | 5429 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:49.202000 | 7965471739473975852 |\n",
"| Row(ad_id=38) | 0 | 17 | 1 | 1582 | 0 | 0 | 0 | 0 | 2025-02-01 00:42:13.249000 | 6641965456052712871 |\n",
"| Row(ad_id=10) | 0 | 16 | 4 | 5286 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:31.587000 | 4059346813755015811 |\n",
"| Row(ad_id=3) | 0 | 15 | 1 | 1543 | 0 | 0 | 0 | 0 | 2025-02-01 00:42:13.249000 | 6641965456052712871 |\n",
"| Row(ad_id=49) | 0 | 15 | 4 | 5359 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:43.138000 | 1865904111199103577 |\n",
"| Row(ad_id=5) | 0 | 14 | 1 | 1526 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:08.813000 | 2155865929954566188 |\n",
"| Row(ad_id=2) | 0 | 14 | 3 | 4105 | 0 | 0 | 0 | 0 | 2025-02-01 00:42:06.311000 | 827301497454031138 |\n",
"| Row(ad_id=41) | 0 | 13 | 4 | 5253 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:41.144000 | 1472536140048912459 |\n",
"| Row(ad_id=34) | 0 | 13 | 1 | 1508 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:08.813000 | 2155865929954566188 |\n",
"| Row(ad_id=60) | 0 | 13 | 3 | 4007 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:17.518000 | 3047889973353044630 |\n",
"+---------------+---------+--------------+------------+-------------------------------+------------------------------+----------------------------+------------------------------+----------------------------+----------------------------+--------------------------+"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"-- Ten partitions holding the most records, read from the table's\n",
"-- `partitions` metadata table.\n",
"-- Several partitions share the same record_count, so the partition value\n",
"-- is used as a tiebreaker to keep the top-10 cut-off reproducible.\n",
"SELECT *\n",
"FROM marketing.ad_clicks.partitions\n",
"ORDER BY record_count DESC, partition\n",
"LIMIT 10"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "89c67f0b-6bed-44fe-9c3d-99dda30477a9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
"    <thead>\n",
"        <tr>\n",
"            <th>rewritten_data_files_count</th>\n",
"            <th>added_data_files_count</th>\n",
"            <th>rewritten_bytes_count</th>\n",
"            <th>failed_data_files_count</th>\n",
"        </tr>\n",
"    </thead>\n",
"    <tbody>\n",
"        <tr>\n",
"            <td>61</td>\n",
"            <td>11</td>\n",
"            <td>78409</td>\n",
"            <td>0</td>\n",
"        </tr>\n",
"    </tbody>\n",
"</table>"
],
"text/plain": [
"+----------------------------+------------------------+-----------------------+-------------------------+\n",
"| rewritten_data_files_count | added_data_files_count | rewritten_bytes_count | failed_data_files_count |\n",
"+----------------------------+------------------------+-----------------------+-------------------------+\n",
"| 61 | 11 | 78409 | 0 |\n",
"+----------------------------+------------------------+-----------------------+-------------------------+"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"CALL system.rewrite_data_files(\n",
"    table => 'marketing.ad_clicks'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "72b17cc4-6c92-47b7-9b86-ae6a7f73af4d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "80a37e69-713f-418a-9f19-f154f00408aa",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}