591 lines
21 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "29f8d24e-e4bf-484d-afd4-cb82ff6cd50d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"25/02/01 00:22:25 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>namespace</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>marketing</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+-----------+\n",
"| namespace |\n",
"+-----------+\n",
"| marketing |\n",
"+-----------+"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"SHOW DATABASES"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "70349765-e5f1-43a5-a141-cc2d54c69a58",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"25/02/01 00:37:06 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>namespace</th>\n",
" <th>tableName</th>\n",
" <th>isTemporary</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>marketing</td>\n",
" <td>ad_clicks</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+-----------+-----------+-------------+\n",
"| namespace | tableName | isTemporary |\n",
"+-----------+-----------+-------------+\n",
"| marketing | ad_clicks | False |\n",
"+-----------+-----------+-------------+"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"SHOW TABLES FROM marketing"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "fabaed9c-9049-4996-9d26-b20f66303911",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>key</th>\n",
" <th>value</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>current-snapshot-id</td>\n",
" <td>6641965456052712871</td>\n",
" </tr>\n",
" <tr>\n",
" <td>format</td>\n",
" <td>iceberg/parquet</td>\n",
" </tr>\n",
" <tr>\n",
" <td>format-version</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <td>write.parquet.compression-codec</td>\n",
" <td>zstd</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+---------------------------------+---------------------+\n",
"| key | value |\n",
"+---------------------------------+---------------------+\n",
"| current-snapshot-id | 6641965456052712871 |\n",
"| format | iceberg/parquet |\n",
"| format-version | 2 |\n",
"| write.parquet.compression-codec | zstd |\n",
"+---------------------------------+---------------------+"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"SHOW TBLPROPERTIES marketing.ad_clicks"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "6317d9c6-140e-4a63-890e-2173fbb9503e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>count(1)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>637</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+----------+\n",
"| count(1) |\n",
"+----------+\n",
"| 637 |\n",
"+----------+"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"SELECT COUNT(*)\n",
"FROM marketing.ad_clicks"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "2a1ff132-dc65-4943-a9be-416ba5a13c26",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>user_id</th>\n",
" <th>ad_id</th>\n",
" <th>click_timestamp</th>\n",
" <th>impression_timestamp</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>21</td>\n",
" <td>51</td>\n",
" <td>2025-02-01 00:42:09</td>\n",
" <td>2025-02-01 00:42:09</td>\n",
" </tr>\n",
" <tr>\n",
" <td>17</td>\n",
" <td>55</td>\n",
" <td>2025-02-01 00:42:10</td>\n",
" <td>2025-02-01 00:42:09</td>\n",
" </tr>\n",
" <tr>\n",
" <td>31</td>\n",
" <td>32</td>\n",
" <td>2025-02-01 00:42:10</td>\n",
" <td>2025-02-01 00:42:10</td>\n",
" </tr>\n",
" <tr>\n",
" <td>63</td>\n",
" <td>59</td>\n",
" <td>2025-02-01 00:42:04</td>\n",
" <td>2025-02-01 00:42:03</td>\n",
" </tr>\n",
" <tr>\n",
" <td>60</td>\n",
" <td>29</td>\n",
" <td>2025-02-01 00:42:04</td>\n",
" <td>2025-02-01 00:42:04</td>\n",
" </tr>\n",
" <tr>\n",
" <td>6</td>\n",
" <td>31</td>\n",
" <td>2025-02-01 00:42:03</td>\n",
" <td>2025-02-01 00:42:03</td>\n",
" </tr>\n",
" <tr>\n",
" <td>52</td>\n",
" <td>18</td>\n",
" <td>2025-02-01 00:41:25</td>\n",
" <td>2025-02-01 00:41:25</td>\n",
" </tr>\n",
" <tr>\n",
" <td>32</td>\n",
" <td>27</td>\n",
" <td>2025-02-01 00:41:52</td>\n",
" <td>2025-02-01 00:41:51</td>\n",
" </tr>\n",
" <tr>\n",
" <td>10</td>\n",
" <td>6</td>\n",
" <td>2025-02-01 00:41:46</td>\n",
" <td>2025-02-01 00:41:46</td>\n",
" </tr>\n",
" <tr>\n",
" <td>17</td>\n",
" <td>13</td>\n",
" <td>2025-02-01 00:40:30</td>\n",
" <td>2025-02-01 00:40:29</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+---------+-------+---------------------+----------------------+\n",
"| user_id | ad_id | click_timestamp | impression_timestamp |\n",
"+---------+-------+---------------------+----------------------+\n",
"| 21 | 51 | 2025-02-01 00:42:09 | 2025-02-01 00:42:09 |\n",
"| 17 | 55 | 2025-02-01 00:42:10 | 2025-02-01 00:42:09 |\n",
"| 31 | 32 | 2025-02-01 00:42:10 | 2025-02-01 00:42:10 |\n",
"| 63 | 59 | 2025-02-01 00:42:04 | 2025-02-01 00:42:03 |\n",
"| 60 | 29 | 2025-02-01 00:42:04 | 2025-02-01 00:42:04 |\n",
"| 6 | 31 | 2025-02-01 00:42:03 | 2025-02-01 00:42:03 |\n",
"| 52 | 18 | 2025-02-01 00:41:25 | 2025-02-01 00:41:25 |\n",
"| 32 | 27 | 2025-02-01 00:41:52 | 2025-02-01 00:41:51 |\n",
"| 10 | 6 | 2025-02-01 00:41:46 | 2025-02-01 00:41:46 |\n",
"| 17 | 13 | 2025-02-01 00:40:30 | 2025-02-01 00:40:29 |\n",
"+---------+-------+---------------------+----------------------+"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"SELECT *\n",
"FROM marketing.ad_clicks\n",
"LIMIT 10"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "a2688a95-594c-45ad-9d49-70a1bcd59a1b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>partition</th>\n",
" <th>spec_id</th>\n",
" <th>record_count</th>\n",
" <th>file_count</th>\n",
" <th>total_data_file_size_in_bytes</th>\n",
" <th>position_delete_record_count</th>\n",
" <th>position_delete_file_count</th>\n",
" <th>equality_delete_record_count</th>\n",
" <th>equality_delete_file_count</th>\n",
" <th>last_updated_at</th>\n",
" <th>last_updated_snapshot_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>Row(ad_id=42)</td>\n",
" <td>0</td>\n",
" <td>19</td>\n",
" <td>4</td>\n",
" <td>5429</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2025-02-01 00:41:49.202000</td>\n",
" <td>7965471739473975852</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Row(ad_id=38)</td>\n",
" <td>0</td>\n",
" <td>17</td>\n",
" <td>1</td>\n",
" <td>1582</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2025-02-01 00:42:13.249000</td>\n",
" <td>6641965456052712871</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Row(ad_id=10)</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" <td>4</td>\n",
" <td>5286</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2025-02-01 00:41:31.587000</td>\n",
" <td>4059346813755015811</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Row(ad_id=3)</td>\n",
" <td>0</td>\n",
" <td>15</td>\n",
" <td>1</td>\n",
" <td>1543</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2025-02-01 00:42:13.249000</td>\n",
" <td>6641965456052712871</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Row(ad_id=49)</td>\n",
" <td>0</td>\n",
" <td>15</td>\n",
" <td>4</td>\n",
" <td>5359</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2025-02-01 00:41:43.138000</td>\n",
" <td>1865904111199103577</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Row(ad_id=5)</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>1</td>\n",
" <td>1526</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2025-02-01 00:41:08.813000</td>\n",
" <td>2155865929954566188</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Row(ad_id=2)</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>3</td>\n",
" <td>4105</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2025-02-01 00:42:06.311000</td>\n",
" <td>827301497454031138</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Row(ad_id=41)</td>\n",
" <td>0</td>\n",
" <td>13</td>\n",
" <td>4</td>\n",
" <td>5253</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2025-02-01 00:41:41.144000</td>\n",
" <td>1472536140048912459</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Row(ad_id=34)</td>\n",
" <td>0</td>\n",
" <td>13</td>\n",
" <td>1</td>\n",
" <td>1508</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2025-02-01 00:41:08.813000</td>\n",
" <td>2155865929954566188</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Row(ad_id=60)</td>\n",
" <td>0</td>\n",
" <td>13</td>\n",
" <td>3</td>\n",
" <td>4007</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2025-02-01 00:41:17.518000</td>\n",
" <td>3047889973353044630</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+---------------+---------+--------------+------------+-------------------------------+------------------------------+----------------------------+------------------------------+----------------------------+----------------------------+--------------------------+\n",
"| partition | spec_id | record_count | file_count | total_data_file_size_in_bytes | position_delete_record_count | position_delete_file_count | equality_delete_record_count | equality_delete_file_count | last_updated_at | last_updated_snapshot_id |\n",
"+---------------+---------+--------------+------------+-------------------------------+------------------------------+----------------------------+------------------------------+----------------------------+----------------------------+--------------------------+\n",
"| Row(ad_id=42) | 0 | 19 | 4 | 5429 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:49.202000 | 7965471739473975852 |\n",
"| Row(ad_id=38) | 0 | 17 | 1 | 1582 | 0 | 0 | 0 | 0 | 2025-02-01 00:42:13.249000 | 6641965456052712871 |\n",
"| Row(ad_id=10) | 0 | 16 | 4 | 5286 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:31.587000 | 4059346813755015811 |\n",
"| Row(ad_id=3) | 0 | 15 | 1 | 1543 | 0 | 0 | 0 | 0 | 2025-02-01 00:42:13.249000 | 6641965456052712871 |\n",
"| Row(ad_id=49) | 0 | 15 | 4 | 5359 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:43.138000 | 1865904111199103577 |\n",
"| Row(ad_id=5) | 0 | 14 | 1 | 1526 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:08.813000 | 2155865929954566188 |\n",
"| Row(ad_id=2) | 0 | 14 | 3 | 4105 | 0 | 0 | 0 | 0 | 2025-02-01 00:42:06.311000 | 827301497454031138 |\n",
"| Row(ad_id=41) | 0 | 13 | 4 | 5253 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:41.144000 | 1472536140048912459 |\n",
"| Row(ad_id=34) | 0 | 13 | 1 | 1508 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:08.813000 | 2155865929954566188 |\n",
"| Row(ad_id=60) | 0 | 13 | 3 | 4007 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:17.518000 | 3047889973353044630 |\n",
"+---------------+---------+--------------+------------+-------------------------------+------------------------------+----------------------------+------------------------------+----------------------------+----------------------------+--------------------------+"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"SELECT * \n",
"FROM marketing.ad_clicks.partitions\n",
"ORDER BY record_count DESC\n",
"LIMIT 10"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "89c67f0b-6bed-44fe-9c3d-99dda30477a9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <thead>\n",
" <tr>\n",
" <th>rewritten_data_files_count</th>\n",
" <th>added_data_files_count</th>\n",
" <th>rewritten_bytes_count</th>\n",
" <th>failed_data_files_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>61</td>\n",
" <td>11</td>\n",
" <td>78409</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"+----------------------------+------------------------+-----------------------+-------------------------+\n",
"| rewritten_data_files_count | added_data_files_count | rewritten_bytes_count | failed_data_files_count |\n",
"+----------------------------+------------------------+-----------------------+-------------------------+\n",
"| 61 | 11 | 78409 | 0 |\n",
"+----------------------------+------------------------+-----------------------+-------------------------+"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"\n",
"CALL system.rewrite_data_files(table => 'marketing.ad_clicks')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "72b17cc4-6c92-47b7-9b86-ae6a7f73af4d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "80a37e69-713f-418a-9f19-f154f00408aa",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}