{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "29f8d24e-e4bf-484d-afd4-cb82ff6cd50d", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "25/02/01 00:22:25 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namespace
marketing
" ], "text/plain": [ "+-----------+\n", "| namespace |\n", "+-----------+\n", "| marketing |\n", "+-----------+" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%sql\n", "\n", "SHOW DATABASES" ] }, { "cell_type": "code", "execution_count": 1, "id": "70349765-e5f1-43a5-a141-cc2d54c69a58", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "25/02/01 00:37:06 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namespacetableNameisTemporary
marketingad_clicksFalse
" ], "text/plain": [ "+-----------+-----------+-------------+\n", "| namespace | tableName | isTemporary |\n", "+-----------+-----------+-------------+\n", "| marketing | ad_clicks | False |\n", "+-----------+-----------+-------------+" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%sql\n", "\n", "SHOW TABLES FROM marketing" ] }, { "cell_type": "code", "execution_count": 31, "id": "fabaed9c-9049-4996-9d26-b20f66303911", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keyvalue
current-snapshot-id6641965456052712871
formaticeberg/parquet
format-version2
write.parquet.compression-codeczstd
" ], "text/plain": [ "+---------------------------------+---------------------+\n", "| key | value |\n", "+---------------------------------+---------------------+\n", "| current-snapshot-id | 6641965456052712871 |\n", "| format | iceberg/parquet |\n", "| format-version | 2 |\n", "| write.parquet.compression-codec | zstd |\n", "+---------------------------------+---------------------+" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%sql\n", "\n", "SHOW TBLPROPERTIES marketing.ad_clicks" ] }, { "cell_type": "code", "execution_count": 33, "id": "6317d9c6-140e-4a63-890e-2173fbb9503e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
count(1)
637
" ], "text/plain": [ "+----------+\n", "| count(1) |\n", "+----------+\n", "| 637 |\n", "+----------+" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%sql\n", "\n", "SELECT COUNT(*)\n", "FROM marketing.ad_clicks" ] }, { "cell_type": "code", "execution_count": 22, "id": "2a1ff132-dc65-4943-a9be-416ba5a13c26", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_idad_idclick_timestampimpression_timestamp
21512025-02-01 00:42:092025-02-01 00:42:09
17552025-02-01 00:42:102025-02-01 00:42:09
31322025-02-01 00:42:102025-02-01 00:42:10
63592025-02-01 00:42:042025-02-01 00:42:03
60292025-02-01 00:42:042025-02-01 00:42:04
6312025-02-01 00:42:032025-02-01 00:42:03
52182025-02-01 00:41:252025-02-01 00:41:25
32272025-02-01 00:41:522025-02-01 00:41:51
1062025-02-01 00:41:462025-02-01 00:41:46
17132025-02-01 00:40:302025-02-01 00:40:29
" ], "text/plain": [ "+---------+-------+---------------------+----------------------+\n", "| user_id | ad_id | click_timestamp | impression_timestamp |\n", "+---------+-------+---------------------+----------------------+\n", "| 21 | 51 | 2025-02-01 00:42:09 | 2025-02-01 00:42:09 |\n", "| 17 | 55 | 2025-02-01 00:42:10 | 2025-02-01 00:42:09 |\n", "| 31 | 32 | 2025-02-01 00:42:10 | 2025-02-01 00:42:10 |\n", "| 63 | 59 | 2025-02-01 00:42:04 | 2025-02-01 00:42:03 |\n", "| 60 | 29 | 2025-02-01 00:42:04 | 2025-02-01 00:42:04 |\n", "| 6 | 31 | 2025-02-01 00:42:03 | 2025-02-01 00:42:03 |\n", "| 52 | 18 | 2025-02-01 00:41:25 | 2025-02-01 00:41:25 |\n", "| 32 | 27 | 2025-02-01 00:41:52 | 2025-02-01 00:41:51 |\n", "| 10 | 6 | 2025-02-01 00:41:46 | 2025-02-01 00:41:46 |\n", "| 17 | 13 | 2025-02-01 00:40:30 | 2025-02-01 00:40:29 |\n", "+---------+-------+---------------------+----------------------+" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%sql\n", "\n", "SELECT *\n", "FROM marketing.ad_clicks\n", "LIMIT 10" ] }, { "cell_type": "code", "execution_count": 19, "id": "a2688a95-594c-45ad-9d49-70a1bcd59a1b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
partitionspec_idrecord_countfile_counttotal_data_file_size_in_bytesposition_delete_record_countposition_delete_file_countequality_delete_record_countequality_delete_file_countlast_updated_atlast_updated_snapshot_id
Row(ad_id=42)0194542900002025-02-01 00:41:49.2020007965471739473975852
Row(ad_id=38)0171158200002025-02-01 00:42:13.2490006641965456052712871
Row(ad_id=10)0164528600002025-02-01 00:41:31.5870004059346813755015811
Row(ad_id=3)0151154300002025-02-01 00:42:13.2490006641965456052712871
Row(ad_id=49)0154535900002025-02-01 00:41:43.1380001865904111199103577
Row(ad_id=5)0141152600002025-02-01 00:41:08.8130002155865929954566188
Row(ad_id=2)0143410500002025-02-01 00:42:06.311000827301497454031138
Row(ad_id=41)0134525300002025-02-01 00:41:41.1440001472536140048912459
Row(ad_id=34)0131150800002025-02-01 00:41:08.8130002155865929954566188
Row(ad_id=60)0133400700002025-02-01 00:41:17.5180003047889973353044630
" ], "text/plain": [ "+---------------+---------+--------------+------------+-------------------------------+------------------------------+----------------------------+------------------------------+----------------------------+----------------------------+--------------------------+\n", "| partition | spec_id | record_count | file_count | total_data_file_size_in_bytes | position_delete_record_count | position_delete_file_count | equality_delete_record_count | equality_delete_file_count | last_updated_at | last_updated_snapshot_id |\n", "+---------------+---------+--------------+------------+-------------------------------+------------------------------+----------------------------+------------------------------+----------------------------+----------------------------+--------------------------+\n", "| Row(ad_id=42) | 0 | 19 | 4 | 5429 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:49.202000 | 7965471739473975852 |\n", "| Row(ad_id=38) | 0 | 17 | 1 | 1582 | 0 | 0 | 0 | 0 | 2025-02-01 00:42:13.249000 | 6641965456052712871 |\n", "| Row(ad_id=10) | 0 | 16 | 4 | 5286 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:31.587000 | 4059346813755015811 |\n", "| Row(ad_id=3) | 0 | 15 | 1 | 1543 | 0 | 0 | 0 | 0 | 2025-02-01 00:42:13.249000 | 6641965456052712871 |\n", "| Row(ad_id=49) | 0 | 15 | 4 | 5359 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:43.138000 | 1865904111199103577 |\n", "| Row(ad_id=5) | 0 | 14 | 1 | 1526 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:08.813000 | 2155865929954566188 |\n", "| Row(ad_id=2) | 0 | 14 | 3 | 4105 | 0 | 0 | 0 | 0 | 2025-02-01 00:42:06.311000 | 827301497454031138 |\n", "| Row(ad_id=41) | 0 | 13 | 4 | 5253 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:41.144000 | 1472536140048912459 |\n", "| Row(ad_id=34) | 0 | 13 | 1 | 1508 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:08.813000 | 2155865929954566188 |\n", "| Row(ad_id=60) | 0 | 13 | 3 | 4007 | 0 | 0 | 0 | 0 | 2025-02-01 00:41:17.518000 | 3047889973353044630 |\n", "+---------------+---------+--------------+------------+-------------------------------+------------------------------+----------------------------+------------------------------+----------------------------+----------------------------+--------------------------+" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%sql\n", "\n", "SELECT * \n", "FROM marketing.ad_clicks.partitions\n", "ORDER BY record_count DESC\n", "LIMIT 10" ] }, { "cell_type": "code", "execution_count": 13, "id": "89c67f0b-6bed-44fe-9c3d-99dda30477a9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
rewritten_data_files_countadded_data_files_countrewritten_bytes_countfailed_data_files_count
6111784090
" ], "text/plain": [ "+----------------------------+------------------------+-----------------------+-------------------------+\n", "| rewritten_data_files_count | added_data_files_count | rewritten_bytes_count | failed_data_files_count |\n", "+----------------------------+------------------------+-----------------------+-------------------------+\n", "| 61 | 11 | 78409 | 0 |\n", "+----------------------------+------------------------+-----------------------+-------------------------+" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%sql\n", "\n", "CALL system.rewrite_data_files(table => 'marketing.ad_clicks')\n" ] }, { "cell_type": "code", "execution_count": null, "id": "72b17cc4-6c92-47b7-9b86-ae6a7f73af4d", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "80a37e69-713f-418a-9f19-f154f00408aa", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.18" } }, "nbformat": 4, "nbformat_minor": 5 }