Files
data_pipeline/clickhouse_task/create_table.py
T
Ankit Malik 7fbbffec65 first commit
2026-06-12 10:54:00 +05:30

55 lines
1.2 KiB
Python

import polars as pl
from sqlalchemy import text
from log import *
def _quote_identifier(name: str) -> str:
    """Return *name* as a backtick-quoted ClickHouse identifier."""
    # Escape backslashes first so the backslashes added for backticks are
    # not themselves escaped a second time.
    escaped = name.replace("\\", "\\\\").replace("`", "\\`")
    return f"`{escaped}`"


def create_clickhouse_table(
    df: pl.DataFrame,
    table_name: str,
    clickhouse_engine
) -> None:
    """Create a ClickHouse table whose columns mirror the schema of *df*.

    Issues ``CREATE TABLE IF NOT EXISTS`` with one ``Nullable`` column per
    DataFrame column, using ``MergeTree()`` with ``ORDER BY tuple()``.
    Only ``df.schema`` is read; no rows are inserted.

    Args:
        df: DataFrame whose column names and dtypes define the table.
        table_name: Target table name. It is interpolated into the
            statement verbatim (unquoted), so it must come from a trusted
            source.
        clickhouse_engine: Object providing ``begin()`` as a context manager
            that yields a connection with ``execute()`` (presumably a
            SQLAlchemy engine bound to ClickHouse -- confirm with caller).
    """
    type_mapping = {
        pl.Int8: "Nullable(Int8)",
        pl.Int16: "Nullable(Int16)",
        pl.Int32: "Nullable(Int32)",
        pl.Int64: "Nullable(Int64)",
        pl.UInt8: "Nullable(UInt8)",
        pl.UInt16: "Nullable(UInt16)",
        pl.UInt32: "Nullable(UInt32)",
        pl.UInt64: "Nullable(UInt64)",
        pl.Float32: "Nullable(Float32)",
        pl.Float64: "Nullable(Float64)",
        pl.Boolean: "Nullable(Bool)",
        pl.String: "Nullable(String)",
        pl.Date: "Nullable(Date)",
        pl.Datetime: "Nullable(DateTime)",
    }

    columns = []
    for col_name, dtype in df.schema.items():
        # Schema entries for parametrized dtypes (e.g. Datetime with a time
        # unit / time zone) hash differently from the bare class used as the
        # dict key, so look up by base type; otherwise Datetime columns
        # would always fall through to the String default.
        clickhouse_type = type_mapping.get(
            dtype.base_type(),
            "Nullable(String)",  # default for any dtype not listed above
        )
        columns.append(f"{_quote_identifier(col_name)} {clickhouse_type}")

    create_sql = f"""
    CREATE TABLE IF NOT EXISTS {table_name}
    (
    {', '.join(columns)}
    )
    ENGINE = MergeTree()
    ORDER BY tuple()
    """

    # begin() scopes the DDL to a single transaction block on the engine.
    with clickhouse_engine.begin() as conn:
        conn.execute(text(create_sql))

    log.info(f"Table ready: {table_name}")