first commit
This commit is contained in:
@@ -0,0 +1,376 @@
|
|||||||
|
import pyodbc
|
||||||
|
import pandas as pd
|
||||||
|
import clickhouse_connect
|
||||||
|
import numpy as np
|
||||||
|
from datetime import datetime
|
||||||
|
import traceback
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
# ---------------------------------------------------
# Ignore Warning
# ---------------------------------------------------
# Silence only the warning whose message starts with the text below;
# every other warning is still shown.
warnings.filterwarnings(
    'ignore',
    'pandas only supports SQLAlchemy connectable'
)

print("ETL Started :", datetime.now())

# ---------------------------------------------------
# CONNECTION SETTINGS
# ---------------------------------------------------
# Each setting is read from an ETL_* environment variable and falls back to
# the value this script has always used, so existing runs keep working.
# NOTE(security): the fallback user names / passwords are committed to
# source control - rotate them and supply the real ones via the environment.
import os

# ---------------------------------------------------
# SQL SERVER CONNECTION
# ---------------------------------------------------
# NOTE(review): a value containing ';' or '}' would need ODBC brace quoting.
SQL_CONN_STR = (
    'DRIVER={ODBC Driver 17 for SQL Server};'
    f"SERVER={os.environ.get('ETL_SQL_SERVER', '10.200.25.65')};"
    f"DATABASE={os.environ.get('ETL_SQL_DATABASE', 'CPMIndiaBusinessInsight')};"
    f"UID={os.environ.get('ETL_SQL_UID', 'bsgteam_test')};"
    f"PWD={os.environ.get('ETL_SQL_PWD', 'B$gt3@m#00512')};"
    'TrustServerCertificate=yes;'
)

# ---------------------------------------------------
# CLICKHOUSE CONFIG
# ---------------------------------------------------
# Passed as keyword arguments to clickhouse_connect.get_client().
CH_CONFIG = {
    'host': os.environ.get('ETL_CH_HOST', '172.188.12.194'),
    'port': int(os.environ.get('ETL_CH_PORT', '8123')),
    'username': os.environ.get('ETL_CH_USER', 'default'),
    'password': os.environ.get('ETL_CH_PASSWORD', 'dipanshu_k'),
    'database': os.environ.get('ETL_CH_DATABASE', 'DaburIndia_BI'),
}

# ---------------------------------------------------
# TABLE DETAILS
# ---------------------------------------------------
# Source table (also used as the ClickHouse target table name) and the
# Project_Id value the extraction query filters on.
TABLE_NAME = 'Employee_Master'
PROJECT_ID = 41654
|
||||||
|
|
||||||
|
# ---------------------------------------------------
|
||||||
|
# CLEAN DATAFRAME
|
||||||
|
# ---------------------------------------------------
|
||||||
|
def _is_date_column(col, series):
    """Return True when *series* should be cleaned as a date column.

    The column name must contain "date" (case-insensitive). Because that
    substring also occurs in names such as "Updated_By", a column whose
    non-null values are all text is accepted only if at least one of those
    values parses as a date; otherwise the coercion would wipe the column.
    Columns holding anything other than pure text keep the name-only rule.
    """
    if 'date' not in col.lower():
        return False

    values = series.dropna()
    if values.empty or not values.map(lambda v: isinstance(v, str)).all():
        return True

    return bool(pd.to_datetime(values, errors='coerce').notna().any())


def _to_python_dates(series):
    """Convert *series* to ``datetime.date`` objects, using None for bad values."""
    parsed = pd.to_datetime(series, errors='coerce')

    # Years outside 1970-2100 are discarded - presumably to stay inside the
    # range the target date column accepts (TODO confirm).
    years = parsed.dt.year
    parsed = parsed.where((years >= 1970) & (years <= 2100))

    return parsed.apply(lambda x: x.date() if pd.notnull(x) else None)


def _clean_scalar(val):
    """Normalise one cell of a non-date column.

    Nulls become None, strings pass through unchanged, integers (including
    bool, which is a subclass of int) become plain ``int``, and every other
    value - floats, datetimes, anything else - is stringified.
    """
    if pd.isnull(val):
        return None
    if isinstance(val, str):
        return val
    if isinstance(val, (int, np.integer)):
        return int(val)
    return str(val)


def clean_dataframe(df):
    """Prepare one chunk for insertion: NaN -> None, dates -> date objects.

    Cleaning is best-effort. A column that raises is reported on stdout and
    left as it was; a failure outside the column loop is reported and the
    frame is returned in whatever state it reached.

    Args:
        df: pandas DataFrame holding one chunk of source rows.

    Returns:
        The cleaned DataFrame.
    """
    try:
        # Replace NaN
        df = df.replace({np.nan: None})

        for col in df.columns:
            try:
                if _is_date_column(col, df[col]):
                    print(f"Cleaning Date Column : {col}")
                    df[col] = _to_python_dates(df[col])
                else:
                    df[col] = [_clean_scalar(val) for val in df[col]]

            except Exception as col_error:
                print("\n===================================")
                print(f"COLUMN FAILED : {col}")
                print(str(col_error))
                print("===================================")

        return df

    except Exception as clean_error:
        print("\n===================================")
        print("DATA CLEAN FAILED")
        print(str(clean_error))
        print("===================================")

        return df
|
||||||
|
|
||||||
|
# Bind both handles up front so the cleanup below can tell "never opened"
# apart from "failed to close" instead of hiding a NameError.
sql_conn = None
ch_client = None

try:
    # ---------------------------------------------------
    # CONNECT SQL SERVER
    # ---------------------------------------------------
    sql_conn = pyodbc.connect(SQL_CONN_STR)
    print("Connected to SQL Server")

    # ---------------------------------------------------
    # CONNECT CLICKHOUSE
    # ---------------------------------------------------
    ch_client = clickhouse_connect.get_client(**CH_CONFIG)
    print("Connected to ClickHouse")

    # ---------------------------------------------------
    # QUERY
    # ---------------------------------------------------
    # TABLE_NAME and PROJECT_ID are module constants, not user input.
    query = f"""
    SELECT *
    FROM dbo.[{TABLE_NAME}]
    WHERE Project_Id = {PROJECT_ID}
    """

    print("\nExecuting Query:")
    print(query)

    # ---------------------------------------------------
    # CHUNK SIZE
    # ---------------------------------------------------
    chunk_size = 100000
    total_rows = 0
    failed_chunks = 0

    # ---------------------------------------------------
    # READ DATA
    # ---------------------------------------------------
    # With chunksize set, read_sql yields one DataFrame per chunk.
    for chunk in pd.read_sql(query, sql_conn, chunksize=chunk_size):
        try:
            print("\n===================================")
            print(f"Processing {len(chunk)} Rows")
            print("===================================")

            # CLEAN DATA
            chunk = clean_dataframe(chunk)

            # FINAL DEBUG: Python type and first non-null value per column
            print("\nFINAL COLUMN TYPES")
            for col in chunk.columns:
                sample = chunk[col].dropna()
                if len(sample) > 0:
                    print(col, type(sample.iloc[0]), sample.iloc[0])

            # SAMPLE DATA
            print("\nSAMPLE DATA")
            print(chunk.head(2))

            # INSERT INTO CLICKHOUSE
            print("\nInserting into ClickHouse...")
            ch_client.insert_df(
                table=TABLE_NAME,
                df=chunk,
                database=CH_CONFIG['database']
            )

            total_rows += len(chunk)
            print(f"\nInserted Total Rows : {total_rows}")

        except Exception as chunk_error:
            # A failed chunk is logged and skipped so later chunks still load;
            # it is counted so the final summary does not claim full success.
            failed_chunks += 1

            print("\n===================================")
            print("CHUNK INSERT FAILED")
            print("===================================")
            print(str(chunk_error))
            traceback.print_exc()

            # SAVE ERROR LOG
            with open(
                "clickhouse_chunk_error.log",
                "a",
                encoding="utf-8"
            ) as log:
                log.write("\n\n=====================================")
                log.write(f"\nTIME : {datetime.now()}")
                log.write(f"\nTABLE : {TABLE_NAME}")
                log.write(f"\nERROR : {str(chunk_error)}")
                log.write(f"\nTRACEBACK :\n{traceback.format_exc()}")
                log.write("\n=====================================")

    print("\n===================================")
    if failed_chunks:
        print("ETL COMPLETED WITH ERRORS")
        print(f"FAILED CHUNKS : {failed_chunks}")
    else:
        print("ETL COMPLETED SUCCESSFULLY")
    print(f"TOTAL ROWS INSERTED : {total_rows}")
    print("===================================")

except Exception as main_error:
    print("\n===================================")
    print("MAIN ERROR")
    print("===================================")
    print(str(main_error))
    traceback.print_exc()

    with open(
        "clickhouse_main_error.log",
        "a",
        encoding="utf-8"
    ) as log:
        log.write("\n\n=====================================")
        log.write(f"\nTIME : {datetime.now()}")
        log.write(f"\nERROR : {str(main_error)}")
        log.write(f"\nTRACEBACK :\n{traceback.format_exc()}")
        log.write("\n=====================================")

finally:
    # ---------------------------------------------------
    # CLOSE SQL SERVER
    # ---------------------------------------------------
    if sql_conn is not None:
        try:
            sql_conn.close()
            print("\nSQL Server Connection Closed")
        except Exception as close_error:
            print(f"\nSQL Server Close Failed : {close_error}")

    # ---------------------------------------------------
    # CLOSE CLICKHOUSE
    # ---------------------------------------------------
    if ch_client is not None:
        try:
            ch_client.close()
            print("ClickHouse Connection Closed")
        except Exception as close_error:
            print(f"ClickHouse Close Failed : {close_error}")

print("\nETL Finished :", datetime.now())
|
||||||
@@ -0,0 +1,453 @@
|
|||||||
|
|
||||||
|
import pyodbc
|
||||||
|
import pandas as pd
|
||||||
|
import clickhouse_connect
|
||||||
|
import numpy as np
|
||||||
|
from datetime import datetime
|
||||||
|
import traceback
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
# =========================================================
# IGNORE WARNINGS
# =========================================================
# Silence only the warning whose message starts with the text below;
# every other warning is still shown.
warnings.filterwarnings(
    'ignore',
    'pandas only supports SQLAlchemy connectable'
)

print("ETL Started :", datetime.now())

# =========================================================
# CONNECTION SETTINGS
# =========================================================
# Each setting is read from an ETL_* environment variable and falls back to
# the value this script has always used, so existing runs keep working.
# NOTE(security): the fallback user names / passwords are committed to
# source control - rotate them and supply the real ones via the environment.
import os

# =========================================================
# SQL SERVER CONNECTION
# =========================================================
# NOTE(review): a value containing ';' or '}' would need ODBC brace quoting.
SQL_CONN_STR = (
    'DRIVER={ODBC Driver 17 for SQL Server};'
    f"SERVER={os.environ.get('ETL_SQL_SERVER', '10.200.25.65')};"
    f"DATABASE={os.environ.get('ETL_SQL_DATABASE', 'CPMIndiaBusinessInsight')};"
    f"UID={os.environ.get('ETL_SQL_UID', 'bsgteam_test')};"
    f"PWD={os.environ.get('ETL_SQL_PWD', 'B$gt3@m#00512')};"
    'TrustServerCertificate=yes;'
)

# =========================================================
# CLICKHOUSE CONFIG
# =========================================================
# Passed as keyword arguments to clickhouse_connect.get_client().
CH_CONFIG = {
    'host': os.environ.get('ETL_CH_HOST', '172.188.12.194'),
    'port': int(os.environ.get('ETL_CH_PORT', '8123')),
    'username': os.environ.get('ETL_CH_USER', 'default'),
    'password': os.environ.get('ETL_CH_PASSWORD', 'dipanshu_k'),
    'database': os.environ.get('ETL_CH_DATABASE', 'DaburIndia_BI'),
}

# =========================================================
# TABLE NAME
# =========================================================
# Source table (also used as the ClickHouse target table name) and the
# Project_Id value the extraction query filters on.
TABLE_NAME = 'Store_Master'
PROJECT_ID = 41654
|
||||||
|
|
||||||
|
# =========================================================
|
||||||
|
# CLEAN DATAFRAME
|
||||||
|
# =========================================================
|
||||||
|
def _is_date_column(col, series):
    """Return True when *series* should be cleaned as a date column.

    The column name must contain "date" (case-insensitive). Because that
    substring also occurs in names such as "Updated_By", a column whose
    non-null values are all text is accepted only if at least one of those
    values parses as a date; otherwise the coercion would wipe the column.
    Columns holding anything other than pure text keep the name-only rule.
    """
    if 'date' not in col.lower():
        return False

    values = series.dropna()
    if values.empty or not values.map(lambda v: isinstance(v, str)).all():
        return True

    return bool(pd.to_datetime(values, errors='coerce').notna().any())


def _to_python_dates(series):
    """Convert *series* to ``datetime.date`` objects, using None for bad values."""
    parsed = pd.to_datetime(series, errors='coerce')

    # Years outside 1970-2100 are discarded - presumably to stay inside the
    # range the target date column accepts (TODO confirm).
    years = parsed.dt.year
    parsed = parsed.where((years >= 1970) & (years <= 2100))

    return parsed.apply(lambda x: x.date() if pd.notnull(x) else None)


def _clean_scalar(val):
    """Normalise one cell of an object / mixed column.

    Nulls become None; integers (including bool, a subclass of int) become
    plain ``int``; floats become ``int`` when they hold a whole number
    (avoids values such as '3240.0') and ``float`` otherwise; strings are
    stripped; anything else is stringified.
    """
    if pd.isnull(val):
        return None
    if isinstance(val, (int, np.integer)):
        return int(val)
    if isinstance(val, (float, np.floating)):
        return int(val) if val.is_integer() else float(val)
    if isinstance(val, str):
        return val.strip()
    return str(val)


def clean_dataframe(df):
    """Prepare one chunk for insertion into ClickHouse.

    NaN becomes None, date columns become ``datetime.date`` objects, integer
    and float columns are turned into plain Python numbers, and the remaining
    columns are cleaned cell by cell.

    Cleaning is best-effort. A column that raises is reported on stdout and
    left as it was; a failure outside the column loop is reported and the
    frame is returned in whatever state it reached.

    Args:
        df: pandas DataFrame holding one chunk of source rows.

    Returns:
        The cleaned DataFrame.
    """
    try:
        # Replace NaN
        df = df.replace({np.nan: None})

        # Process Column Wise
        for col in df.columns:
            try:
                series = df[col]

                # DATE COLUMNS
                if _is_date_column(col, series):
                    print(f"Cleaning Date Column : {col}")
                    df[col] = _to_python_dates(series)

                # INTEGER COLUMNS
                elif pd.api.types.is_integer_dtype(series):
                    numeric = pd.to_numeric(series, errors='coerce')
                    df[col] = numeric.apply(
                        lambda x: int(x) if pd.notnull(x) else None
                    )

                # FLOAT COLUMNS
                elif pd.api.types.is_float_dtype(series):
                    non_null = series.dropna()

                    # Convert whole floats to int, e.g. 3240.0 -> 3240.
                    # The decision is made per chunk, from that chunk's values.
                    if len(non_null) > 0 and (non_null % 1 == 0).all():
                        caster = int
                    else:
                        caster = float

                    df[col] = series.apply(
                        lambda x: caster(x) if pd.notnull(x) else None
                    )

                # OBJECT / STRING COLUMNS
                else:
                    df[col] = [_clean_scalar(val) for val in series]

            except Exception as col_error:
                print("\n================================")
                print(f"COLUMN FAILED : {col}")
                print(str(col_error))
                print("================================")

        return df

    except Exception as clean_error:
        print("\n================================")
        print("DATA CLEAN FAILED")
        print(str(clean_error))
        print("================================")

        return df
|
||||||
|
|
||||||
|
# =========================================================
|
||||||
|
# MAIN PROCESS
|
||||||
|
# =========================================================
|
||||||
|
# Bind both handles up front so the cleanup below can tell "never opened"
# apart from "failed to close" instead of hiding a NameError.
sql_conn = None
ch_client = None

try:
    # =====================================================
    # CONNECT SQL SERVER
    # =====================================================
    sql_conn = pyodbc.connect(SQL_CONN_STR)
    print("Connected to SQL Server")

    # =====================================================
    # CONNECT CLICKHOUSE
    # =====================================================
    ch_client = clickhouse_connect.get_client(**CH_CONFIG)
    print("Connected to ClickHouse")

    # =====================================================
    # QUERY
    # =====================================================
    # TABLE_NAME and PROJECT_ID are module constants, not user input.
    query = f"""
    SELECT *
    FROM dbo.[{TABLE_NAME}]
    WHERE Project_Id = {PROJECT_ID}
    """

    print("\nExecuting Query:")
    print(query)

    # =====================================================
    # CHUNK SIZE
    # =====================================================
    chunk_size = 100000
    total_rows = 0
    failed_chunks = 0

    # =====================================================
    # READ DATA
    # =====================================================
    # With chunksize set, read_sql yields one DataFrame per chunk.
    for chunk in pd.read_sql(query, sql_conn, chunksize=chunk_size):
        try:
            print("\n================================")
            print(f"Processing {len(chunk)} Rows")
            print("================================")

            # CLEAN DATA
            chunk = clean_dataframe(chunk)

            # DEBUG COLUMN TYPES: Python type and first non-null value per column
            print("\nCOLUMN TYPES")
            for col in chunk.columns:
                sample = chunk[col].dropna()
                if len(sample) > 0:
                    print(col, type(sample.iloc[0]), sample.iloc[0])

            # SAMPLE DATA
            print("\nSAMPLE DATA")
            print(chunk.head(2))

            # INSERT INTO CLICKHOUSE
            print("\nInserting into ClickHouse...")
            ch_client.insert_df(
                table=TABLE_NAME,
                df=chunk,
                database=CH_CONFIG['database']
            )

            total_rows += len(chunk)
            print(f"\nInserted Total Rows : {total_rows}")

        except Exception as chunk_error:
            # A failed chunk is logged and skipped so later chunks still load;
            # it is counted so the final summary does not claim full success.
            failed_chunks += 1

            print("\n================================")
            print("CHUNK INSERT FAILED")
            print("================================")
            print(str(chunk_error))
            traceback.print_exc()

            # SAVE ERROR LOG
            with open(
                "clickhouse_chunk_error.log",
                "a",
                encoding="utf-8"
            ) as log:
                log.write("\n\n================================")
                log.write(f"\nTIME : {datetime.now()}")
                log.write(f"\nTABLE : {TABLE_NAME}")
                log.write(f"\nERROR : {str(chunk_error)}")
                log.write(f"\nTRACEBACK :\n{traceback.format_exc()}")
                log.write("\n================================")

    print("\n================================")
    if failed_chunks:
        print("ETL COMPLETED WITH ERRORS")
        print(f"FAILED CHUNKS : {failed_chunks}")
    else:
        print("ETL COMPLETED SUCCESSFULLY")
    print(f"TOTAL ROWS INSERTED : {total_rows}")
    print("================================")

# =========================================================
# MAIN ERROR
# =========================================================
except Exception as main_error:
    print("\n================================")
    print("MAIN ERROR")
    print("================================")
    print(str(main_error))
    traceback.print_exc()

    with open(
        "clickhouse_main_error.log",
        "a",
        encoding="utf-8"
    ) as log:
        log.write("\n\n================================")
        log.write(f"\nTIME : {datetime.now()}")
        log.write(f"\nERROR : {str(main_error)}")
        log.write(f"\nTRACEBACK :\n{traceback.format_exc()}")
        log.write("\n================================")

# =========================================================
# CLOSE CONNECTIONS
# =========================================================
finally:
    if sql_conn is not None:
        try:
            sql_conn.close()
            print("\nSQL Server Connection Closed")
        except Exception as close_error:
            print(f"\nSQL Server Close Failed : {close_error}")

    if ch_client is not None:
        try:
            ch_client.close()
            print("ClickHouse Connection Closed")
        except Exception as close_error:
            print(f"ClickHouse Close Failed : {close_error}")

print("\nETL Finished :", datetime.now())
|
||||||
@@ -0,0 +1,291 @@
|
|||||||
|
|
||||||
|
import pyodbc
|
||||||
|
import pandas as pd
|
||||||
|
import clickhouse_connect
|
||||||
|
from datetime import datetime
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
# ---------------------------------------------------
# Suppress Pandas Warning
# ---------------------------------------------------
# Silence only the warning whose message starts with the text below;
# every other warning is still shown.
warnings.filterwarnings(
    'ignore',
    'pandas only supports SQLAlchemy connectable'
)

print("SCHEMA Migration Started :", datetime.now())

# ---------------------------------------------------
# Connection Settings
# ---------------------------------------------------
# Each setting is read from an ETL_* environment variable and falls back to
# the value this script has always used, so existing runs keep working.
# NOTE(security): the fallback user names / passwords are committed to
# source control - rotate them and supply the real ones via the environment.
import os

# ---------------------------------------------------
# SQL Server Connection
# ---------------------------------------------------
# NOTE(review): a value containing ';' or '}' would need ODBC brace quoting.
SQL_CONN_STR = (
    'DRIVER={ODBC Driver 17 for SQL Server};'
    f"SERVER={os.environ.get('ETL_SQL_SERVER', '10.200.25.65')};"
    f"DATABASE={os.environ.get('ETL_SQL_DATABASE', 'CPMIndiaBusinessInsight')};"
    f"UID={os.environ.get('ETL_SQL_UID', 'bsgteam_test')};"
    f"PWD={os.environ.get('ETL_SQL_PWD', 'B$gt3@m#00512')};"
    'TrustServerCertificate=yes;'
)

# ---------------------------------------------------
# ClickHouse Connection
# ---------------------------------------------------
# Passed as keyword arguments to clickhouse_connect.get_client().
CH_CONFIG = {
    'host': os.environ.get('ETL_CH_HOST', '172.188.12.194'),
    'port': int(os.environ.get('ETL_CH_PORT', '8123')),
    'username': os.environ.get('ETL_CH_USER', 'default'),
    'password': os.environ.get('ETL_CH_PASSWORD', 'dipanshu_k'),
    'database': os.environ.get('ETL_CH_DATABASE', 'DaburIndia_BI'),
}

# ---------------------------------------------------
# SQL Server → ClickHouse Datatype Mapping
# ---------------------------------------------------
# Keys are lower-cased SQL Server DATA_TYPE names; any type not listed here
# is created as String by the table loop.
DATATYPE_MAPPING = {
    # Integers
    'bigint': 'Int64',
    'int': 'Int32',
    'smallint': 'Int16',
    'tinyint': 'Int8',
    'bit': 'UInt8',

    # Floating point / fixed point
    # NOTE(review): decimal / numeric / money map to Float64, which cannot
    # represent every fixed-point value exactly - confirm this is acceptable.
    'float': 'Float64',
    'real': 'Float32',
    'decimal': 'Float64',
    'numeric': 'Float64',
    'money': 'Float64',

    # Text
    'varchar': 'String',
    'nvarchar': 'String',
    'char': 'String',
    'nchar': 'String',
    'text': 'String',
    'ntext': 'String',
    'xml': 'String',

    # Dates and timestamps
    'date': 'Date32',
    'datetime': 'DateTime64',
    'datetime2': 'DateTime64',
    'smalldatetime': 'DateTime64',

    'time': 'String',

    'uniqueidentifier': 'String',

    # Binary data is stored as String as well
    'binary': 'String',
    'varbinary': 'String'
}
|
||||||
|
|
||||||
|
# Bind both handles up front so the cleanup below can tell "never opened"
# apart from "failed to close" instead of hiding a NameError.
sql_conn = None
ch_client = None

try:
    # ---------------------------------------------------
    # Connect SQL Server
    # ---------------------------------------------------
    sql_conn = pyodbc.connect(SQL_CONN_STR)
    print("Connected to SQL Server")

    # ---------------------------------------------------
    # Connect ClickHouse
    # ---------------------------------------------------
    ch_client = clickhouse_connect.get_client(**CH_CONFIG)
    print("Connected to ClickHouse")

    # ---------------------------------------------------
    # Create Database if Not Exists
    # ---------------------------------------------------
    create_database_query = f"""
    CREATE DATABASE IF NOT EXISTS `{CH_CONFIG['database']}`
    """

    ch_client.command(create_database_query)
    print(f"Database Verified : {CH_CONFIG['database']}")

    # ---------------------------------------------------
    # Get All SQL Server Tables
    # ---------------------------------------------------
    table_query = """
    SELECT TABLE_NAME
    FROM INFORMATION_SCHEMA.TABLES
    WHERE TABLE_TYPE = 'BASE TABLE'
    AND TABLE_SCHEMA = 'dbo'
    ORDER BY TABLE_NAME
    """

    tables_df = pd.read_sql(table_query, sql_conn)
    tables = tables_df['TABLE_NAME'].tolist()

    print(f"Total Tables Found : {len(tables)}")

    # The column lookup is restricted to the dbo schema, matching the table
    # list above; without that filter a same-named table in another schema
    # would contribute its columns too.
    schema_query = """
    SELECT
    COLUMN_NAME,
    DATA_TYPE,
    IS_NULLABLE
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE TABLE_NAME = ?
    AND TABLE_SCHEMA = 'dbo'
    ORDER BY ORDINAL_POSITION
    """

    failed_tables = 0

    # ---------------------------------------------------
    # Process Each Table
    # ---------------------------------------------------
    for table_name in tables:
        try:
            print("\n===================================")
            print(f"Creating Table : {table_name}")
            print("===================================")

            # ---------------------------------------------------
            # Safe Table Name
            # ---------------------------------------------------
            # Quoting characters are removed only for the ClickHouse
            # identifier; the SQL Server lookup uses the real name.
            safe_table_name = (
                table_name
                .replace('`', '')
                .replace('[', '')
                .replace(']', '')
            )

            # ---------------------------------------------------
            # Get Table Schema
            # ---------------------------------------------------
            schema_df = pd.read_sql(
                schema_query,
                sql_conn,
                params=[table_name]
            )

            # ---------------------------------------------------
            # Skip Empty Schema
            # ---------------------------------------------------
            if schema_df.empty:
                print(f"No Columns Found : {safe_table_name}")
                continue

            # ---------------------------------------------------
            # Build ClickHouse Columns
            # ---------------------------------------------------
            columns = []

            for raw_name, raw_type, raw_nullable in zip(
                schema_df['COLUMN_NAME'],
                schema_df['DATA_TYPE'],
                schema_df['IS_NULLABLE']
            ):
                col_name = str(raw_name).replace('`', '')

                # Unmapped SQL Server types fall back to String.
                ch_type = DATATYPE_MAPPING.get(
                    str(raw_type).lower(),
                    'String'
                )

                # Nullable Handling
                if str(raw_nullable) == 'YES':
                    ch_type = f'Nullable({ch_type})'

                columns.append(f"`{col_name}` {ch_type}")

            # ---------------------------------------------------
            # Generate CREATE TABLE Query
            # ---------------------------------------------------
            create_table_query = f"""
            CREATE TABLE IF NOT EXISTS `{CH_CONFIG['database']}`.`{safe_table_name}`
            (
            {', '.join(columns)}
            )
            ENGINE = MergeTree()
            ORDER BY tuple()
            """

            # ---------------------------------------------------
            # Print SQL
            # ---------------------------------------------------
            print("\nGenerated CREATE TABLE SQL:\n")
            print(create_table_query)

            # ---------------------------------------------------
            # Save SQL Log
            # ---------------------------------------------------
            with open(
                "clickhouse_schema_debug.log",
                "a",
                encoding="utf-8"
            ) as log_file:
                log_file.write("\n\n=====================================\n")
                log_file.write(f"TABLE : {safe_table_name}\n")
                log_file.write(create_table_query)
                log_file.write("\n=====================================\n")

            # ---------------------------------------------------
            # Execute CREATE TABLE
            # ---------------------------------------------------
            ch_client.command(create_table_query)

            print(f"Table Created Successfully : {safe_table_name}")

        except Exception as table_error:
            # A failed table is logged and skipped so the remaining tables
            # are still created; it is counted for the final summary.
            failed_tables += 1

            print("\n===================================")
            print(f"FAILED TABLE : {table_name}")
            print("ERROR :", str(table_error))
            print("===================================")

            # ---------------------------------------------------
            # Error Logging
            # ---------------------------------------------------
            with open(
                "clickhouse_schema_error.log",
                "a",
                encoding="utf-8"
            ) as error_log:
                error_log.write("\n\n=====================================\n")
                error_log.write(f"TABLE : {table_name}\n")
                error_log.write(f"ERROR : {str(table_error)}\n")
                error_log.write("=====================================\n")

    print("\n===================================")
    if failed_tables:
        print("TABLE STRUCTURES CREATED WITH ERRORS")
        print(f"FAILED TABLES : {failed_tables}")
    else:
        print("ALL TABLE STRUCTURES CREATED")
    print("===================================")

except Exception as e:
    print("\n===================================")
    print("MAIN ERROR :", str(e))
    print("===================================")

finally:
    # ---------------------------------------------------
    # Close Connections
    # ---------------------------------------------------
    if sql_conn is not None:
        try:
            sql_conn.close()
            print("SQL Server Connection Closed")
        except Exception as close_error:
            print(f"SQL Server Close Failed : {close_error}")

    if ch_client is not None:
        try:
            ch_client.close()
            print("ClickHouse Connection Closed")
        except Exception as close_error:
            print(f"ClickHouse Close Failed : {close_error}")

print("Finished :", datetime.now())
|
||||||
@@ -0,0 +1,206 @@
|
|||||||
|
|
||||||
|
|
||||||
|
=====================================
|
||||||
|
TIME : 2026-05-18 13:21:13.727572
|
||||||
|
TABLE : Employee_Master
|
||||||
|
ERROR : object of type 'float' has no len()
|
||||||
|
TRACEBACK :
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "d:\Python Code\Data_migrate_to_clickhouse.py", line 228, in <module>
|
||||||
|
ch_client.insert(
|
||||||
|
~~~~~~~~~~~~~~~~^
|
||||||
|
table=TABLE_NAME,
|
||||||
|
^^^^^^^^^^^^^^^^^
|
||||||
|
...<2 lines>...
|
||||||
|
database=CH_CONFIG['database']
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
)
|
||||||
|
^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 977, in insert
|
||||||
|
context.data = data
|
||||||
|
^^^^^^^^^^^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 97, in data
|
||||||
|
self.block_row_count = self._calc_block_size()
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 119, in _calc_block_size
|
||||||
|
d_size = d_type.data_size(sample)
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 108, in data_size
|
||||||
|
d_size = self._data_size(sample)
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\string.py", line 31, in _data_size
|
||||||
|
total += len(x)
|
||||||
|
~~~^^^
|
||||||
|
TypeError: object of type 'float' has no len()
|
||||||
|
|
||||||
|
=====================================
|
||||||
|
|
||||||
|
=====================================
|
||||||
|
TIME : 2026-05-18 13:25:28.924657
|
||||||
|
TABLE : Employee_Master
|
||||||
|
ERROR : object of type 'float' has no len()
|
||||||
|
TRACEBACK :
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "d:\Python Code\Data_migrate_to_clickhouse.py", line 240, in <module>
|
||||||
|
ch_client.insert(
|
||||||
|
~~~~~~~~~~~~~~~~^
|
||||||
|
table=TABLE_NAME,
|
||||||
|
^^^^^^^^^^^^^^^^^
|
||||||
|
...<2 lines>...
|
||||||
|
database=CH_CONFIG['database']
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
)
|
||||||
|
^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 977, in insert
|
||||||
|
context.data = data
|
||||||
|
^^^^^^^^^^^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 97, in data
|
||||||
|
self.block_row_count = self._calc_block_size()
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 119, in _calc_block_size
|
||||||
|
d_size = d_type.data_size(sample)
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 108, in data_size
|
||||||
|
d_size = self._data_size(sample)
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\string.py", line 31, in _data_size
|
||||||
|
total += len(x)
|
||||||
|
~~~^^^
|
||||||
|
TypeError: object of type 'float' has no len()
|
||||||
|
|
||||||
|
=====================================
|
||||||
|
|
||||||
|
=====================================
|
||||||
|
TIME : 2026-05-18 13:28:12.319405
|
||||||
|
TABLE : Employee_Master
|
||||||
|
ERROR : object of type 'float' has no len()
|
||||||
|
TRACEBACK :
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "d:\Python Code\Data_migrate_to_clickhouse.py", line 240, in <module>
|
||||||
|
ch_client.insert(
|
||||||
|
~~~~~~~~~~~~~~~~^
|
||||||
|
table=TABLE_NAME,
|
||||||
|
^^^^^^^^^^^^^^^^^
|
||||||
|
...<2 lines>...
|
||||||
|
database=CH_CONFIG['database']
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
)
|
||||||
|
^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 977, in insert
|
||||||
|
context.data = data
|
||||||
|
^^^^^^^^^^^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 97, in data
|
||||||
|
self.block_row_count = self._calc_block_size()
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 119, in _calc_block_size
|
||||||
|
d_size = d_type.data_size(sample)
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 108, in data_size
|
||||||
|
d_size = self._data_size(sample)
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\string.py", line 31, in _data_size
|
||||||
|
total += len(x)
|
||||||
|
~~~^^^
|
||||||
|
TypeError: object of type 'float' has no len()
|
||||||
|
|
||||||
|
=====================================
|
||||||
|
|
||||||
|
=====================================
|
||||||
|
TIME : 2026-05-18 13:50:45.035193
|
||||||
|
TABLE : cpm_city_master
|
||||||
|
ERROR : invalid literal for int() with base 10: '3240.0'
|
||||||
|
TRACEBACK :
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "d:\Python Code\Data_migrate_to_clickhouse.py", line 254, in <module>
|
||||||
|
ch_client.insert_df(
|
||||||
|
~~~~~~~~~~~~~~~~~~~^
|
||||||
|
table=TABLE_NAME,
|
||||||
|
^^^^^^^^^^^^^^^^^
|
||||||
|
df=chunk,
|
||||||
|
^^^^^^^^^
|
||||||
|
database=CH_CONFIG['database']
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
)
|
||||||
|
^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 1013, in insert_df
|
||||||
|
return self.insert(table,
|
||||||
|
~~~~~~~~~~~^^^^^^^
|
||||||
|
df,
|
||||||
|
^^^
|
||||||
|
...<5 lines>...
|
||||||
|
transport_settings=transport_settings,
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
context=context)
|
||||||
|
^^^^^^^^^^^^^^^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 978, in insert
|
||||||
|
return self.data_insert(context)
|
||||||
|
~~~~~~~~~~~~~~~~^^^^^^^^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 347, in data_insert
|
||||||
|
response = self._raw_request(block_gen, params, headers, error_handler=error_handler, server_wait=False)
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 569, in _raw_request
|
||||||
|
error_handler(response)
|
||||||
|
~~~~~~~~~~~~~^^^^^^^^^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 331, in error_handler
|
||||||
|
raise ex
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\transform.py", line 114, in chunk_gen
|
||||||
|
col_type.write_column(data, output, context)
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 216, in write_column
|
||||||
|
self.write_column_data(column, dest, ctx)
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 231, in write_column_data
|
||||||
|
self._write_column_binary(column, dest, ctx)
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\numeric.py", line 29, in _write_column_binary
|
||||||
|
column = [int(x) if x else 0 for x in column]
|
||||||
|
~~~^^^
|
||||||
|
ValueError: invalid literal for int() with base 10: '3240.0'
|
||||||
|
|
||||||
|
=====================================
|
||||||
|
|
||||||
|
=====================================
|
||||||
|
TIME : 2026-05-18 15:13:42.747872
|
||||||
|
TABLE : cpm_city_master
|
||||||
|
ERROR : invalid literal for int() with base 10: '3240.0'
|
||||||
|
TRACEBACK :
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "d:\Python Code\City_Master_Import.py", line 256, in <module>
|
||||||
|
ch_client.insert_df(
|
||||||
|
~~~~~~~~~~~~~~~~~~~^
|
||||||
|
table=TABLE_NAME,
|
||||||
|
^^^^^^^^^^^^^^^^^
|
||||||
|
df=chunk,
|
||||||
|
^^^^^^^^^
|
||||||
|
database=CH_CONFIG['database']
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
)
|
||||||
|
^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 1013, in insert_df
|
||||||
|
return self.insert(table,
|
||||||
|
~~~~~~~~~~~^^^^^^^
|
||||||
|
df,
|
||||||
|
^^^
|
||||||
|
...<5 lines>...
|
||||||
|
transport_settings=transport_settings,
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
context=context)
|
||||||
|
^^^^^^^^^^^^^^^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 978, in insert
|
||||||
|
return self.data_insert(context)
|
||||||
|
~~~~~~~~~~~~~~~~^^^^^^^^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 347, in data_insert
|
||||||
|
response = self._raw_request(block_gen, params, headers, error_handler=error_handler, server_wait=False)
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 569, in _raw_request
|
||||||
|
error_handler(response)
|
||||||
|
~~~~~~~~~~~~~^^^^^^^^^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 331, in error_handler
|
||||||
|
raise ex
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\transform.py", line 114, in chunk_gen
|
||||||
|
col_type.write_column(data, output, context)
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 216, in write_column
|
||||||
|
self.write_column_data(column, dest, ctx)
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 231, in write_column_data
|
||||||
|
self._write_column_binary(column, dest, ctx)
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\numeric.py", line 29, in _write_column_binary
|
||||||
|
column = [int(x) if x else 0 for x in column]
|
||||||
|
~~~^^^
|
||||||
|
ValueError: invalid literal for int() with base 10: '3240.0'
|
||||||
|
|
||||||
|
=====================================
|
||||||
@@ -0,0 +1,64 @@
|
|||||||
|
|
||||||
|
|
||||||
|
=================================
|
||||||
|
TIME : 2026-05-18 13:15:13.522883
|
||||||
|
ERROR : object of type 'float' has no len()
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "d:\Python Code\Data_migrate_to_clickhouse.py", line 194, in <module>
|
||||||
|
ch_client.insert(
|
||||||
|
~~~~~~~~~~~~~~~~^
|
||||||
|
table=TABLE_NAME,
|
||||||
|
^^^^^^^^^^^^^^^^^
|
||||||
|
...<2 lines>...
|
||||||
|
database=CH_CONFIG['database']
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
)
|
||||||
|
^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 977, in insert
|
||||||
|
context.data = data
|
||||||
|
^^^^^^^^^^^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 97, in data
|
||||||
|
self.block_row_count = self._calc_block_size()
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 119, in _calc_block_size
|
||||||
|
d_size = d_type.data_size(sample)
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 108, in data_size
|
||||||
|
d_size = self._data_size(sample)
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\string.py", line 31, in _data_size
|
||||||
|
total += len(x)
|
||||||
|
~~~^^^
|
||||||
|
TypeError: object of type 'float' has no len()
|
||||||
|
|
||||||
|
=================================
|
||||||
|
|
||||||
|
|
||||||
|
=================================
|
||||||
|
TIME : 2026-05-18 13:17:54.779420
|
||||||
|
ERROR : object of type 'float' has no len()
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "d:\Python Code\Data_migrate_to_clickhouse.py", line 194, in <module>
|
||||||
|
ch_client.insert(
|
||||||
|
~~~~~~~~~~~~~~~~^
|
||||||
|
table=TABLE_NAME,
|
||||||
|
^^^^^^^^^^^^^^^^^
|
||||||
|
...<2 lines>...
|
||||||
|
database=CH_CONFIG['database']
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
)
|
||||||
|
^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 977, in insert
|
||||||
|
context.data = data
|
||||||
|
^^^^^^^^^^^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 97, in data
|
||||||
|
self.block_row_count = self._calc_block_size()
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~^^
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 119, in _calc_block_size
|
||||||
|
d_size = d_type.data_size(sample)
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 108, in data_size
|
||||||
|
d_size = self._data_size(sample)
|
||||||
|
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\string.py", line 31, in _data_size
|
||||||
|
total += len(x)
|
||||||
|
~~~^^^
|
||||||
|
TypeError: object of type 'float' has no len()
|
||||||
|
|
||||||
|
=================================
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
|
||||||
|
|
||||||
|
=====================================
|
||||||
|
TIME : 2026-05-18 13:49:49.650758
|
||||||
|
ERROR : name 'PROJECT_ID' is not defined
|
||||||
|
TRACEBACK :
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "d:\Python Code\Data_migrate_to_clickhouse.py", line 193, in <module>
|
||||||
|
WHERE Project_Id = {PROJECT_ID}
|
||||||
|
^^^^^^^^^^
|
||||||
|
NameError: name 'PROJECT_ID' is not defined
|
||||||
|
|
||||||
|
=====================================
|
||||||
|
|
||||||
|
=====================================
|
||||||
|
TIME : 2026-05-18 15:03:23.790219
|
||||||
|
ERROR : ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: Timeout error [258]. (258) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Unable to complete login process due to delay in login response (258)')
|
||||||
|
TRACEBACK :
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "d:\Python Code\City_Master_Import.py", line 178, in <module>
|
||||||
|
sql_conn = pyodbc.connect(SQL_CONN_STR)
|
||||||
|
pyodbc.OperationalError: ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: Timeout error [258]. (258) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Unable to complete login process due to delay in login response (258)')
|
||||||
|
|
||||||
|
=====================================
|
||||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user