first commit

This commit is contained in:
Dipanshu Kumar
2026-05-18 16:26:29 +05:30
commit 981995396a
8 changed files with 4405 additions and 0 deletions
+376
View File
@@ -0,0 +1,376 @@
import pyodbc
import pandas as pd
import clickhouse_connect
import numpy as np
from datetime import datetime
import traceback
import warnings
# ---------------------------------------------------
# Ignore Warning
# ---------------------------------------------------
import os  # needed only for the environment overrides below

# Silence the warning pandas emits when read_sql() is handed a raw DBAPI
# connection (pyodbc) instead of a SQLAlchemy connectable.
warnings.filterwarnings(
    'ignore',
    'pandas only supports SQLAlchemy connectable'
)
print("ETL Started :", datetime.now())

# ---------------------------------------------------
# SQL SERVER CONNECTION
# ---------------------------------------------------
# SECURITY: credentials must not live in source control. Every value can be
# supplied through an ETL_* environment variable; the literals are kept only
# as fallbacks so existing runs keep working.
# TODO(review): rotate these passwords and delete the literal fallbacks.
SQL_CONN_STR = (
    'DRIVER={ODBC Driver 17 for SQL Server};'
    f"SERVER={os.environ.get('ETL_SQL_SERVER', '10.200.25.65')};"
    f"DATABASE={os.environ.get('ETL_SQL_DATABASE', 'CPMIndiaBusinessInsight')};"
    f"UID={os.environ.get('ETL_SQL_UID', 'bsgteam_test')};"
    f"PWD={os.environ.get('ETL_SQL_PWD', 'B$gt3@m#00512')};"
    'TrustServerCertificate=yes;'
)

# ---------------------------------------------------
# CLICKHOUSE CONFIG
# ---------------------------------------------------
# Passed as keyword arguments to clickhouse_connect.get_client().
CH_CONFIG = {
    'host': os.environ.get('ETL_CH_HOST', '172.188.12.194'),
    'port': int(os.environ.get('ETL_CH_PORT', '8123')),
    'username': os.environ.get('ETL_CH_USERNAME', 'default'),
    'password': os.environ.get('ETL_CH_PASSWORD', 'dipanshu_k'),
    'database': os.environ.get('ETL_CH_DATABASE', 'DaburIndia_BI'),
}

# ---------------------------------------------------
# TABLE DETAILS
# ---------------------------------------------------
# Source table (dbo schema) and the Project_Id used to filter its rows; the
# same table name is used as the ClickHouse insert target.
TABLE_NAME = 'Employee_Master'
PROJECT_ID = 41654
# ---------------------------------------------------
# CLEAN DATAFRAME
# ---------------------------------------------------
def _to_insertable(val):
    """Convert one cell of a non-date column to None, str or int."""
    if pd.isnull(val):
        return None
    if isinstance(val, str):
        return val
    # bool must be tested before int: bool is a subclass of int, so a later
    # bool check can never be reached. Booleans are emitted as 0/1.
    if isinstance(val, (bool, np.bool_)):
        return int(val)
    if isinstance(val, (int, np.integer)):
        return int(val)
    # Floats, datetimes and every other type are stringified.
    # NOTE(review): 3240.0 becomes '3240.0'; an integer target column would
    # reject that text -- confirm the target column types.
    return str(val)


def clean_dataframe(df, min_year=1970, max_year=2100):
    """Return a copy of *df* with values normalised for insertion.

    NaN is replaced with None first. Then, column by column:

    * Columns whose name contains ``date`` (case-insensitive) are parsed with
      ``pd.to_datetime(errors='coerce')``; values that fail to parse or whose
      year is outside ``[min_year, max_year]`` become None, the rest become
      ``datetime.date`` objects (any time-of-day part is dropped).
      NOTE(review): the substring test also matches names such as
      ``updated_by``; non-date text in such a column is coerced to None.
    * Every other column is rebuilt value by value: nulls -> None, strings
      unchanged, booleans and integers -> int, anything else -> ``str(val)``.

    Args:
        df: DataFrame to clean. It is not modified in place.
        min_year: Smallest year kept in date columns (inclusive).
        max_year: Largest year kept in date columns (inclusive).

    Returns:
        The cleaned DataFrame. A column whose cleaning raises is reported on
        stdout and left as it was; if the initial NaN replacement itself
        fails, the original *df* is returned unchanged.
    """
    try:
        df = df.replace({np.nan: None})
    except Exception as clean_error:
        print("\n===================================")
        print("DATA CLEAN FAILED")
        print(str(clean_error))
        print("===================================")
        return df

    for col in df.columns:
        try:
            if 'date' in col.lower():
                print(f"Cleaning Date Column : {col}")
                parsed = pd.to_datetime(df[col], errors='coerce')
                years = parsed.dt.year
                parsed = parsed.where((years >= min_year) & (years <= max_year))
                # Assign only once the whole conversion has succeeded, so a
                # failure never leaves the column half-converted.
                df[col] = parsed.apply(
                    lambda x: x.date() if pd.notnull(x) else None
                )
            else:
                df[col] = [_to_insertable(val) for val in df[col]]
        except Exception as col_error:
            # Best effort: report the column and carry on with the others.
            print("\n===================================")
            print(f"COLUMN FAILED : {col}")
            print(str(col_error))
            print("===================================")
    return df
def _append_error_log(path, error, table=None):
    """Append one timestamped error entry to the log file at *path*.

    Call this from inside an ``except`` block: the traceback written is the
    one currently being handled (``traceback.format_exc()``).
    """
    entry = [
        "\n\n=====================================",
        f"\nTIME : {datetime.now()}",
    ]
    if table is not None:
        entry.append(f"\nTABLE : {table}")
    entry.append(f"\nERROR : {error}")
    entry.append(f"\nTRACEBACK :\n{traceback.format_exc()}")
    entry.append("\n=====================================")
    with open(path, "a", encoding="utf-8") as log:
        log.writelines(entry)


# Start as None so the cleanup in `finally` can tell "never opened" apart
# from "opened" without relying on a swallowed NameError.
sql_conn = None
ch_client = None
try:
    # ---------------------------------------------------
    # CONNECT SQL SERVER / CLICKHOUSE
    # ---------------------------------------------------
    sql_conn = pyodbc.connect(SQL_CONN_STR)
    print("Connected to SQL Server")
    ch_client = clickhouse_connect.get_client(**CH_CONFIG)
    print("Connected to ClickHouse")

    # ---------------------------------------------------
    # QUERY
    # ---------------------------------------------------
    # Both interpolated values are module constants, not user input.
    query = f"""
    SELECT *
    FROM dbo.[{TABLE_NAME}]
    WHERE Project_Id = {PROJECT_ID}
    """
    print("\nExecuting Query:")
    print(query)

    chunk_size = 100000
    total_rows = 0
    failed_chunks = 0

    # ---------------------------------------------------
    # READ DATA (streamed in chunks of `chunk_size` rows)
    # ---------------------------------------------------
    for chunk in pd.read_sql(query, sql_conn, chunksize=chunk_size):
        try:
            print("\n===================================")
            print(f"Processing {len(chunk)} Rows")
            print("===================================")

            chunk = clean_dataframe(chunk)

            # Debug aid: Python type and first non-null value per column.
            print("\nFINAL COLUMN TYPES")
            for col in chunk.columns:
                sample = chunk[col].dropna()
                if len(sample) > 0:
                    print(col, type(sample.iloc[0]), sample.iloc[0])

            print("\nSAMPLE DATA")
            print(chunk.head(2))

            print("\nInserting into ClickHouse...")
            ch_client.insert_df(
                table=TABLE_NAME,
                df=chunk,
                database=CH_CONFIG['database']
            )
            total_rows += len(chunk)
            print(f"\nInserted Total Rows : {total_rows}")
        except Exception as chunk_error:
            # A failed chunk is logged and skipped so later chunks still
            # load; it is counted so the final summary cannot claim success.
            failed_chunks += 1
            print("\n===================================")
            print("CHUNK INSERT FAILED")
            print("===================================")
            print(str(chunk_error))
            traceback.print_exc()
            _append_error_log(
                "clickhouse_chunk_error.log",
                chunk_error,
                table=TABLE_NAME
            )
            continue

    print("\n===================================")
    if failed_chunks:
        print("ETL COMPLETED WITH ERRORS")
        print(f"FAILED CHUNKS : {failed_chunks}")
    else:
        print("ETL COMPLETED SUCCESSFULLY")
    print(f"TOTAL ROWS INSERTED : {total_rows}")
    print("===================================")
except Exception as main_error:
    # Top-level boundary: connection or query failures are reported and
    # logged instead of crashing, as in the original script.
    print("\n===================================")
    print("MAIN ERROR")
    print("===================================")
    print(str(main_error))
    traceback.print_exc()
    _append_error_log("clickhouse_main_error.log", main_error)
finally:
    # ---------------------------------------------------
    # CLOSE CONNECTIONS (only those that were opened)
    # ---------------------------------------------------
    if sql_conn is not None:
        try:
            sql_conn.close()
            print("\nSQL Server Connection Closed")
        except Exception as close_error:
            print("\nSQL Server Connection Close Failed :", close_error)
    if ch_client is not None:
        try:
            ch_client.close()
            print("ClickHouse Connection Closed")
        except Exception as close_error:
            print("ClickHouse Connection Close Failed :", close_error)
    print("\nETL Finished :", datetime.now())
+453
View File
@@ -0,0 +1,453 @@
import pyodbc
import pandas as pd
import clickhouse_connect
import numpy as np
from datetime import datetime
import traceback
import warnings
# =========================================================
# IGNORE WARNINGS
# =========================================================
import os  # needed only for the environment overrides below

# Silence the warning pandas emits when read_sql() is handed a raw DBAPI
# connection (pyodbc) instead of a SQLAlchemy connectable.
warnings.filterwarnings(
    'ignore',
    'pandas only supports SQLAlchemy connectable'
)
print("ETL Started :", datetime.now())

# =========================================================
# SQL SERVER CONNECTION
# =========================================================
# SECURITY: credentials must not live in source control. Every value can be
# supplied through an ETL_* environment variable; the literals are kept only
# as fallbacks so existing runs keep working.
# TODO(review): rotate these passwords and delete the literal fallbacks.
SQL_CONN_STR = (
    'DRIVER={ODBC Driver 17 for SQL Server};'
    f"SERVER={os.environ.get('ETL_SQL_SERVER', '10.200.25.65')};"
    f"DATABASE={os.environ.get('ETL_SQL_DATABASE', 'CPMIndiaBusinessInsight')};"
    f"UID={os.environ.get('ETL_SQL_UID', 'bsgteam_test')};"
    f"PWD={os.environ.get('ETL_SQL_PWD', 'B$gt3@m#00512')};"
    'TrustServerCertificate=yes;'
)

# =========================================================
# CLICKHOUSE CONFIG
# =========================================================
# Passed as keyword arguments to clickhouse_connect.get_client().
CH_CONFIG = {
    'host': os.environ.get('ETL_CH_HOST', '172.188.12.194'),
    'port': int(os.environ.get('ETL_CH_PORT', '8123')),
    'username': os.environ.get('ETL_CH_USERNAME', 'default'),
    'password': os.environ.get('ETL_CH_PASSWORD', 'dipanshu_k'),
    'database': os.environ.get('ETL_CH_DATABASE', 'DaburIndia_BI'),
}

# =========================================================
# TABLE NAME
# =========================================================
# Source table (dbo schema) and the Project_Id used to filter its rows; the
# same table name is used as the ClickHouse insert target.
TABLE_NAME = 'Store_Master'
PROJECT_ID = 41654
# =========================================================
# CLEAN DATAFRAME
# =========================================================
def _normalize_scalar(val):
    """Convert one cell of an object column to None, int, float or str."""
    if pd.isnull(val):
        return None
    # bool must be tested before int: bool is a subclass of int, so a later
    # bool check can never be reached. Booleans are emitted as 0/1.
    if isinstance(val, (bool, np.bool_)):
        return int(val)
    if isinstance(val, (int, np.integer)):
        return int(val)
    if isinstance(val, (float, np.floating)):
        # Whole floats become ints so they never surface as text such as
        # '3240.0'. Going through float() keeps is_integer() available for
        # NumPy scalar types that do not provide the method themselves.
        as_float = float(val)
        return int(as_float) if as_float.is_integer() else as_float
    if isinstance(val, str):
        return val.strip()
    # Datetimes and every other type are stringified.
    return str(val)


def clean_dataframe(df, min_year=1970, max_year=2100):
    """Return a copy of *df* with values normalised for insertion.

    NaN is replaced with None first. Then each column is handled by the
    first rule that matches:

    * Name contains ``date`` (case-insensitive): parsed with
      ``pd.to_datetime(errors='coerce')``; values that fail to parse or whose
      year is outside ``[min_year, max_year]`` become None, the rest become
      ``datetime.date`` objects (any time-of-day part is dropped).
      NOTE(review): the substring test also matches names such as
      ``updated_by``; non-date text in such a column is coerced to None.
    * Integer dtype: values are converted to Python ``int``.
    * Float dtype: if every non-null value is whole the column becomes ints
      (``3240.0 -> 3240``), otherwise Python floats.
    * Anything else: rebuilt value by value -- nulls -> None, booleans and
      integers -> int, whole floats -> int, other floats -> float, strings
      stripped of surrounding whitespace, everything else -> ``str(val)``.

    Args:
        df: DataFrame to clean. It is not modified in place.
        min_year: Smallest year kept in date columns (inclusive).
        max_year: Largest year kept in date columns (inclusive).

    Returns:
        The cleaned DataFrame. A column whose cleaning raises is reported on
        stdout and left as it was; if the initial NaN replacement itself
        fails, the original *df* is returned unchanged.
    """
    try:
        df = df.replace({np.nan: None})
    except Exception as clean_error:
        print("\n================================")
        print("DATA CLEAN FAILED")
        print(str(clean_error))
        print("================================")
        return df

    for col in df.columns:
        try:
            col_lower = col.lower()
            if 'date' in col_lower:
                print(f"Cleaning Date Column : {col}")
                parsed = pd.to_datetime(df[col], errors='coerce')
                years = parsed.dt.year
                parsed = parsed.where((years >= min_year) & (years <= max_year))
                # Assign only once the whole conversion has succeeded, so a
                # failure never leaves the column half-converted.
                df[col] = parsed.apply(
                    lambda x: x.date() if pd.notnull(x) else None
                )
            elif pd.api.types.is_integer_dtype(df[col]):
                numeric = pd.to_numeric(df[col], errors='coerce')
                df[col] = numeric.apply(
                    lambda x: int(x) if pd.notnull(x) else None
                )
            elif pd.api.types.is_float_dtype(df[col]):
                non_null = df[col].dropna()
                all_whole = len(non_null) > 0 and (non_null % 1 == 0).all()
                caster = int if all_whole else float
                df[col] = df[col].apply(
                    lambda x: caster(x) if pd.notnull(x) else None
                )
            else:
                df[col] = [_normalize_scalar(val) for val in df[col]]
        except Exception as col_error:
            # Best effort: report the column and carry on with the others.
            print("\n================================")
            print(f"COLUMN FAILED : {col}")
            print(str(col_error))
            print("================================")
    return df
# =========================================================
# MAIN PROCESS
# =========================================================
def _append_error_log(path, error, table=None):
    """Append one timestamped error entry to the log file at *path*.

    Call this from inside an ``except`` block: the traceback written is the
    one currently being handled (``traceback.format_exc()``).
    """
    entry = [
        "\n\n================================",
        f"\nTIME : {datetime.now()}",
    ]
    if table is not None:
        entry.append(f"\nTABLE : {table}")
    entry.append(f"\nERROR : {error}")
    entry.append(f"\nTRACEBACK :\n{traceback.format_exc()}")
    entry.append("\n================================")
    with open(path, "a", encoding="utf-8") as log:
        log.writelines(entry)


# Start as None so the cleanup in `finally` can tell "never opened" apart
# from "opened" without relying on a swallowed NameError.
sql_conn = None
ch_client = None
try:
    # =====================================================
    # CONNECT SQL SERVER / CLICKHOUSE
    # =====================================================
    sql_conn = pyodbc.connect(SQL_CONN_STR)
    print("Connected to SQL Server")
    ch_client = clickhouse_connect.get_client(**CH_CONFIG)
    print("Connected to ClickHouse")

    # =====================================================
    # QUERY
    # =====================================================
    # Both interpolated values are module constants, not user input.
    query = f"""
    SELECT *
    FROM dbo.[{TABLE_NAME}]
    WHERE Project_Id = {PROJECT_ID}
    """
    print("\nExecuting Query:")
    print(query)

    chunk_size = 100000
    total_rows = 0
    failed_chunks = 0

    # =====================================================
    # READ DATA (streamed in chunks of `chunk_size` rows)
    # =====================================================
    for chunk in pd.read_sql(query, sql_conn, chunksize=chunk_size):
        try:
            print("\n================================")
            print(f"Processing {len(chunk)} Rows")
            print("================================")

            chunk = clean_dataframe(chunk)

            # Debug aid: Python type and first non-null value per column.
            print("\nCOLUMN TYPES")
            for col in chunk.columns:
                sample = chunk[col].dropna()
                if len(sample) > 0:
                    print(col, type(sample.iloc[0]), sample.iloc[0])

            print("\nSAMPLE DATA")
            print(chunk.head(2))

            print("\nInserting into ClickHouse...")
            ch_client.insert_df(
                table=TABLE_NAME,
                df=chunk,
                database=CH_CONFIG['database']
            )
            total_rows += len(chunk)
            print(f"\nInserted Total Rows : {total_rows}")
        except Exception as chunk_error:
            # A failed chunk is logged and skipped so later chunks still
            # load; it is counted so the final summary cannot claim success.
            failed_chunks += 1
            print("\n================================")
            print("CHUNK INSERT FAILED")
            print("================================")
            print(str(chunk_error))
            traceback.print_exc()
            _append_error_log(
                "clickhouse_chunk_error.log",
                chunk_error,
                table=TABLE_NAME
            )
            continue

    print("\n================================")
    if failed_chunks:
        print("ETL COMPLETED WITH ERRORS")
        print(f"FAILED CHUNKS : {failed_chunks}")
    else:
        print("ETL COMPLETED SUCCESSFULLY")
    print(f"TOTAL ROWS INSERTED : {total_rows}")
    print("================================")
# =========================================================
# MAIN ERROR
# =========================================================
except Exception as main_error:
    # Top-level boundary: connection or query failures are reported and
    # logged instead of crashing, as in the original script.
    print("\n================================")
    print("MAIN ERROR")
    print("================================")
    print(str(main_error))
    traceback.print_exc()
    _append_error_log("clickhouse_main_error.log", main_error)
# =========================================================
# CLOSE CONNECTIONS (only those that were opened)
# =========================================================
finally:
    if sql_conn is not None:
        try:
            sql_conn.close()
            print("\nSQL Server Connection Closed")
        except Exception as close_error:
            print("\nSQL Server Connection Close Failed :", close_error)
    if ch_client is not None:
        try:
            ch_client.close()
            print("ClickHouse Connection Closed")
        except Exception as close_error:
            print("ClickHouse Connection Close Failed :", close_error)
    print("\nETL Finished :", datetime.now())
+291
View File
@@ -0,0 +1,291 @@
import pyodbc
import pandas as pd
import clickhouse_connect
from datetime import datetime
import warnings
# ---------------------------------------------------
# Suppress Pandas Warning
# ---------------------------------------------------
import os  # needed only for the environment overrides below

# Silence the warning pandas emits when read_sql() is handed a raw DBAPI
# connection (pyodbc) instead of a SQLAlchemy connectable.
warnings.filterwarnings(
    'ignore',
    'pandas only supports SQLAlchemy connectable'
)
print("SCHEMA Migration Started :", datetime.now())

# ---------------------------------------------------
# SQL Server Connection
# ---------------------------------------------------
# SECURITY: credentials must not live in source control. Every value can be
# supplied through an ETL_* environment variable; the literals are kept only
# as fallbacks so existing runs keep working.
# TODO(review): rotate these passwords and delete the literal fallbacks.
SQL_CONN_STR = (
    'DRIVER={ODBC Driver 17 for SQL Server};'
    f"SERVER={os.environ.get('ETL_SQL_SERVER', '10.200.25.65')};"
    f"DATABASE={os.environ.get('ETL_SQL_DATABASE', 'CPMIndiaBusinessInsight')};"
    f"UID={os.environ.get('ETL_SQL_UID', 'bsgteam_test')};"
    f"PWD={os.environ.get('ETL_SQL_PWD', 'B$gt3@m#00512')};"
    'TrustServerCertificate=yes;'
)

# ---------------------------------------------------
# ClickHouse Connection
# ---------------------------------------------------
# Passed as keyword arguments to clickhouse_connect.get_client().
CH_CONFIG = {
    'host': os.environ.get('ETL_CH_HOST', '172.188.12.194'),
    'port': int(os.environ.get('ETL_CH_PORT', '8123')),
    'username': os.environ.get('ETL_CH_USERNAME', 'default'),
    'password': os.environ.get('ETL_CH_PASSWORD', 'dipanshu_k'),
    'database': os.environ.get('ETL_CH_DATABASE', 'DaburIndia_BI'),
}

# ---------------------------------------------------
# SQL Server -> ClickHouse Datatype Mapping
# ---------------------------------------------------
# Keys are lower-cased INFORMATION_SCHEMA DATA_TYPE names. A type missing
# from this table falls back to 'String' where the mapping is looked up.
DATATYPE_MAPPING = {
    'bigint': 'Int64',
    'int': 'Int32',
    'smallint': 'Int16',
    # SQL Server tinyint is unsigned (0-255); signed Int8 stops at 127.
    'tinyint': 'UInt8',
    'bit': 'UInt8',
    'float': 'Float64',
    'real': 'Float32',
    # NOTE(review): exact decimal/numeric/money values are stored as binary
    # floats here and may lose precision -- confirm this is acceptable.
    'decimal': 'Float64',
    'numeric': 'Float64',
    'money': 'Float64',
    'varchar': 'String',
    'nvarchar': 'String',
    'char': 'String',
    'nchar': 'String',
    'text': 'String',
    'ntext': 'String',
    'xml': 'String',
    'date': 'Date32',
    'datetime': 'DateTime64',
    'datetime2': 'DateTime64',
    'smalldatetime': 'DateTime64',
    'time': 'String',
    'uniqueidentifier': 'String',
    'binary': 'String',
    'varbinary': 'String'
}
# Start as None so the cleanup in `finally` can tell "never opened" apart
# from "opened" without relying on a swallowed NameError.
sql_conn = None
ch_client = None
try:
    # ---------------------------------------------------
    # Connect SQL Server / ClickHouse
    # ---------------------------------------------------
    sql_conn = pyodbc.connect(SQL_CONN_STR)
    print("Connected to SQL Server")
    ch_client = clickhouse_connect.get_client(**CH_CONFIG)
    print("Connected to ClickHouse")

    # ---------------------------------------------------
    # Create Database if Not Exists
    # ---------------------------------------------------
    create_database_query = f"""
    CREATE DATABASE IF NOT EXISTS `{CH_CONFIG['database']}`
    """
    ch_client.command(create_database_query)
    print(f"Database Verified : {CH_CONFIG['database']}")

    # ---------------------------------------------------
    # Get All SQL Server Tables (dbo base tables only)
    # ---------------------------------------------------
    table_query = """
    SELECT TABLE_NAME
    FROM INFORMATION_SCHEMA.TABLES
    WHERE TABLE_TYPE = 'BASE TABLE'
    AND TABLE_SCHEMA = 'dbo'
    ORDER BY TABLE_NAME
    """
    tables_df = pd.read_sql(table_query, sql_conn)
    tables = tables_df['TABLE_NAME'].tolist()
    print(f"Total Tables Found : {len(tables)}")

    # The column lookup is the same for every table, so build it once.
    # It is restricted to dbo, matching the table list above, so a table of
    # the same name in another schema cannot contribute columns.
    schema_query = """
    SELECT
    COLUMN_NAME,
    DATA_TYPE,
    IS_NULLABLE
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE TABLE_NAME = ?
    AND TABLE_SCHEMA = 'dbo'
    ORDER BY ORDINAL_POSITION
    """

    failed_tables = []

    # ---------------------------------------------------
    # Process Each Table
    # ---------------------------------------------------
    for table_name in tables:
        try:
            print("\n===================================")
            print(f"Creating Table : {table_name}")
            print("===================================")

            # Name used on the ClickHouse side: quoting characters are
            # removed so it can sit safely between backticks.
            safe_table_name = (
                table_name
                .replace('`', '')
                .replace('[', '')
                .replace(']', '')
            )

            # Look the columns up under the real SQL Server name; the
            # sanitised name would miss any table whose name contains one
            # of the stripped characters.
            schema_df = pd.read_sql(
                schema_query,
                sql_conn,
                params=[table_name]
            )
            if schema_df.empty:
                print(f"No Columns Found : {safe_table_name}")
                continue

            # ---------------------------------------------------
            # Build ClickHouse Columns
            # ---------------------------------------------------
            columns = []
            for _, row in schema_df.iterrows():
                col_name = str(row['COLUMN_NAME']).replace('`', '')
                sql_type = str(row['DATA_TYPE']).lower()
                # Unknown SQL Server types fall back to String.
                ch_type = DATATYPE_MAPPING.get(sql_type, 'String')
                if str(row['IS_NULLABLE']) == 'YES':
                    ch_type = f'Nullable({ch_type})'
                columns.append(f"`{col_name}` {ch_type}")

            # ---------------------------------------------------
            # Generate CREATE TABLE Query
            # ---------------------------------------------------
            create_table_query = f"""
            CREATE TABLE IF NOT EXISTS `{CH_CONFIG['database']}`.`{safe_table_name}`
            (
            {', '.join(columns)}
            )
            ENGINE = MergeTree()
            ORDER BY tuple()
            """
            print("\nGenerated CREATE TABLE SQL:\n")
            print(create_table_query)

            # Keep a record of every statement that is about to be run.
            with open(
                "clickhouse_schema_debug.log",
                "a",
                encoding="utf-8"
            ) as log_file:
                log_file.write("\n\n=====================================\n")
                log_file.write(f"TABLE : {safe_table_name}\n")
                log_file.write(create_table_query)
                log_file.write("\n=====================================\n")

            ch_client.command(create_table_query)
            print(f"Table Created Successfully : {safe_table_name}")
        except Exception as table_error:
            # One bad table must not stop the rest; it is recorded so the
            # final summary cannot claim everything was created.
            failed_tables.append(table_name)
            print("\n===================================")
            print(f"FAILED TABLE : {table_name}")
            print("ERROR :", str(table_error))
            print("===================================")
            with open(
                "clickhouse_schema_error.log",
                "a",
                encoding="utf-8"
            ) as error_log:
                error_log.write("\n\n=====================================\n")
                error_log.write(f"TABLE : {table_name}\n")
                error_log.write(f"ERROR : {str(table_error)}\n")
                error_log.write("=====================================\n")
            continue

    print("\n===================================")
    if failed_tables:
        print("TABLE STRUCTURES CREATED WITH ERRORS")
        print(f"FAILED TABLES : {len(failed_tables)}")
    else:
        print("ALL TABLE STRUCTURES CREATED")
    print("===================================")
except Exception as e:
    # Top-level boundary: report and fall through to cleanup.
    print("\n===================================")
    print("MAIN ERROR :", str(e))
    print("===================================")
finally:
    # ---------------------------------------------------
    # Close Connections (only those that were opened)
    # ---------------------------------------------------
    if sql_conn is not None:
        try:
            sql_conn.close()
            print("SQL Server Connection Closed")
        except Exception as close_error:
            print("SQL Server Connection Close Failed :", close_error)
    if ch_client is not None:
        try:
            ch_client.close()
            print("ClickHouse Connection Closed")
        except Exception as close_error:
            print("ClickHouse Connection Close Failed :", close_error)
    print("Finished :", datetime.now())
+1
View File
@@ -0,0 +1 @@
kwjhefyieafutokklkk
+206
View File
@@ -0,0 +1,206 @@
=====================================
TIME : 2026-05-18 13:21:13.727572
TABLE : Employee_Master
ERROR : object of type 'float' has no len()
TRACEBACK :
Traceback (most recent call last):
File "d:\Python Code\Data_migrate_to_clickhouse.py", line 228, in <module>
ch_client.insert(
~~~~~~~~~~~~~~~~^
table=TABLE_NAME,
^^^^^^^^^^^^^^^^^
...<2 lines>...
database=CH_CONFIG['database']
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 977, in insert
context.data = data
^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 97, in data
self.block_row_count = self._calc_block_size()
~~~~~~~~~~~~~~~~~~~~~^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 119, in _calc_block_size
d_size = d_type.data_size(sample)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 108, in data_size
d_size = self._data_size(sample)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\string.py", line 31, in _data_size
total += len(x)
~~~^^^
TypeError: object of type 'float' has no len()
=====================================
=====================================
TIME : 2026-05-18 13:25:28.924657
TABLE : Employee_Master
ERROR : object of type 'float' has no len()
TRACEBACK :
Traceback (most recent call last):
File "d:\Python Code\Data_migrate_to_clickhouse.py", line 240, in <module>
ch_client.insert(
~~~~~~~~~~~~~~~~^
table=TABLE_NAME,
^^^^^^^^^^^^^^^^^
...<2 lines>...
database=CH_CONFIG['database']
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 977, in insert
context.data = data
^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 97, in data
self.block_row_count = self._calc_block_size()
~~~~~~~~~~~~~~~~~~~~~^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 119, in _calc_block_size
d_size = d_type.data_size(sample)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 108, in data_size
d_size = self._data_size(sample)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\string.py", line 31, in _data_size
total += len(x)
~~~^^^
TypeError: object of type 'float' has no len()
=====================================
=====================================
TIME : 2026-05-18 13:28:12.319405
TABLE : Employee_Master
ERROR : object of type 'float' has no len()
TRACEBACK :
Traceback (most recent call last):
File "d:\Python Code\Data_migrate_to_clickhouse.py", line 240, in <module>
ch_client.insert(
~~~~~~~~~~~~~~~~^
table=TABLE_NAME,
^^^^^^^^^^^^^^^^^
...<2 lines>...
database=CH_CONFIG['database']
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 977, in insert
context.data = data
^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 97, in data
self.block_row_count = self._calc_block_size()
~~~~~~~~~~~~~~~~~~~~~^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 119, in _calc_block_size
d_size = d_type.data_size(sample)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 108, in data_size
d_size = self._data_size(sample)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\string.py", line 31, in _data_size
total += len(x)
~~~^^^
TypeError: object of type 'float' has no len()
=====================================
=====================================
TIME : 2026-05-18 13:50:45.035193
TABLE : cpm_city_master
ERROR : invalid literal for int() with base 10: '3240.0'
TRACEBACK :
Traceback (most recent call last):
File "d:\Python Code\Data_migrate_to_clickhouse.py", line 254, in <module>
ch_client.insert_df(
~~~~~~~~~~~~~~~~~~~^
table=TABLE_NAME,
^^^^^^^^^^^^^^^^^
df=chunk,
^^^^^^^^^
database=CH_CONFIG['database']
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 1013, in insert_df
return self.insert(table,
~~~~~~~~~~~^^^^^^^
df,
^^^
...<5 lines>...
transport_settings=transport_settings,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
context=context)
^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 978, in insert
return self.data_insert(context)
~~~~~~~~~~~~~~~~^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 347, in data_insert
response = self._raw_request(block_gen, params, headers, error_handler=error_handler, server_wait=False)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 569, in _raw_request
error_handler(response)
~~~~~~~~~~~~~^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 331, in error_handler
raise ex
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\transform.py", line 114, in chunk_gen
col_type.write_column(data, output, context)
~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 216, in write_column
self.write_column_data(column, dest, ctx)
~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 231, in write_column_data
self._write_column_binary(column, dest, ctx)
~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\numeric.py", line 29, in _write_column_binary
column = [int(x) if x else 0 for x in column]
~~~^^^
ValueError: invalid literal for int() with base 10: '3240.0'
=====================================
=====================================
TIME : 2026-05-18 15:13:42.747872
TABLE : cpm_city_master
ERROR : invalid literal for int() with base 10: '3240.0'
TRACEBACK :
Traceback (most recent call last):
File "d:\Python Code\City_Master_Import.py", line 256, in <module>
ch_client.insert_df(
~~~~~~~~~~~~~~~~~~~^
table=TABLE_NAME,
^^^^^^^^^^^^^^^^^
df=chunk,
^^^^^^^^^
database=CH_CONFIG['database']
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 1013, in insert_df
return self.insert(table,
~~~~~~~~~~~^^^^^^^
df,
^^^
...<5 lines>...
transport_settings=transport_settings,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
context=context)
^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 978, in insert
return self.data_insert(context)
~~~~~~~~~~~~~~~~^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 347, in data_insert
response = self._raw_request(block_gen, params, headers, error_handler=error_handler, server_wait=False)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 569, in _raw_request
error_handler(response)
~~~~~~~~~~~~~^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 331, in error_handler
raise ex
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\transform.py", line 114, in chunk_gen
col_type.write_column(data, output, context)
~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 216, in write_column
self.write_column_data(column, dest, ctx)
~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 231, in write_column_data
self._write_column_binary(column, dest, ctx)
~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\numeric.py", line 29, in _write_column_binary
column = [int(x) if x else 0 for x in column]
~~~^^^
ValueError: invalid literal for int() with base 10: '3240.0'
=====================================
+64
View File
@@ -0,0 +1,64 @@
=================================
TIME : 2026-05-18 13:15:13.522883
ERROR : object of type 'float' has no len()
Traceback (most recent call last):
File "d:\Python Code\Data_migrate_to_clickhouse.py", line 194, in <module>
ch_client.insert(
~~~~~~~~~~~~~~~~^
table=TABLE_NAME,
^^^^^^^^^^^^^^^^^
...<2 lines>...
database=CH_CONFIG['database']
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 977, in insert
context.data = data
^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 97, in data
self.block_row_count = self._calc_block_size()
~~~~~~~~~~~~~~~~~~~~~^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 119, in _calc_block_size
d_size = d_type.data_size(sample)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 108, in data_size
d_size = self._data_size(sample)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\string.py", line 31, in _data_size
total += len(x)
~~~^^^
TypeError: object of type 'float' has no len()
=================================
=================================
TIME : 2026-05-18 13:17:54.779420
ERROR : object of type 'float' has no len()
Traceback (most recent call last):
File "d:\Python Code\Data_migrate_to_clickhouse.py", line 194, in <module>
ch_client.insert(
~~~~~~~~~~~~~~~~^
table=TABLE_NAME,
^^^^^^^^^^^^^^^^^
...<2 lines>...
database=CH_CONFIG['database']
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 977, in insert
context.data = data
^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 97, in data
self.block_row_count = self._calc_block_size()
~~~~~~~~~~~~~~~~~~~~~^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\insert.py", line 119, in _calc_block_size
d_size = d_type.data_size(sample)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 108, in data_size
d_size = self._data_size(sample)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\string.py", line 31, in _data_size
total += len(x)
~~~^^^
TypeError: object of type 'float' has no len()
=================================
+24
View File
@@ -0,0 +1,24 @@
=====================================
TIME : 2026-05-18 13:49:49.650758
ERROR : name 'PROJECT_ID' is not defined
TRACEBACK :
Traceback (most recent call last):
File "d:\Python Code\Data_migrate_to_clickhouse.py", line 193, in <module>
WHERE Project_Id = {PROJECT_ID}
^^^^^^^^^^
NameError: name 'PROJECT_ID' is not defined
=====================================
=====================================
TIME : 2026-05-18 15:03:23.790219
ERROR : ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: Timeout error [258]. (258) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Unable to complete login process due to delay in login response (258)')
TRACEBACK :
Traceback (most recent call last):
File "d:\Python Code\City_Master_Import.py", line 178, in <module>
sql_conn = pyodbc.connect(SQL_CONN_STR)
pyodbc.OperationalError: ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: Timeout error [258]. (258) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Unable to complete login process due to delay in login response (258)')
=====================================
File diff suppressed because it is too large Load Diff