Journey_Plan data Import

This commit is contained in:
Dipanshu Kumar
2026-05-20 12:40:47 +05:30
parent 3be8cd7259
commit 579d59e1b0
8 changed files with 15087 additions and 432 deletions
+321 -171
View File
@@ -1,5 +1,3 @@
import pyodbc import pyodbc
import pandas as pd import pandas as pd
import clickhouse_connect import clickhouse_connect
@@ -7,6 +5,7 @@ import numpy as np
from datetime import datetime from datetime import datetime
import traceback import traceback
import warnings import warnings
import time
# ========================================================= # =========================================================
# IGNORE WARNINGS # IGNORE WARNINGS
@@ -16,10 +15,12 @@ warnings.filterwarnings(
'pandas only supports SQLAlchemy connectable' 'pandas only supports SQLAlchemy connectable'
) )
print("ETL Started :", datetime.now()) print("\n================================================")
print("ETL STARTED :", datetime.now())
print("================================================")
# ========================================================= # =========================================================
# SQL SERVER CONNECTION # SQL SERVER CONNECTION STRING
# ========================================================= # =========================================================
SQL_CONN_STR = ( SQL_CONN_STR = (
'DRIVER={ODBC Driver 17 for SQL Server};' 'DRIVER={ODBC Driver 17 for SQL Server};'
@@ -28,6 +29,7 @@ SQL_CONN_STR = (
'UID=bsgteam_test;' 'UID=bsgteam_test;'
'PWD=B$gt3@m#00512;' 'PWD=B$gt3@m#00512;'
'TrustServerCertificate=yes;' 'TrustServerCertificate=yes;'
'Connection Timeout=60;'
) )
# ========================================================= # =========================================================
@@ -42,11 +44,67 @@ CH_CONFIG = {
} }
# ========================================================= # =========================================================
# TABLE NAME # TABLE DETAILS
# ========================================================= # =========================================================
TABLE_NAME = 'Journey_Plan' TABLE_NAME = 'Journey_Plan'
PROJECT_ID = 41654 PROJECT_ID = 41654
# =========================================================
# SETTINGS
# =========================================================
TRUNCATE_BEFORE_LOAD = True
table_truncated = False
# =========================================================
# CHUNK SIZE
# =========================================================
chunk_size = 20000
# =========================================================
# CONNECT SQL SERVER
# =========================================================
def connect_sql():
    """Open a pyodbc connection to SQL Server described by SQL_CONN_STR.

    The connection is requested with autocommit turned on. A confirmation
    line is printed once the connection has been obtained.

    Returns:
        The connection object returned by ``pyodbc.connect``.

    Raises:
        Exception: any error raised while connecting; it is printed and
            then re-raised unchanged so the caller can decide what to do.
    """
    try:
        connection = pyodbc.connect(SQL_CONN_STR, autocommit=True)
        print("Connected SQL Server")
        return connection
    except Exception as error:
        # Report the failure on stdout, then let it propagate.
        print("SQL CONNECTION FAILED")
        print(str(error))
        raise
# =========================================================
# CONNECT CLICKHOUSE
# =========================================================
def connect_clickhouse():
    """Create a ClickHouse client from the settings in CH_CONFIG.

    Every key of CH_CONFIG is forwarded as a keyword argument to
    ``clickhouse_connect.get_client``. A confirmation line is printed once
    the client has been created.

    Returns:
        The client object returned by ``clickhouse_connect.get_client``.

    Raises:
        Exception: any error raised while creating the client; it is
            printed and then re-raised unchanged.
    """
    try:
        ch = clickhouse_connect.get_client(**CH_CONFIG)
        print("Connected ClickHouse")
        return ch
    except Exception as error:
        # Report the failure on stdout, then let it propagate.
        print("CLICKHOUSE CONNECTION FAILED")
        print(str(error))
        raise
# ========================================================= # =========================================================
# CLEAN DATAFRAME # CLEAN DATAFRAME
# ========================================================= # =========================================================
@@ -66,39 +124,46 @@ def clean_dataframe(df):
try: try:
col_lower = col.lower() print(f"\nCleaning Column : {col}")
# ===================================== # =====================================
# DATE COLUMNS # AUTO DETECT DATE / TIME COLUMNS
# ===================================== # =====================================
if 'date' in col_lower: if (
'date' in col.lower()
or 'time' in col.lower()
):
print(f"Cleaning Date Column : {col}") print(f"Date Column : {col}")
df[col] = pd.to_datetime( df[col] = pd.to_datetime(
df[col], df[col],
errors='coerce' errors='coerce'
) )
# Remove invalid dates cleaned_dates = []
df[col] = df[col].where(
(df[col].dt.year >= 1970) &
(df[col].dt.year <= 2100)
)
# Convert to Python Date for val in df[col]:
df[col] = df[col].apply(
lambda x: if pd.isnull(val):
x.date()
if pd.notnull(x) cleaned_dates.append(None)
else None
) else:
cleaned_dates.append(
val.to_pydatetime()
)
df[col] = cleaned_dates
# ===================================== # =====================================
# INTEGER COLUMNS # INTEGER COLUMNS
# ===================================== # =====================================
elif pd.api.types.is_integer_dtype(df[col]): elif pd.api.types.is_integer_dtype(df[col]):
print(f"Integer Column : {col}")
df[col] = pd.to_numeric( df[col] = pd.to_numeric(
df[col], df[col],
errors='coerce' errors='coerce'
@@ -116,38 +181,27 @@ def clean_dataframe(df):
# ===================================== # =====================================
elif pd.api.types.is_float_dtype(df[col]): elif pd.api.types.is_float_dtype(df[col]):
non_null = df[col].dropna() print(f"Float Column : {col}")
# --------------------------------- df[col] = pd.to_numeric(
# Convert whole float to int df[col],
# Example: errors='coerce'
# 3240.0 -> 3240 )
# ---------------------------------
if len(non_null) > 0 and (
(non_null % 1 == 0).all()
):
df[col] = df[col].apply( df[col] = df[col].apply(
lambda x: lambda x:
int(x) float(x)
if pd.notnull(x) if pd.notnull(x)
else None else None
) )
else:
df[col] = df[col].apply(
lambda x:
float(x)
if pd.notnull(x)
else None
)
# ===================================== # =====================================
# OBJECT / STRING COLUMNS # OBJECT / STRING COLUMNS
# ===================================== # =====================================
else: else:
print(f"String/Object Column : {col}")
cleaned = [] cleaned = []
for val in df[col]: for val in df[col]:
@@ -157,6 +211,28 @@ def clean_dataframe(df):
cleaned.append(None) cleaned.append(None)
# DATETIME
elif isinstance(
val,
(
datetime,
pd.Timestamp
)
):
if isinstance(
val,
pd.Timestamp
):
cleaned.append(
val.to_pydatetime()
)
else:
cleaned.append(val)
# INTEGER # INTEGER
elif isinstance( elif isinstance(
val, val,
@@ -166,7 +242,9 @@ def clean_dataframe(df):
) )
): ):
cleaned.append(int(val)) cleaned.append(
int(val)
)
# FLOAT # FLOAT
elif isinstance( elif isinstance(
@@ -183,41 +261,26 @@ def clean_dataframe(df):
else: else:
# IMPORTANT FIX cleaned.append(
# Avoid '3240.0' string issue float(val)
if val.is_integer(): )
cleaned.append(int(val))
else:
cleaned.append(float(val))
# STRING
elif isinstance(val, str):
cleaned.append(val.strip())
# BOOLEAN # BOOLEAN
elif isinstance(val, bool):
cleaned.append(int(val))
# DATETIME
elif isinstance( elif isinstance(
val, val,
( bool
datetime,
pd.Timestamp
)
): ):
cleaned.append(str(val)) cleaned.append(
int(val)
)
# OTHER # STRING
else: else:
cleaned.append(str(val)) cleaned.append(
str(val).strip()
)
df[col] = cleaned df[col] = cleaned
@@ -247,16 +310,12 @@ try:
# ===================================================== # =====================================================
# CONNECT SQL SERVER # CONNECT SQL SERVER
# ===================================================== # =====================================================
sql_conn = pyodbc.connect(SQL_CONN_STR) sql_conn = connect_sql()
print("Connected to SQL Server")
# ===================================================== # =====================================================
# CONNECT CLICKHOUSE # CONNECT CLICKHOUSE
# ===================================================== # =====================================================
ch_client = clickhouse_connect.get_client(**CH_CONFIG) ch_client = connect_clickhouse()
print("Connected to ClickHouse")
# ===================================================== # =====================================================
# QUERY # QUERY
@@ -267,125 +326,214 @@ try:
WHERE Project_Id = {PROJECT_ID} WHERE Project_Id = {PROJECT_ID}
""" """
print("\nExecuting Query:") print("\nExecuting Query")
print(query) print(query)
# ===================================================== # =====================================================
# CHUNK SIZE # RETRY SETTINGS
# ===================================================== # =====================================================
chunk_size = 100000 retry_count = 0
max_retry = 5
total_rows = 0
# ===================================================== # =====================================================
# READ DATA # MAIN RETRY LOOP
# ===================================================== # =====================================================
for chunk in pd.read_sql( while retry_count < max_retry:
query,
sql_conn,
chunksize=chunk_size
):
try: try:
print("\n================================") # =============================================
print(f"Processing {len(chunk)} Rows") # READ SQL DATA
print("================================") # =============================================
for chunk in pd.read_sql(
query,
sql_conn,
chunksize=chunk_size
):
# ================================================= try:
# CLEAN DATA
# =================================================
chunk = clean_dataframe(chunk)
# =================================================
# DEBUG COLUMN TYPES
# =================================================
print("\nCOLUMN TYPES")
for col in chunk.columns:
sample = chunk[col].dropna()
if len(sample) > 0:
print("\n================================")
print( print(
col, f"Processing Rows : "
type(sample.iloc[0]), f"{len(chunk)}"
sample.iloc[0] )
print("================================")
# =====================================
# CLEAN DATA
# =====================================
chunk = clean_dataframe(chunk)
# =====================================
# DEBUG COLUMN TYPES
# =====================================
print("\nCOLUMN DATATYPES")
print(chunk.dtypes)
print("\nCOLUMN SAMPLE TYPES")
for col in chunk.columns:
sample = chunk[col].dropna()
if len(sample) > 0:
print(
col,
type(sample.iloc[0]),
sample.iloc[0]
)
# =====================================
# TRUNCATE TABLE FIRST TIME ONLY
# =====================================
if (
TRUNCATE_BEFORE_LOAD
and
not table_truncated
):
print("\n================================")
print(
f"TRUNCATING TABLE : "
f"{TABLE_NAME}"
)
print("================================")
# IMPORTANT FIX
truncate_query = f"""
TRUNCATE TABLE
`{CH_CONFIG['database']}`.`{TABLE_NAME}`
"""
ch_client.command(
truncate_query
)
print(
"TABLE TRUNCATED SUCCESSFULLY"
)
table_truncated = True
# =====================================
# INSERT INTO CLICKHOUSE
# =====================================
print(
"\nINSERTING INTO CLICKHOUSE..."
) )
# ================================================= ch_client.insert_df(
# SAMPLE DATA table=TABLE_NAME,
# ================================================= df=chunk,
print("\nSAMPLE DATA") database=CH_CONFIG['database']
print(chunk.head(2)) )
# ================================================= print(
# INSERT INTO CLICKHOUSE f"INSERTED : "
# ================================================= f"{len(chunk)} ROWS"
print("\nInserting into ClickHouse...") )
ch_client.insert_df( except Exception as insert_error:
table=TABLE_NAME,
df=chunk,
database=CH_CONFIG['database']
)
total_rows += len(chunk) print("\n================================")
print("INSERT FAILED")
print("================================")
print( print(str(insert_error))
f"\nInserted Total Rows : {total_rows}"
)
except Exception as chunk_error: traceback.print_exc()
# =================================
# SAVE ERROR LOG
# =================================
with open(
"insert_error.log",
"a",
encoding="utf-8"
) as log:
log.write(
"\n\n================================"
)
log.write(
f"\nTIME : "
f"{datetime.now()}"
)
log.write(
f"\nTABLE : "
f"{TABLE_NAME}"
)
log.write(
f"\nERROR : "
f"{str(insert_error)}"
)
log.write(
f"\nTRACEBACK :\n"
f"{traceback.format_exc()}"
)
log.write(
"\n================================"
)
continue
# =============================================
# SUCCESS
# =============================================
break
# =================================================
# SQL CONNECTION FAILURE
# =================================================
except pyodbc.OperationalError as op_error:
retry_count += 1
print("\n================================") print("\n================================")
print("CHUNK INSERT FAILED") print(
f"SQL CONNECTION LOST "
f"- RETRY {retry_count}"
)
print("================================") print("================================")
print(str(chunk_error)) print(str(op_error))
time.sleep(10)
try:
sql_conn.close()
except:
pass
# RECONNECT SQL
sql_conn = connect_sql()
# =================================================
# OTHER ERROR
# =================================================
except Exception as loop_error:
print("\n================================")
print("MAIN LOOP ERROR")
print("================================")
print(str(loop_error))
traceback.print_exc() traceback.print_exc()
# ============================================= break
# SAVE ERROR LOG
# =============================================
with open(
"clickhouse_chunk_error.log",
"a",
encoding="utf-8"
) as log:
log.write(
"\n\n================================"
)
log.write(
f"\nTIME : {datetime.now()}"
)
log.write(
f"\nTABLE : {TABLE_NAME}"
)
log.write(
f"\nERROR : {str(chunk_error)}"
)
log.write(
f"\nTRACEBACK :\n"
f"{traceback.format_exc()}"
)
log.write(
"\n================================"
)
continue
print("\n================================") print("\n================================")
print("ETL COMPLETED SUCCESSFULLY") print("ETL COMPLETED SUCCESSFULLY")
print(f"TOTAL ROWS INSERTED : {total_rows}")
print("================================") print("================================")
# ========================================================= # =========================================================
@@ -402,7 +550,7 @@ except Exception as main_error:
traceback.print_exc() traceback.print_exc()
with open( with open(
"clickhouse_main_error.log", "main_error.log",
"a", "a",
encoding="utf-8" encoding="utf-8"
) as log: ) as log:
@@ -437,7 +585,7 @@ finally:
sql_conn.close() sql_conn.close()
print("\nSQL Server Connection Closed") print("\nSQL SERVER CONNECTION CLOSED")
except: except:
pass pass
@@ -446,9 +594,11 @@ finally:
ch_client.close() ch_client.close()
print("ClickHouse Connection Closed") print("CLICKHOUSE CONNECTION CLOSED")
except: except:
pass pass
print("\nETL Finished :", datetime.now()) print("\n================================================")
print("ETL FINISHED :", datetime.now())
print("================================================")
+293 -261
View File
@@ -5,6 +5,7 @@ import numpy as np
from datetime import datetime from datetime import datetime
import traceback import traceback
import warnings import warnings
import time
# ========================================================= # =========================================================
# IGNORE WARNINGS # IGNORE WARNINGS
@@ -14,10 +15,12 @@ warnings.filterwarnings(
'pandas only supports SQLAlchemy connectable' 'pandas only supports SQLAlchemy connectable'
) )
print("ETL Started :", datetime.now()) print("\n================================================")
print("ETL STARTED :", datetime.now())
print("================================================")
# ========================================================= # =========================================================
# SQL SERVER CONNECTION # SQL SERVER CONNECTION STRING
# ========================================================= # =========================================================
SQL_CONN_STR = ( SQL_CONN_STR = (
'DRIVER={ODBC Driver 17 for SQL Server};' 'DRIVER={ODBC Driver 17 for SQL Server};'
@@ -26,6 +29,7 @@ SQL_CONN_STR = (
'UID=bsgteam_test;' 'UID=bsgteam_test;'
'PWD=B$gt3@m#00512;' 'PWD=B$gt3@m#00512;'
'TrustServerCertificate=yes;' 'TrustServerCertificate=yes;'
'Connection Timeout=60;'
) )
# ========================================================= # =========================================================
@@ -40,31 +44,66 @@ CH_CONFIG = {
} }
# ========================================================= # =========================================================
# TABLE NAME # TABLE DETAILS
# ========================================================= # =========================================================
TABLE_NAME = 'PaidVisibility_Compliance' TABLE_NAME = 'Sales'
PROJECT_ID = 41654 PROJECT_ID = 41654
# ========================================================= # =========================================================
# LOAD SETTINGS # SETTINGS
# ========================================================= # =========================================================
TRUNCATE_BEFORE_LOAD = True TRUNCATE_BEFORE_LOAD = True
table_truncated = False table_truncated = False
# ========================================================= # =========================================================
# CLICKHOUSE DATE COLUMNS # CHUNK SIZE
# ========================================================= # =========================================================
DATE_COLUMNS = [ chunk_size = 20000
'visit_date'
]
# ========================================================= # =========================================================
# CLICKHOUSE DATETIME COLUMNS # CONNECT SQL SERVER
# ========================================================= # =========================================================
DATETIME_COLUMNS = [ def connect_sql():
'create_date',
'update_date' try:
]
conn = pyodbc.connect(
SQL_CONN_STR,
autocommit=True
)
print("Connected SQL Server")
return conn
except Exception as e:
print("SQL CONNECTION FAILED")
print(str(e))
raise
# =========================================================
# CONNECT CLICKHOUSE
# =========================================================
def connect_clickhouse():
    """Create a ClickHouse client from the settings in CH_CONFIG.

    Every key of CH_CONFIG is forwarded as a keyword argument to
    ``clickhouse_connect.get_client``. A confirmation line is printed once
    the client has been created.

    Returns:
        The client object returned by ``clickhouse_connect.get_client``.

    Raises:
        Exception: any error raised while creating the client; it is
            printed and then re-raised unchanged.
    """
    try:
        ch = clickhouse_connect.get_client(**CH_CONFIG)
        print("Connected ClickHouse")
        return ch
    except Exception as error:
        # Report the failure on stdout, then let it propagate.
        print("CLICKHOUSE CONNECTION FAILED")
        print(str(error))
        raise
# ========================================================= # =========================================================
# CLEAN DATAFRAME # CLEAN DATAFRAME
@@ -79,7 +118,7 @@ def clean_dataframe(df):
df = df.replace({np.nan: None}) df = df.replace({np.nan: None})
# --------------------------------------------- # ---------------------------------------------
# Process Columns # Process Column Wise
# --------------------------------------------- # ---------------------------------------------
for col in df.columns: for col in df.columns:
@@ -88,13 +127,14 @@ def clean_dataframe(df):
print(f"\nCleaning Column : {col}") print(f"\nCleaning Column : {col}")
# ===================================== # =====================================
# DATE32 COLUMNS # AUTO DETECT DATE / TIME COLUMNS
# ===================================== # =====================================
if col.lower() in [ if (
x.lower() for x in DATE_COLUMNS 'date' in col.lower()
]: or 'time' in col.lower()
):
print(f"Date32 Column : {col}") print(f"Date Column : {col}")
df[col] = pd.to_datetime( df[col] = pd.to_datetime(
df[col], df[col],
@@ -111,43 +151,11 @@ def clean_dataframe(df):
else: else:
# IMPORTANT FIX
cleaned_dates.append( cleaned_dates.append(
val.date()
)
df[col] = cleaned_dates
# =====================================
# DATETIME COLUMNS
# =====================================
elif col.lower() in [
x.lower() for x in DATETIME_COLUMNS
]:
print(f"DateTime Column : {col}")
df[col] = pd.to_datetime(
df[col],
errors='coerce'
)
cleaned_datetime = []
for val in df[col]:
if pd.isnull(val):
cleaned_datetime.append(None)
else:
# IMPORTANT FIX
cleaned_datetime.append(
val.to_pydatetime() val.to_pydatetime()
) )
df[col] = cleaned_datetime df[col] = cleaned_dates
# ===================================== # =====================================
# INTEGER COLUMNS # INTEGER COLUMNS
@@ -180,28 +188,12 @@ def clean_dataframe(df):
errors='coerce' errors='coerce'
) )
non_null = df[col].dropna() df[col] = df[col].apply(
lambda x:
# Convert float to int if possible float(x)
if len(non_null) > 0 and ( if pd.notnull(x)
(non_null % 1 == 0).all() else None
): )
df[col] = df[col].apply(
lambda x:
int(x)
if pd.notnull(x)
else None
)
else:
df[col] = df[col].apply(
lambda x:
float(x)
if pd.notnull(x)
else None
)
# ===================================== # =====================================
# OBJECT / STRING COLUMNS # OBJECT / STRING COLUMNS
@@ -219,19 +211,27 @@ def clean_dataframe(df):
cleaned.append(None) cleaned.append(None)
# STRING # DATETIME
elif isinstance(val, str): elif isinstance(
val,
cleaned.append( (
val.strip() datetime,
pd.Timestamp
) )
):
# BOOLEAN if isinstance(
elif isinstance(val, bool): val,
pd.Timestamp
):
cleaned.append( cleaned.append(
int(val) val.to_pydatetime()
) )
else:
cleaned.append(val)
# INTEGER # INTEGER
elif isinstance( elif isinstance(
@@ -261,50 +261,25 @@ def clean_dataframe(df):
else: else:
if val.is_integer():
cleaned.append(
int(val)
)
else:
cleaned.append(
float(val)
)
# DATETIME
elif isinstance(
val,
(
datetime,
pd.Timestamp
)
):
if isinstance(
val,
pd.Timestamp
):
cleaned.append( cleaned.append(
val.to_pydatetime() float(val)
) )
else: # BOOLEAN
elif isinstance(
val,
bool
):
cleaned.append(val) cleaned.append(
int(val)
)
# DATE # STRING
elif hasattr(val, 'year') and hasattr(val, 'month'):
cleaned.append(val)
# OTHER
else: else:
cleaned.append( cleaned.append(
str(val) str(val).strip()
) )
df[col] = cleaned df[col] = cleaned
@@ -335,16 +310,12 @@ try:
# ===================================================== # =====================================================
# CONNECT SQL SERVER # CONNECT SQL SERVER
# ===================================================== # =====================================================
sql_conn = pyodbc.connect(SQL_CONN_STR) sql_conn = connect_sql()
print("Connected to SQL Server")
# ===================================================== # =====================================================
# CONNECT CLICKHOUSE # CONNECT CLICKHOUSE
# ===================================================== # =====================================================
ch_client = clickhouse_connect.get_client(**CH_CONFIG) ch_client = connect_clickhouse()
print("Connected to ClickHouse")
# ===================================================== # =====================================================
# QUERY # QUERY
@@ -355,156 +326,214 @@ try:
WHERE Project_Id = {PROJECT_ID} WHERE Project_Id = {PROJECT_ID}
""" """
print("\nExecuting Query:") print("\nExecuting Query")
print(query) print(query)
# ===================================================== # =====================================================
# CHUNK SIZE # RETRY SETTINGS
# ===================================================== # =====================================================
chunk_size = 100000 retry_count = 0
max_retry = 5
total_rows = 0
# ===================================================== # =====================================================
# READ DATA # MAIN RETRY LOOP
# ===================================================== # =====================================================
for chunk in pd.read_sql( while retry_count < max_retry:
query,
sql_conn,
chunksize=chunk_size
):
try: try:
# =============================================
# READ SQL DATA
# =============================================
for chunk in pd.read_sql(
query,
sql_conn,
chunksize=chunk_size
):
try:
print("\n================================")
print(
f"Processing Rows : "
f"{len(chunk)}"
)
print("================================")
# =====================================
# CLEAN DATA
# =====================================
chunk = clean_dataframe(chunk)
# =====================================
# DEBUG COLUMN TYPES
# =====================================
print("\nCOLUMN DATATYPES")
print(chunk.dtypes)
print("\nCOLUMN SAMPLE TYPES")
for col in chunk.columns:
sample = chunk[col].dropna()
if len(sample) > 0:
print(
col,
type(sample.iloc[0]),
sample.iloc[0]
)
# =====================================
# TRUNCATE TABLE FIRST TIME ONLY
# =====================================
if (
TRUNCATE_BEFORE_LOAD
and
not table_truncated
):
print("\n================================")
print(
f"TRUNCATING TABLE : "
f"{TABLE_NAME}"
)
print("================================")
# IMPORTANT FIX
truncate_query = f"""
TRUNCATE TABLE
`{CH_CONFIG['database']}`.`{TABLE_NAME}`
"""
ch_client.command(
truncate_query
)
print(
"TABLE TRUNCATED SUCCESSFULLY"
)
table_truncated = True
# =====================================
# INSERT INTO CLICKHOUSE
# =====================================
print(
"\nINSERTING INTO CLICKHOUSE..."
)
ch_client.insert_df(
table=TABLE_NAME,
df=chunk,
database=CH_CONFIG['database']
)
print(
f"INSERTED : "
f"{len(chunk)} ROWS"
)
except Exception as insert_error:
print("\n================================")
print("INSERT FAILED")
print("================================")
print(str(insert_error))
traceback.print_exc()
# =================================
# SAVE ERROR LOG
# =================================
with open(
"insert_error.log",
"a",
encoding="utf-8"
) as log:
log.write(
"\n\n================================"
)
log.write(
f"\nTIME : "
f"{datetime.now()}"
)
log.write(
f"\nTABLE : "
f"{TABLE_NAME}"
)
log.write(
f"\nERROR : "
f"{str(insert_error)}"
)
log.write(
f"\nTRACEBACK :\n"
f"{traceback.format_exc()}"
)
log.write(
"\n================================"
)
continue
# =============================================
# SUCCESS
# =============================================
break
# =================================================
# SQL CONNECTION FAILURE
# =================================================
except pyodbc.OperationalError as op_error:
retry_count += 1
print("\n================================") print("\n================================")
print(f"Processing {len(chunk)} Rows")
print("================================")
# =================================================
# CLEAN DATA
# =================================================
chunk = clean_dataframe(chunk)
# =================================================
# DEBUG COLUMN TYPES
# =================================================
print("\nCOLUMN TYPES")
for col in chunk.columns:
sample = chunk[col].dropna()
if len(sample) > 0:
print(
col,
type(sample.iloc[0]),
sample.iloc[0]
)
# =================================================
# DEBUG DATE COLUMN
# =================================================
if 'visit_date' in chunk.columns:
print("\nvisit_date Sample")
print(chunk['visit_date'].head())
sample = chunk['visit_date'].dropna()
if len(sample) > 0:
print(
"visit_date datatype:",
type(sample.iloc[0])
)
# =================================================
# TRUNCATE TABLE
# =================================================
if TRUNCATE_BEFORE_LOAD and not table_truncated:
print("\n================================")
print(f"TRUNCATING TABLE : {TABLE_NAME}")
print("================================")
truncate_query = f"""
TRUNCATE TABLE
{CH_CONFIG['database']}.{TABLE_NAME}
"""
ch_client.command(truncate_query)
print("TABLE TRUNCATED SUCCESSFULLY")
table_truncated = True
# =================================================
# INSERT INTO CLICKHOUSE
# =================================================
print("\nInserting into ClickHouse...")
ch_client.insert_df(
table=TABLE_NAME,
df=chunk,
database=CH_CONFIG['database']
)
total_rows += len(chunk)
print( print(
f"\nInserted Total Rows : {total_rows}" f"SQL CONNECTION LOST "
f"- RETRY {retry_count}"
) )
except Exception as chunk_error:
print("\n================================")
print("CHUNK INSERT FAILED")
print("================================") print("================================")
print(str(chunk_error)) print(str(op_error))
time.sleep(10)
try:
sql_conn.close()
except:
pass
# RECONNECT SQL
sql_conn = connect_sql()
# =================================================
# OTHER ERROR
# =================================================
except Exception as loop_error:
print("\n================================")
print("MAIN LOOP ERROR")
print("================================")
print(str(loop_error))
traceback.print_exc() traceback.print_exc()
# ============================================= break
# SAVE ERROR LOG
# =============================================
with open(
"clickhouse_chunk_error.log",
"a",
encoding="utf-8"
) as log:
log.write(
"\n\n================================"
)
log.write(
f"\nTIME : {datetime.now()}"
)
log.write(
f"\nTABLE : {TABLE_NAME}"
)
log.write(
f"\nERROR : {str(chunk_error)}"
)
log.write(
f"\nTRACEBACK :\n"
f"{traceback.format_exc()}"
)
log.write(
"\n================================"
)
continue
print("\n================================") print("\n================================")
print("ETL COMPLETED SUCCESSFULLY") print("ETL COMPLETED SUCCESSFULLY")
print(f"TOTAL ROWS INSERTED : {total_rows}")
print("================================") print("================================")
# ========================================================= # =========================================================
@@ -521,7 +550,7 @@ except Exception as main_error:
traceback.print_exc() traceback.print_exc()
with open( with open(
"clickhouse_main_error.log", "main_error.log",
"a", "a",
encoding="utf-8" encoding="utf-8"
) as log: ) as log:
@@ -555,7 +584,8 @@ finally:
try: try:
sql_conn.close() sql_conn.close()
print("\nSQL Server Connection Closed")
print("\nSQL SERVER CONNECTION CLOSED")
except: except:
pass pass
@@ -563,10 +593,12 @@ finally:
try: try:
ch_client.close() ch_client.close()
print("ClickHouse Connection Closed")
print("CLICKHOUSE CONNECTION CLOSED")
except: except:
pass pass
print("\nETL Finished :", datetime.now()) print("\n================================================")
print("ETL FINISHED :", datetime.now())
print("================================================")
+604
View File
@@ -0,0 +1,604 @@
import pyodbc
import pandas as pd
import clickhouse_connect
import numpy as np
from datetime import datetime
import traceback
import warnings
import time
# =========================================================
# WARNING FILTER
# =========================================================
# Ignore warnings whose message begins with the text below.
warnings.filterwarnings(
    action='ignore',
    message='pandas only supports SQLAlchemy connectable',
)

# Start-of-run banner with the current timestamp.
print("\n================================================")
print("ETL STARTED :", datetime.now())
print("================================================")

# =========================================================
# SQL SERVER CONNECTION STRING
# =========================================================
# NOTE(review): the UID/PWD below are hard-coded in source control;
# consider loading them from the environment or a secrets store.
SQL_CONN_STR = ''.join((
    'DRIVER={ODBC Driver 17 for SQL Server};',
    'SERVER=10.200.25.65;',
    'DATABASE=CPMIndiaBusinessInsight;',
    'UID=bsgteam_test;',
    'PWD=B$gt3@m#00512;',
    'TrustServerCertificate=yes;',
    'Connection Timeout=60;',
))

# =========================================================
# CLICKHOUSE CONFIG
# =========================================================
# Keyword arguments handed to clickhouse_connect.get_client.
# NOTE(review): the password is hard-coded here as well.
CH_CONFIG = dict(
    host='172.188.12.194',
    port=8123,
    username='default',
    password='dipanshu_k',
    database='DaburIndia_BI',
)

# =========================================================
# TABLE DETAILS
# =========================================================
TABLE_NAME = 'Sales'
PROJECT_ID = 41654

# =========================================================
# SETTINGS
# =========================================================
# Two separate flags: whether a truncate is requested, and a module-level
# marker that starts out False.
TRUNCATE_BEFORE_LOAD = True
table_truncated = False

# =========================================================
# CHUNK SIZE
# =========================================================
chunk_size = 10000
# =========================================================
# CONNECT SQL SERVER
# =========================================================
def connect_sql():
    """Open a pyodbc connection to SQL Server described by SQL_CONN_STR.

    The connection is requested with autocommit turned on. A confirmation
    line is printed once the connection has been obtained.

    Returns:
        The connection object returned by ``pyodbc.connect``.

    Raises:
        Exception: any error raised while connecting; it is printed and
            then re-raised unchanged so the caller can decide what to do.
    """
    try:
        connection = pyodbc.connect(SQL_CONN_STR, autocommit=True)
        print("Connected SQL Server")
        return connection
    except Exception as error:
        # Report the failure on stdout, then let it propagate.
        print("SQL CONNECTION FAILED")
        print(str(error))
        raise
# =========================================================
# CONNECT CLICKHOUSE
# =========================================================
def connect_clickhouse():
    """Create and return a clickhouse_connect client configured by CH_CONFIG.

    Any failure is reported on stdout and then re-raised unchanged so the
    caller decides how to react.
    """
    try:
        ch = clickhouse_connect.get_client(**CH_CONFIG)
        print("Connected ClickHouse")
        return ch
    except Exception as connect_error:
        for line in ("CLICKHOUSE CONNECTION FAILED", str(connect_error)):
            print(line)
        raise
# =========================================================
# CLEAN DATAFRAME
# =========================================================
def _parse_datetime_column(series):
    """Parse a column whose *name* looks like a date/time column.

    Returns a list holding one ``datetime.datetime`` per row (``None`` where
    the value is null or cannot be parsed).

    Returns ``None`` instead when the column contains non-null values and not
    a single one of them parses. In that case the name matched by accident
    (for example ``Updated_By`` contains "date") and coercing would silently
    replace every real value with NULL.
    """
    parsed = pd.to_datetime(series, errors='coerce')
    present = series.notna()
    if present.any() and not parsed[present].notna().any():
        return None
    return [
        None if pd.isnull(stamp) else stamp.to_pydatetime()
        for stamp in parsed
    ]


def _clean_object_column(values):
    """Normalise every value of an object/string column to a plain Python value."""
    # Function-scope import: the file only imports the `datetime` class.
    from datetime import date

    def _clean(val):
        if pd.isnull(val):
            return None
        # Timestamp subclasses datetime, so it has to be tested first.
        if isinstance(val, pd.Timestamp):
            return val.to_pydatetime()
        # Covers datetime.datetime as well as plain datetime.date. Plain
        # dates used to fall through to str(), handing strings to the
        # target column.
        # NOTE(review): assumes date values are bound for Date/DateTime
        # columns in ClickHouse - confirm against the target schema.
        if isinstance(val, date):
            return val
        # bool is a subclass of int, so it must be tested before the integer
        # case; np.bool_ is neither and used to be stringified.
        if isinstance(val, (bool, np.bool_)):
            return int(val)
        if isinstance(val, (int, np.integer)):
            return int(val)
        # NaN was already mapped to None by the pd.isnull check above.
        if isinstance(val, (float, np.floating)):
            return float(val)
        return str(val).strip()

    return [_clean(val) for val in values]


def clean_dataframe(df):
    """Return a copy of ``df`` whose values are plain Python/NumPy types.

    Processing is column by column:

    * columns whose name contains "date" or "time" are parsed with
      ``pd.to_datetime(errors='coerce')`` - unless none of their non-null
      values parses, in which case they are cleaned by dtype like any other
      column;
    * integer and float dtype columns are coerced with ``pd.to_numeric``;
    * everything else is cleaned value by value (nulls -> None,
      date/datetime values kept, booleans and integers -> int,
      floats -> float, the rest -> stripped ``str``).

    Best effort by design: a column that raises is reported on stdout and
    left as it was; if the initial NaN replacement itself raises, the error
    is reported and the frame is returned in its current state.

    Parameters
    ----------
    df : pandas.DataFrame
        Chunk to clean. The caller's frame is not modified.

    Returns
    -------
    pandas.DataFrame
    """
    try:
        # ---------------------------------------------
        # Replace NaN
        # ---------------------------------------------
        df = df.replace({np.nan: None})

        # ---------------------------------------------
        # Process Column Wise
        # ---------------------------------------------
        for col in df.columns:
            try:
                print(f"\nCleaning Column : {col}")

                # =====================================
                # AUTO DETECT DATE / TIME COLUMNS
                # =====================================
                col_name = col.lower()
                if 'date' in col_name or 'time' in col_name:
                    parsed_dates = _parse_datetime_column(df[col])
                    if parsed_dates is not None:
                        print(f"Date Column : {col}")
                        df[col] = parsed_dates
                        continue
                    # Name matched but the content is not date-like:
                    # fall through to the dtype-based cleaning below.
                    print(f"Date-like Name, No Parsable Values : {col}")

                # =====================================
                # INTEGER COLUMNS
                # =====================================
                if pd.api.types.is_integer_dtype(df[col]):
                    print(f"Integer Column : {col}")
                    numeric = pd.to_numeric(df[col], errors='coerce')
                    df[col] = numeric.apply(
                        lambda x: int(x) if pd.notnull(x) else None
                    )

                # =====================================
                # FLOAT COLUMNS
                # =====================================
                elif pd.api.types.is_float_dtype(df[col]):
                    print(f"Float Column : {col}")
                    numeric = pd.to_numeric(df[col], errors='coerce')
                    df[col] = numeric.apply(
                        lambda x: float(x) if pd.notnull(x) else None
                    )

                # =====================================
                # OBJECT / STRING COLUMNS
                # =====================================
                else:
                    print(f"String/Object Column : {col}")
                    df[col] = _clean_object_column(df[col])

            except Exception as col_error:
                # Deliberate best effort: report and keep the column as-is.
                print("\n================================")
                print(f"COLUMN FAILED : {col}")
                print(str(col_error))
                print("================================")

        return df

    except Exception as clean_error:
        print("\n================================")
        print("DATA CLEAN FAILED")
        print(str(clean_error))
        print("================================")
        return df
# =========================================================
# MAIN PROCESS
# =========================================================
# Script body: stream the source table from SQL Server in chunks, clean each
# chunk and insert it into ClickHouse, retrying when the SQL link drops.

# Pre-declare both handles so the cleanup in `finally` can tell a connection
# that was never opened from one that needs closing.
sql_conn = None
ch_client = None

# Outcome tracking, so the closing banner reports what actually happened.
load_finished = False   # True only once every chunk of the query was read
failed_chunks = 0       # chunks whose truncate/insert raised and were skipped
inserted_rows = 0       # rows successfully handed to ClickHouse

try:
    # =====================================================
    # CONNECT SQL SERVER / CLICKHOUSE
    # =====================================================
    sql_conn = connect_sql()
    ch_client = connect_clickhouse()

    # =====================================================
    # QUERY
    # =====================================================
    # TABLE_NAME and PROJECT_ID are module constants, not external input.
    query = f"""
    SELECT *
    FROM dbo.[{TABLE_NAME}]
    WHERE Project_Id = {PROJECT_ID}
    """
    print("\nExecuting Query")
    print(query)

    # =====================================================
    # RETRY SETTINGS
    # =====================================================
    retry_count = 0
    max_retry = 5

    # =====================================================
    # MAIN RETRY LOOP
    # =====================================================
    while retry_count < max_retry:
        try:
            # =============================================
            # READ SQL DATA
            # =============================================
            for chunk in pd.read_sql(
                query,
                sql_conn,
                chunksize=chunk_size
            ):
                try:
                    print("\n================================")
                    print(f"Processing Rows : {len(chunk)}")
                    print("================================")

                    # =====================================
                    # CLEAN DATA
                    # =====================================
                    chunk = clean_dataframe(chunk)

                    # =====================================
                    # DEBUG COLUMN TYPES
                    # =====================================
                    print("\nCOLUMN DATATYPES")
                    print(chunk.dtypes)
                    print("\nCOLUMN SAMPLE TYPES")
                    for col in chunk.columns:
                        sample = chunk[col].dropna()
                        if len(sample) > 0:
                            print(
                                col,
                                type(sample.iloc[0]),
                                sample.iloc[0]
                            )

                    # =====================================
                    # TRUNCATE TABLE FIRST TIME ONLY
                    # =====================================
                    if TRUNCATE_BEFORE_LOAD and not table_truncated:
                        print("\n================================")
                        print(f"TRUNCATING TABLE : {TABLE_NAME}")
                        print("================================")
                        truncate_query = f"""
                        TRUNCATE TABLE
                        `{CH_CONFIG['database']}`.`{TABLE_NAME}`
                        """
                        ch_client.command(truncate_query)
                        print("TABLE TRUNCATED SUCCESSFULLY")
                        table_truncated = True

                    # =====================================
                    # INSERT INTO CLICKHOUSE
                    # =====================================
                    print("\nINSERTING INTO CLICKHOUSE...")
                    ch_client.insert_df(
                        table=TABLE_NAME,
                        df=chunk,
                        database=CH_CONFIG['database']
                    )
                    inserted_rows += len(chunk)
                    print(f"INSERTED : {len(chunk)} ROWS")

                except Exception as insert_error:
                    # Best effort per chunk: log it, count it, move on.
                    # The count feeds the final banner so a partial load is
                    # never reported as a success.
                    failed_chunks += 1
                    print("\n================================")
                    print("INSERT FAILED")
                    print("================================")
                    print(str(insert_error))
                    traceback.print_exc()

                    # =================================
                    # SAVE ERROR LOG
                    # =================================
                    with open(
                        "insert_error.log",
                        "a",
                        encoding="utf-8"
                    ) as log:
                        log.write(
                            "\n\n================================"
                            f"\nTIME : {datetime.now()}"
                            f"\nTABLE : {TABLE_NAME}"
                            f"\nERROR : {str(insert_error)}"
                            f"\nTRACEBACK :\n{traceback.format_exc()}"
                            "\n================================"
                        )
                    continue

            # =============================================
            # SUCCESS - the whole result set was consumed
            # =============================================
            load_finished = True
            break

        # =================================================
        # SQL CONNECTION FAILURE
        # =================================================
        except pyodbc.OperationalError as op_error:
            retry_count += 1
            print("\n================================")
            print(f"SQL CONNECTION LOST - RETRY {retry_count}")
            print("================================")
            print(str(op_error))

            if retry_count >= max_retry:
                # No attempt left: do not sleep and reconnect for nothing.
                print("RETRY LIMIT REACHED")
                break

            time.sleep(10)
            try:
                sql_conn.close()
            except Exception:
                # The link is already broken; closing is only a courtesy.
                pass

            # RECONNECT SQL
            sql_conn = connect_sql()

            # The query restarts from its first row, so rows inserted before
            # the failure would be loaded a second time.
            if TRUNCATE_BEFORE_LOAD:
                # Empty the target again so the reload starts clean.
                table_truncated = False
                inserted_rows = 0
                failed_chunks = 0
            elif inserted_rows:
                print(
                    "WARNING : RESTARTING WITHOUT TRUNCATE - "
                    f"{inserted_rows} ROWS ALREADY INSERTED MAY BE DUPLICATED"
                )

        # =================================================
        # OTHER ERROR
        # =================================================
        except Exception as loop_error:
            print("\n================================")
            print("MAIN LOOP ERROR")
            print("================================")
            print(str(loop_error))
            traceback.print_exc()
            break

    # =====================================================
    # FINAL STATUS
    # =====================================================
    print("\n================================")
    if load_finished and failed_chunks == 0:
        print("ETL COMPLETED SUCCESSFULLY")
    elif load_finished:
        print(
            f"ETL COMPLETED WITH ERRORS : {failed_chunks} CHUNK(S) "
            "NOT INSERTED (see insert_error.log)"
        )
    else:
        print("ETL DID NOT COMPLETE")
    print(f"ROWS INSERTED : {inserted_rows}")
    print("================================")

# =========================================================
# MAIN ERROR
# =========================================================
except Exception as main_error:
    print("\n================================")
    print("MAIN ERROR")
    print("================================")
    print(str(main_error))
    traceback.print_exc()
    with open(
        "main_error.log",
        "a",
        encoding="utf-8"
    ) as log:
        log.write(
            "\n\n================================"
            f"\nTIME : {datetime.now()}"
            f"\nERROR : {str(main_error)}"
            f"\nTRACEBACK :\n{traceback.format_exc()}"
            "\n================================"
        )

# =========================================================
# CLOSE CONNECTIONS
# =========================================================
finally:
    if sql_conn is not None:
        try:
            sql_conn.close()
            print("\nSQL SERVER CONNECTION CLOSED")
        except Exception as close_error:
            # Typically "already closed" after a failed reconnect.
            print(f"\nSQL SERVER CLOSE SKIPPED : {close_error}")
    if ch_client is not None:
        try:
            ch_client.close()
            print("CLICKHOUSE CONNECTION CLOSED")
        except Exception as close_error:
            print(f"CLICKHOUSE CLOSE SKIPPED : {close_error}")
    print("\n================================================")
    print("ETL FINISHED :", datetime.now())
    print("================================================")
+283
View File
@@ -10005,4 +10005,287 @@ Traceback (most recent call last):
clickhouse_connect.driver.exceptions.DatabaseError: Received ClickHouse exception, code: 62, server response: Code: 62. DB::Exception: Syntax error: failed at position 67 (Master) (line 3, col 35): Master clickhouse_connect.driver.exceptions.DatabaseError: Received ClickHouse exception, code: 62, server response: Code: 62. DB::Exception: Syntax error: failed at position 67 (Master) (line 3, col 35): Master
. Expected one of: ParserArrayOfJSONIdentifierDelimiter, token sequence, OpeningSquareBracket, Dot, token, UUID, Comma, ON, NO DELAY, SYNC, INTO OUTFILE, FORMAT, SETTINGS, ParallelWithClause, PARALLEL WITH, end of query. (SYNTAX_ERROR) (for url http://172.188.12.194:8123) . Expected one of: ParserArrayOfJSONIdentifierDelimiter, token sequence, OpeningSquareBracket, Dot, token, UUID, Comma, ON, NO DELAY, SYNC, INTO OUTFILE, FORMAT, SETTINGS, ParallelWithClause, PARALLEL WITH, end of query. (SYNTAX_ERROR) (for url http://172.188.12.194:8123)
================================
================================
TIME : 2026-05-19 17:34:04.760816
TABLE : Sales
ERROR : unsupported operand type(s) for -: 'str' and 'datetime.date'
TRACEBACK :
Traceback (most recent call last):
File "d:\Python Code\Sales_Import.py", line 426, in <module>
ch_client.insert_df(
~~~~~~~~~~~~~~~~~~~^
table=f"`{TABLE_NAME}`",
^^^^^^^^^^^^^^^^^^^^^^^^
df=chunk,
^^^^^^^^^
database=CH_CONFIG['database']
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 1013, in insert_df
return self.insert(table,
~~~~~~~~~~~^^^^^^^
df,
^^^
...<5 lines>...
transport_settings=transport_settings,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
context=context)
^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 978, in insert
return self.data_insert(context)
~~~~~~~~~~~~~~~~^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 347, in data_insert
response = self._raw_request(block_gen, params, headers, error_handler=error_handler, server_wait=False)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 569, in _raw_request
error_handler(response)
~~~~~~~~~~~~~^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 331, in error_handler
raise ex
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\transform.py", line 114, in chunk_gen
col_type.write_column(data, output, context)
~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 216, in write_column
self.write_column_data(column, dest, ctx)
~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\base.py", line 231, in write_column_data
self._write_column_binary(column, dest, ctx)
~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\datatypes\temporal.py", line 60, in _write_column_binary
column = [(x - esd).days for x in column]
~~^~~~~
TypeError: unsupported operand type(s) for -: 'str' and 'datetime.date'
================================
================================
TIME : 2026-05-20 10:42:59.946046
TABLE : PaidVisibility_Compliance
ERROR : Error ('Connection aborted.', TimeoutError('timed out')) executing HTTP request attempt 1 (http://172.188.12.194:8123)
TRACEBACK :
Traceback (most recent call last):
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 788, in urlopen
response = self._make_request(
conn,
...<10 lines>...
**response_kw,
)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 493, in _make_request
conn.request(
~~~~~~~~~~~~^
method,
^^^^^^^
...<6 lines>...
enforce_content_length=enforce_content_length,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connection.py", line 512, in request
self.send(b"%x\r\n%b\r\n" % (len(chunk), chunk))
~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\http\client.py", line 1086, in send
self.sock.sendall(data)
~~~~~~~~~~~~~~~~~^^^^^^
TimeoutError: timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 546, in _raw_request
response = self.http.request(method, url, **kwargs)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\_request_methods.py", line 143, in request
return self.request_encode_body(
~~~~~~~~~~~~~~~~~~~~~~~~^
method, url, fields=fields, headers=headers, **urlopen_kw
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\_request_methods.py", line 278, in request_encode_body
return self.urlopen(method, url, **extra_kw)
~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\poolmanager.py", line 457, in urlopen
response = conn.urlopen(method, u.request_uri, **kw)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 842, in urlopen
retries = retries.increment(
method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2]
)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\util\retry.py", line 498, in increment
raise reraise(type(error), error, _stacktrace)
~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\util\util.py", line 38, in reraise
raise value.with_traceback(tb)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 788, in urlopen
response = self._make_request(
conn,
...<10 lines>...
**response_kw,
)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 493, in _make_request
conn.request(
~~~~~~~~~~~~^
method,
^^^^^^^
...<6 lines>...
enforce_content_length=enforce_content_length,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connection.py", line 512, in request
self.send(b"%x\r\n%b\r\n" % (len(chunk), chunk))
~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\http\client.py", line 1086, in send
self.sock.sendall(data)
~~~~~~~~~~~~~~~~~^^^^^^
urllib3.exceptions.ProtocolError: ('Connection aborted.', TimeoutError('timed out'))
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "d:\Python Code\PaidVisibility_Compliance Import.py", line 447, in <module>
ch_client.insert_df(
~~~~~~~~~~~~~~~~~~~^
table=TABLE_NAME,
^^^^^^^^^^^^^^^^^
df=chunk,
^^^^^^^^^
database=CH_CONFIG['database']
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 1013, in insert_df
return self.insert(table,
~~~~~~~~~~~^^^^^^^
df,
^^^
...<5 lines>...
transport_settings=transport_settings,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
context=context)
^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 978, in insert
return self.data_insert(context)
~~~~~~~~~~~~~~~~^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 347, in data_insert
response = self._raw_request(block_gen, params, headers, error_handler=error_handler, server_wait=False)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 558, in _raw_request
raise OperationalError(f'Error {ex} executing HTTP request attempt {attempts}{err_url}') from ex
clickhouse_connect.driver.exceptions.OperationalError: Error ('Connection aborted.', TimeoutError('timed out')) executing HTTP request attempt 1 (http://172.188.12.194:8123)
================================
================================
TIME : 2026-05-20 10:43:26.819553
TABLE : PaidVisibility_Compliance
ERROR : Error ('Connection aborted.', TimeoutError('timed out')) executing HTTP request attempt 1 (http://172.188.12.194:8123)
TRACEBACK :
Traceback (most recent call last):
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 788, in urlopen
response = self._make_request(
conn,
...<10 lines>...
**response_kw,
)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 493, in _make_request
conn.request(
~~~~~~~~~~~~^
method,
^^^^^^^
...<6 lines>...
enforce_content_length=enforce_content_length,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connection.py", line 512, in request
self.send(b"%x\r\n%b\r\n" % (len(chunk), chunk))
~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\http\client.py", line 1086, in send
self.sock.sendall(data)
~~~~~~~~~~~~~~~~~^^^^^^
TimeoutError: timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 546, in _raw_request
response = self.http.request(method, url, **kwargs)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\_request_methods.py", line 143, in request
return self.request_encode_body(
~~~~~~~~~~~~~~~~~~~~~~~~^
method, url, fields=fields, headers=headers, **urlopen_kw
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\_request_methods.py", line 278, in request_encode_body
return self.urlopen(method, url, **extra_kw)
~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\poolmanager.py", line 457, in urlopen
response = conn.urlopen(method, u.request_uri, **kw)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 842, in urlopen
retries = retries.increment(
method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2]
)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\util\retry.py", line 498, in increment
raise reraise(type(error), error, _stacktrace)
~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\util\util.py", line 38, in reraise
raise value.with_traceback(tb)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 788, in urlopen
response = self._make_request(
conn,
...<10 lines>...
**response_kw,
)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 493, in _make_request
conn.request(
~~~~~~~~~~~~^
method,
^^^^^^^
...<6 lines>...
enforce_content_length=enforce_content_length,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connection.py", line 512, in request
self.send(b"%x\r\n%b\r\n" % (len(chunk), chunk))
~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\http\client.py", line 1086, in send
self.sock.sendall(data)
~~~~~~~~~~~~~~~~~^^^^^^
urllib3.exceptions.ProtocolError: ('Connection aborted.', TimeoutError('timed out'))
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "d:\Python Code\PaidVisibility_Compliance Import.py", line 447, in <module>
ch_client.insert_df(
~~~~~~~~~~~~~~~~~~~^
table=TABLE_NAME,
^^^^^^^^^^^^^^^^^
df=chunk,
^^^^^^^^^
database=CH_CONFIG['database']
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 1013, in insert_df
return self.insert(table,
~~~~~~~~~~~^^^^^^^
df,
^^^
...<5 lines>...
transport_settings=transport_settings,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
context=context)
^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 978, in insert
return self.data_insert(context)
~~~~~~~~~~~~~~~~^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 347, in data_insert
response = self._raw_request(block_gen, params, headers, error_handler=error_handler, server_wait=False)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 558, in _raw_request
raise OperationalError(f'Error {ex} executing HTTP request attempt {attempts}{err_url}') from ex
clickhouse_connect.driver.exceptions.OperationalError: Error ('Connection aborted.', TimeoutError('timed out')) executing HTTP request attempt 1 (http://172.188.12.194:8123)
================================ ================================
+231
View File
@@ -0,0 +1,231 @@
================================
TIME : 2026-05-19 17:13:53.695971
TABLE : Sales
ERROR : Error ('Connection aborted.', TimeoutError('timed out')) executing HTTP request attempt 1 (http://172.188.12.194:8123)
TRACEBACK :
Traceback (most recent call last):
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 788, in urlopen
response = self._make_request(
conn,
...<10 lines>...
**response_kw,
)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 493, in _make_request
conn.request(
~~~~~~~~~~~~^
method,
^^^^^^^
...<6 lines>...
enforce_content_length=enforce_content_length,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connection.py", line 512, in request
self.send(b"%x\r\n%b\r\n" % (len(chunk), chunk))
~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\http\client.py", line 1086, in send
self.sock.sendall(data)
~~~~~~~~~~~~~~~~~^^^^^^
TimeoutError: timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 546, in _raw_request
response = self.http.request(method, url, **kwargs)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\_request_methods.py", line 143, in request
return self.request_encode_body(
~~~~~~~~~~~~~~~~~~~~~~~~^
method, url, fields=fields, headers=headers, **urlopen_kw
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\_request_methods.py", line 278, in request_encode_body
return self.urlopen(method, url, **extra_kw)
~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\poolmanager.py", line 457, in urlopen
response = conn.urlopen(method, u.request_uri, **kw)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 842, in urlopen
retries = retries.increment(
method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2]
)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\util\retry.py", line 498, in increment
raise reraise(type(error), error, _stacktrace)
~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\util\util.py", line 38, in reraise
raise value.with_traceback(tb)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 788, in urlopen
response = self._make_request(
conn,
...<10 lines>...
**response_kw,
)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 493, in _make_request
conn.request(
~~~~~~~~~~~~^
method,
^^^^^^^
...<6 lines>...
enforce_content_length=enforce_content_length,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connection.py", line 512, in request
self.send(b"%x\r\n%b\r\n" % (len(chunk), chunk))
~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\http\client.py", line 1086, in send
self.sock.sendall(data)
~~~~~~~~~~~~~~~~~^^^^^^
urllib3.exceptions.ProtocolError: ('Connection aborted.', TimeoutError('timed out'))
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "d:\Python Code\Sales_Import.py", line 494, in <module>
ch_client.insert_df(
~~~~~~~~~~~~~~~~~~~^
table=f"`{TABLE_NAME}`",
^^^^^^^^^^^^^^^^^^^^^^^^
df=chunk,
^^^^^^^^^
database=CH_CONFIG['database']
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 1013, in insert_df
return self.insert(table,
~~~~~~~~~~~^^^^^^^
df,
^^^
...<5 lines>...
transport_settings=transport_settings,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
context=context)
^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 978, in insert
return self.data_insert(context)
~~~~~~~~~~~~~~~~^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 347, in data_insert
response = self._raw_request(block_gen, params, headers, error_handler=error_handler, server_wait=False)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 558, in _raw_request
raise OperationalError(f'Error {ex} executing HTTP request attempt {attempts}{err_url}') from ex
clickhouse_connect.driver.exceptions.OperationalError: Error ('Connection aborted.', TimeoutError('timed out')) executing HTTP request attempt 1 (http://172.188.12.194:8123)
================================
================================
TIME : 2026-05-19 17:15:26.425862
TABLE : Sales
ERROR : Error ('Connection aborted.', TimeoutError('timed out')) executing HTTP request attempt 1 (http://172.188.12.194:8123)
TRACEBACK :
Traceback (most recent call last):
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 788, in urlopen
response = self._make_request(
conn,
...<10 lines>...
**response_kw,
)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 493, in _make_request
conn.request(
~~~~~~~~~~~~^
method,
^^^^^^^
...<6 lines>...
enforce_content_length=enforce_content_length,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connection.py", line 512, in request
self.send(b"%x\r\n%b\r\n" % (len(chunk), chunk))
~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\http\client.py", line 1086, in send
self.sock.sendall(data)
~~~~~~~~~~~~~~~~~^^^^^^
TimeoutError: timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 546, in _raw_request
response = self.http.request(method, url, **kwargs)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\_request_methods.py", line 143, in request
return self.request_encode_body(
~~~~~~~~~~~~~~~~~~~~~~~~^
method, url, fields=fields, headers=headers, **urlopen_kw
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\_request_methods.py", line 278, in request_encode_body
return self.urlopen(method, url, **extra_kw)
~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\poolmanager.py", line 457, in urlopen
response = conn.urlopen(method, u.request_uri, **kw)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 842, in urlopen
retries = retries.increment(
method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2]
)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\util\retry.py", line 498, in increment
raise reraise(type(error), error, _stacktrace)
~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\util\util.py", line 38, in reraise
raise value.with_traceback(tb)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 788, in urlopen
response = self._make_request(
conn,
...<10 lines>...
**response_kw,
)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 493, in _make_request
conn.request(
~~~~~~~~~~~~^
method,
^^^^^^^
...<6 lines>...
enforce_content_length=enforce_content_length,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connection.py", line 512, in request
self.send(b"%x\r\n%b\r\n" % (len(chunk), chunk))
~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\http\client.py", line 1086, in send
self.sock.sendall(data)
~~~~~~~~~~~~~~~~~^^^^^^
urllib3.exceptions.ProtocolError: ('Connection aborted.', TimeoutError('timed out'))
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "d:\Python Code\Sales_Import.py", line 494, in <module>
ch_client.insert_df(
~~~~~~~~~~~~~~~~~~~^
table=f"`{TABLE_NAME}`",
^^^^^^^^^^^^^^^^^^^^^^^^
df=chunk,
^^^^^^^^^
database=CH_CONFIG['database']
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 1013, in insert_df
return self.insert(table,
~~~~~~~~~~~^^^^^^^
df,
^^^
...<5 lines>...
transport_settings=transport_settings,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
context=context)
^^^^^^^^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 978, in insert
return self.data_insert(context)
~~~~~~~~~~~~~~~~^^^^^^^^^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 347, in data_insert
response = self._raw_request(block_gen, params, headers, error_handler=error_handler, server_wait=False)
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 558, in _raw_request
raise OperationalError(f'Error {ex} executing HTTP request attempt {attempts}{err_url}') from ex
clickhouse_connect.driver.exceptions.OperationalError: Error ('Connection aborted.', TimeoutError('timed out')) executing HTTP request attempt 1 (http://172.188.12.194:8123)
================================
+22
View File
@@ -166,4 +166,26 @@ Traceback (most recent call last):
data = cursor.fetchmany(chunksize) data = cursor.fetchmany(chunksize)
pyodbc.OperationalError: ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.\r\n (10060) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10060)') pyodbc.OperationalError: ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.\r\n (10060) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10060)')
================================
================================
TIME : 2026-05-19 17:16:08.366780
ERROR : ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: An existing connection was forcibly closed by the remote host.\r\n (10054) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10054)')
TRACEBACK :
Traceback (most recent call last):
File "d:\Python Code\Sales_Import.py", line 411, in <module>
for chunk in pd.read_sql(
~~~~~~~~~~~^
query,
^^^^^^
sql_conn,
^^^^^^^^^
chunksize=chunk_size
^^^^^^^^^^^^^^^^^^^^
):
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\pandas\io\sql.py", line 2730, in _query_iterator
data = cursor.fetchmany(chunksize)
pyodbc.OperationalError: ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: An existing connection was forcibly closed by the remote host.\r\n (10054) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10054)')
================================ ================================
+13248
View File
File diff suppressed because it is too large Load Diff
+85
View File
@@ -0,0 +1,85 @@
================================
TIME : 2026-05-20 09:30:27.532149
ERROR : ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]Named Pipes Provider: Could not open a connection to SQL Server [2]. (2) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Invalid connection string attribute (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (2)')
TRACEBACK :
Traceback (most recent call last):
File "d:\Python Code\PaidVisibility_Compliance Import.py", line 313, in <module>
sql_conn = connect_sql()
File "d:\Python Code\PaidVisibility_Compliance Import.py", line 70, in connect_sql
conn = pyodbc.connect(
SQL_CONN_STR,
autocommit=True
)
pyodbc.OperationalError: ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]Named Pipes Provider: Could not open a connection to SQL Server [2]. (2) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Invalid connection string attribute (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (2)')
================================
================================
TIME : 2026-05-20 10:38:12.600484
ERROR : ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]Named Pipes Provider: Could not open a connection to SQL Server [64]. (64) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Invalid connection string attribute (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (64)')
TRACEBACK :
Traceback (most recent call last):
File "d:\Python Code\PaidVisibility_Compliance Import.py", line 348, in <module>
for chunk in pd.read_sql(
~~~~~~~~~~~^
query,
^^^^^^
sql_conn,
^^^^^^^^^
chunksize=chunk_size
^^^^^^^^^^^^^^^^^^^^
):
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\pandas\io\sql.py", line 2730, in _query_iterator
data = cursor.fetchmany(chunksize)
pyodbc.OperationalError: ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: An existing connection was forcibly closed by the remote host.\r\n (10054) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10054)')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "d:\Python Code\PaidVisibility_Compliance Import.py", line 518, in <module>
sql_conn = connect_sql()
File "d:\Python Code\PaidVisibility_Compliance Import.py", line 70, in connect_sql
conn = pyodbc.connect(
SQL_CONN_STR,
autocommit=True
)
pyodbc.OperationalError: ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]Named Pipes Provider: Could not open a connection to SQL Server [64]. (64) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Invalid connection string attribute (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (64)')
================================
================================
TIME : 2026-05-20 12:28:25.483277
ERROR : ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]Named Pipes Provider: Could not open a connection to SQL Server [53]. (53) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Invalid connection string attribute (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (53)')
TRACEBACK :
Traceback (most recent call last):
File "d:\Python Code\PaidVisibility_Compliance Import.py", line 348, in <module>
for chunk in pd.read_sql(
~~~~~~~~~~~^
query,
^^^^^^
sql_conn,
^^^^^^^^^
chunksize=chunk_size
^^^^^^^^^^^^^^^^^^^^
):
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\pandas\io\sql.py", line 2730, in _query_iterator
data = cursor.fetchmany(chunksize)
pyodbc.OperationalError: ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.\r\n (10060) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10060)')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "d:\Python Code\PaidVisibility_Compliance Import.py", line 518, in <module>
sql_conn = connect_sql()
File "d:\Python Code\PaidVisibility_Compliance Import.py", line 70, in connect_sql
conn = pyodbc.connect(
SQL_CONN_STR,
autocommit=True
)
pyodbc.OperationalError: ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]Named Pipes Provider: Could not open a connection to SQL Server [53]. (53) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Invalid connection string attribute (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (53)')
================================