[Web Logins] Import

This commit is contained in:
Dipanshu Kumar
2026-05-19 16:58:54 +05:30
parent 379ead3d8e
commit 3be8cd7259
7 changed files with 11034 additions and 0 deletions
+545
View File
@@ -0,0 +1,545 @@
import pyodbc
import pandas as pd
import clickhouse_connect
import numpy as np
from datetime import datetime
import traceback
import warnings
import os

# =========================================================
# IGNORE WARNINGS
# =========================================================
# Silence only the pandas warning whose message starts with this text.
warnings.filterwarnings(
    'ignore',
    'pandas only supports SQLAlchemy connectable'
)
print("ETL Started :", datetime.now())
# =========================================================
# SQL SERVER CONNECTION
# =========================================================
# NOTE(review): the passwords below are committed to source control.
# They can now be overridden with ETL_SQL_PASSWORD / ETL_CH_PASSWORD; the
# literals remain only as fallbacks so existing runs keep working and
# should be rotated and removed once the environment provides them.
SQL_CONN_STR = (
    'DRIVER={ODBC Driver 17 for SQL Server};'
    'SERVER=10.200.25.65;'
    'DATABASE=CPMIndiaBusinessInsight;'
    'UID=bsgteam_test;'
    f"PWD={os.environ.get('ETL_SQL_PASSWORD', 'B$gt3@m#00512')};"
    'TrustServerCertificate=yes;'
)
# =========================================================
# CLICKHOUSE CONFIG
# =========================================================
# Passed as keyword arguments to clickhouse_connect.get_client().
CH_CONFIG = {
    'host': '172.188.12.194',
    'port': 8123,
    'username': 'default',
    'password': os.environ.get('ETL_CH_PASSWORD', 'dipanshu_k'),
    'database': 'DaburIndia_BI'
}
# =========================================================
# TABLE NAME
# =========================================================
# The same table name is used for the SQL Server source and the
# ClickHouse target; PROJECT_ID filters the source rows.
TABLE_NAME = 'OQaD'
PROJECT_ID = 41654
# =========================================================
# LOAD SETTINGS
# =========================================================
# When TRUNCATE_BEFORE_LOAD is set the target table is emptied once per
# run; table_truncated records that this has already happened.
TRUNCATE_BEFORE_LOAD = True
table_truncated = False
# =========================================================
# CLICKHOUSE DATE COLUMNS
# =========================================================
# Column names (matched case-insensitively) cleaned as dates.
DATE_COLUMNS = [
    'visit_date'
]
# =========================================================
# CLICKHOUSE DATETIME COLUMNS
# =========================================================
# Column names (matched case-insensitively) cleaned as datetimes.
DATETIME_COLUMNS = [
    'create_date',
    'update_date'
]
# =========================================================
# CLEAN DATAFRAME
# =========================================================
def _clean_object_value(val):
    """Convert one object-column value to a plain Python value.

    Mapping: null -> None; bool -> 0/1; integer -> int; float -> int when
    it has no fractional part, otherwise float; str -> stripped str;
    pandas Timestamp -> datetime.datetime; datetime -> unchanged;
    anything else -> ``str(val)``.
    """
    if pd.isnull(val):
        return None
    # bool is a subclass of int, so it must be tested before the integer
    # branch; np.bool_ is neither and used to be stringified as 'True'.
    if isinstance(val, (bool, np.bool_)):
        return int(val)
    if isinstance(val, (int, np.integer)):
        return int(val)
    if isinstance(val, (float, np.floating)):
        number = float(val)
        return int(number) if number.is_integer() else number
    if isinstance(val, str):
        return val.strip()
    if isinstance(val, pd.Timestamp):
        return val.to_pydatetime()
    if isinstance(val, datetime):
        return val
    return str(val)


def clean_dataframe(df):
    """Normalise a DataFrame chunk column by column.

    Columns named in DATE_COLUMNS / DATETIME_COLUMNS (case-insensitive)
    are parsed with ``pd.to_datetime``; values that fail to parse or whose
    year is outside 1970-2100 become null. Integer and float dtype columns
    are converted element-wise (whole-number float columns become ints).
    Every other column is cleaned value by value with
    ``_clean_object_value``.

    Args:
        df: The chunk to clean.

    Returns:
        The cleaned DataFrame. Cleaning is best-effort: a column that
        raises is reported and left as it was, and if the whole pass fails
        the frame is returned in whatever state it had reached.
    """
    try:
        # ---------------------------------------------
        # Replace NaN
        # ---------------------------------------------
        df = df.replace({np.nan: None})
        # Lower-cased once here instead of once per column.
        date_cols = {name.lower() for name in DATE_COLUMNS}
        datetime_cols = {name.lower() for name in DATETIME_COLUMNS}
        # ---------------------------------------------
        # Process Column Wise
        # ---------------------------------------------
        for col in df.columns:
            try:
                print(f"\nCleaning Column : {col}")
                series = df[col]
                # str() keeps non-string column labels from raising here.
                col_key = str(col).lower()
                # =====================================
                # DATE32 COLUMNS
                # =====================================
                if col_key in date_cols:
                    print(f"Date32 Column : {col}")
                    parsed = pd.to_datetime(series, errors='coerce')
                    # Remove invalid dates (NaT has no year, so it fails too)
                    parsed = parsed.where(
                        (parsed.dt.year >= 1970) &
                        (parsed.dt.year <= 2100)
                    )
                    # Convert to datetime.date
                    cleaned = parsed.apply(
                        lambda x: x.date() if pd.notnull(x) else None
                    )
                # =====================================
                # DATETIME64 COLUMNS
                # =====================================
                elif col_key in datetime_cols:
                    print(f"DateTime Column : {col}")
                    parsed = pd.to_datetime(series, errors='coerce')
                    # Remove invalid dates
                    parsed = parsed.where(
                        (parsed.dt.year >= 1970) &
                        (parsed.dt.year <= 2100)
                    )
                    # Convert to datetime.datetime
                    cleaned = parsed.apply(
                        lambda x: x.to_pydatetime() if pd.notnull(x) else None
                    )
                # =====================================
                # INTEGER COLUMNS
                # =====================================
                elif pd.api.types.is_integer_dtype(series):
                    print(f"Integer Column : {col}")
                    cleaned = pd.to_numeric(series, errors='coerce').apply(
                        lambda x: int(x) if pd.notnull(x) else None
                    )
                # =====================================
                # FLOAT COLUMNS
                # =====================================
                elif pd.api.types.is_float_dtype(series):
                    print(f"Float Column : {col}")
                    non_null = series.dropna()
                    # Convert whole float to int
                    if len(non_null) > 0 and (non_null % 1 == 0).all():
                        cleaned = series.apply(
                            lambda x: int(x) if pd.notnull(x) else None
                        )
                    else:
                        cleaned = series.apply(
                            lambda x: float(x) if pd.notnull(x) else None
                        )
                # =====================================
                # OBJECT / STRING COLUMNS
                # =====================================
                else:
                    print(f"String/Object Column : {col}")
                    cleaned = [_clean_object_value(val) for val in series]
                # Assign once, at the end, so a failure part-way through
                # leaves the column untouched rather than half-converted.
                df[col] = cleaned
            except Exception as col_error:
                print("\n================================")
                print(f"COLUMN FAILED : {col}")
                print(str(col_error))
                print("================================")
        return df
    except Exception as clean_error:
        print("\n================================")
        print("DATA CLEAN FAILED")
        print(str(clean_error))
        print("================================")
        return df
# =========================================================
# MAIN PROCESS
# =========================================================
def _append_error_log(path, error, table=None):
    """Append one error entry to the log file at *path*.

    Must be called from inside an ``except`` block: the traceback text is
    taken from the exception currently being handled.

    Args:
        path: Log file to append to (created if it does not exist).
        error: The exception being reported.
        table: Optional table name, written as a ``TABLE :`` line.
    """
    entry = [
        "\n\n================================",
        f"\nTIME : {datetime.now()}",
    ]
    if table is not None:
        entry.append(f"\nTABLE : {table}")
    entry.append(f"\nERROR : {str(error)}")
    entry.append(f"\nTRACEBACK :\n{traceback.format_exc()}")
    entry.append("\n================================")
    with open(path, "a", encoding="utf-8") as log:
        log.writelines(entry)


# Start as None so the cleanup below can tell "never opened" from "open"
# instead of relying on a swallowed NameError.
sql_conn = None
ch_client = None
try:
    # =====================================================
    # CONNECT SQL SERVER
    # =====================================================
    sql_conn = pyodbc.connect(SQL_CONN_STR)
    print("Connected to SQL Server")
    # =====================================================
    # CONNECT CLICKHOUSE
    # =====================================================
    ch_client = clickhouse_connect.get_client(**CH_CONFIG)
    print("Connected to ClickHouse")
    # =====================================================
    # QUERY
    # =====================================================
    query = f"""
    SELECT *
    FROM dbo.[{TABLE_NAME}]
    WHERE Project_Id = {PROJECT_ID}
    """
    print("\nExecuting Query:")
    print(query)
    # =====================================================
    # CHUNK SIZE
    # =====================================================
    chunk_size = 100000
    total_rows = 0
    failed_chunks = 0
    # =====================================================
    # READ DATA
    # =====================================================
    for chunk in pd.read_sql(
        query,
        sql_conn,
        chunksize=chunk_size
    ):
        try:
            print("\n================================")
            print(f"Processing {len(chunk)} Rows")
            print("================================")
            # =================================================
            # CLEAN DATA
            # =================================================
            chunk = clean_dataframe(chunk)
            # =================================================
            # DEBUG COLUMN TYPES
            # =================================================
            print("\nCOLUMN TYPES")
            for col in chunk.columns:
                sample = chunk[col].dropna()
                if len(sample) > 0:
                    print(
                        col,
                        type(sample.iloc[0]),
                        sample.iloc[0]
                    )
            # =================================================
            # DEBUG DATE COLUMN
            # =================================================
            if 'visit_date' in chunk.columns:
                print("\nvisit_date Sample")
                print(chunk['visit_date'].head())
                sample = chunk['visit_date'].dropna()
                if len(sample) > 0:
                    print(
                        "visit_date datatype:",
                        type(sample.iloc[0])
                    )
            # =================================================
            # TRUNCATE TABLE FIRST TIME ONLY
            # =================================================
            # Runs only after a first chunk has been read and cleaned
            # (presumably so an empty or failed source read does not
            # wipe the target - confirm with the owner).
            if TRUNCATE_BEFORE_LOAD and not table_truncated:
                print("\n================================")
                print(f"TRUNCATING TABLE : {TABLE_NAME}")
                print("================================")
                truncate_query = f"""
                TRUNCATE TABLE
                {CH_CONFIG['database']}.{TABLE_NAME}
                """
                ch_client.command(truncate_query)
                print("TABLE TRUNCATED SUCCESSFULLY")
                table_truncated = True
            # =================================================
            # INSERT INTO CLICKHOUSE
            # =================================================
            print("\nInserting into ClickHouse...")
            ch_client.insert_df(
                table=TABLE_NAME,
                df=chunk,
                database=CH_CONFIG['database']
            )
            total_rows += len(chunk)
            print(
                f"\nInserted Total Rows : {total_rows}"
            )
        except Exception as chunk_error:
            # A failed chunk is logged and skipped so the remaining
            # chunks still load; it is counted so the summary is honest.
            failed_chunks += 1
            print("\n================================")
            print("CHUNK INSERT FAILED")
            print("================================")
            print(str(chunk_error))
            traceback.print_exc()
            # =============================================
            # SAVE ERROR LOG
            # =============================================
            _append_error_log(
                "clickhouse_chunk_error.log",
                chunk_error,
                table=TABLE_NAME
            )
    print("\n================================")
    if failed_chunks:
        # The target may be truncated and only partly reloaded.
        print("ETL COMPLETED WITH ERRORS")
        print(f"FAILED CHUNKS : {failed_chunks}")
    else:
        print("ETL COMPLETED SUCCESSFULLY")
    print(f"TOTAL ROWS INSERTED : {total_rows}")
    print("================================")
# =========================================================
# MAIN ERROR
# =========================================================
except Exception as main_error:
    print("\n================================")
    print("MAIN ERROR")
    print("================================")
    print(str(main_error))
    traceback.print_exc()
    _append_error_log("clickhouse_main_error.log", main_error)
# =========================================================
# CLOSE CONNECTIONS
# =========================================================
finally:
    # Cleanup is best-effort: a close failure is reported, never raised.
    if sql_conn is not None:
        try:
            sql_conn.close()
            print("\nSQL Server Connection Closed")
        except Exception as close_error:
            print(f"\nSQL Server Connection Close Failed : {close_error}")
    if ch_client is not None:
        try:
            ch_client.close()
            print("ClickHouse Connection Closed")
        except Exception as close_error:
            print(f"ClickHouse Connection Close Failed : {close_error}")
print("\nETL Finished :", datetime.now())
+572
View File
@@ -0,0 +1,572 @@
import pyodbc
import pandas as pd
import clickhouse_connect
import numpy as np
from datetime import datetime
import traceback
import warnings
import os

# =========================================================
# IGNORE WARNINGS
# =========================================================
# Silence only the pandas warning whose message starts with this text.
warnings.filterwarnings(
    'ignore',
    'pandas only supports SQLAlchemy connectable'
)
print("ETL Started :", datetime.now())
# =========================================================
# SQL SERVER CONNECTION
# =========================================================
# NOTE(review): the passwords below are committed to source control.
# They can now be overridden with ETL_SQL_PASSWORD / ETL_CH_PASSWORD; the
# literals remain only as fallbacks so existing runs keep working and
# should be rotated and removed once the environment provides them.
SQL_CONN_STR = (
    'DRIVER={ODBC Driver 17 for SQL Server};'
    'SERVER=10.200.25.65;'
    'DATABASE=CPMIndiaBusinessInsight;'
    'UID=bsgteam_test;'
    f"PWD={os.environ.get('ETL_SQL_PASSWORD', 'B$gt3@m#00512')};"
    'TrustServerCertificate=yes;'
)
# =========================================================
# CLICKHOUSE CONFIG
# =========================================================
# Passed as keyword arguments to clickhouse_connect.get_client().
CH_CONFIG = {
    'host': '172.188.12.194',
    'port': 8123,
    'username': 'default',
    'password': os.environ.get('ETL_CH_PASSWORD', 'dipanshu_k'),
    'database': 'DaburIndia_BI'
}
# =========================================================
# TABLE NAME
# =========================================================
# The same table name is used for the SQL Server source and the
# ClickHouse target; PROJECT_ID filters the source rows.
TABLE_NAME = 'PaidVisibility_Compliance'
PROJECT_ID = 41654
# =========================================================
# LOAD SETTINGS
# =========================================================
# When TRUNCATE_BEFORE_LOAD is set the target table is emptied once per
# run; table_truncated records that this has already happened.
TRUNCATE_BEFORE_LOAD = True
table_truncated = False
# =========================================================
# CLICKHOUSE DATE COLUMNS
# =========================================================
# Column names (matched case-insensitively) cleaned as dates.
DATE_COLUMNS = [
    'visit_date'
]
# =========================================================
# CLICKHOUSE DATETIME COLUMNS
# =========================================================
# Column names (matched case-insensitively) cleaned as datetimes.
DATETIME_COLUMNS = [
    'create_date',
    'update_date'
]
# =========================================================
# CLEAN DATAFRAME
# =========================================================
def _clean_object_value(val):
    """Convert one object-column value to a plain Python value.

    Mapping: null -> None; str -> stripped str; bool -> 0/1;
    integer -> int; float -> int when it has no fractional part,
    otherwise float; pandas Timestamp -> datetime.datetime;
    datetime -> unchanged; any other object exposing ``year`` and
    ``month`` -> unchanged; anything else -> ``str(val)``.
    """
    if pd.isnull(val):
        return None
    if isinstance(val, str):
        return val.strip()
    # np.bool_ is not a Python bool and used to be stringified as 'True'.
    if isinstance(val, (bool, np.bool_)):
        return int(val)
    if isinstance(val, (int, np.integer)):
        return int(val)
    if isinstance(val, (float, np.floating)):
        number = float(val)
        return int(number) if number.is_integer() else number
    if isinstance(val, pd.Timestamp):
        return val.to_pydatetime()
    if isinstance(val, datetime):
        return val
    # Date-like objects (anything exposing year and month) pass through.
    if hasattr(val, 'year') and hasattr(val, 'month'):
        return val
    return str(val)


def clean_dataframe(df):
    """Normalise a DataFrame chunk column by column.

    Columns named in DATE_COLUMNS / DATETIME_COLUMNS (case-insensitive)
    are parsed with ``pd.to_datetime``; values that fail to parse become
    None, the rest become ``datetime.date`` / ``datetime.datetime``
    objects. Integer and float dtype columns are converted element-wise
    (whole-number float columns become ints). Every other column is
    cleaned value by value with ``_clean_object_value``.

    Args:
        df: The chunk to clean.

    Returns:
        The cleaned DataFrame. Cleaning is best-effort: a column that
        raises is reported and left as it was, and if the whole pass fails
        the frame is returned in whatever state it had reached.
    """
    try:
        # ---------------------------------------------
        # Replace NaN
        # ---------------------------------------------
        df = df.replace({np.nan: None})
        # Lower-cased once here instead of once per column.
        date_cols = {name.lower() for name in DATE_COLUMNS}
        datetime_cols = {name.lower() for name in DATETIME_COLUMNS}
        # ---------------------------------------------
        # Process Columns
        # ---------------------------------------------
        for col in df.columns:
            try:
                print(f"\nCleaning Column : {col}")
                series = df[col]
                # str() keeps non-string column labels from raising here.
                col_key = str(col).lower()
                # =====================================
                # DATE32 COLUMNS
                # =====================================
                if col_key in date_cols:
                    print(f"Date32 Column : {col}")
                    parsed = pd.to_datetime(series, errors='coerce')
                    cleaned = [
                        None if pd.isnull(stamp) else stamp.date()
                        for stamp in parsed
                    ]
                # =====================================
                # DATETIME COLUMNS
                # =====================================
                elif col_key in datetime_cols:
                    print(f"DateTime Column : {col}")
                    parsed = pd.to_datetime(series, errors='coerce')
                    cleaned = [
                        None if pd.isnull(stamp) else stamp.to_pydatetime()
                        for stamp in parsed
                    ]
                # =====================================
                # INTEGER COLUMNS
                # =====================================
                elif pd.api.types.is_integer_dtype(series):
                    print(f"Integer Column : {col}")
                    cleaned = pd.to_numeric(series, errors='coerce').apply(
                        lambda x: int(x) if pd.notnull(x) else None
                    )
                # =====================================
                # FLOAT COLUMNS
                # =====================================
                elif pd.api.types.is_float_dtype(series):
                    print(f"Float Column : {col}")
                    numeric = pd.to_numeric(series, errors='coerce')
                    non_null = numeric.dropna()
                    # Convert float to int if possible
                    if len(non_null) > 0 and (non_null % 1 == 0).all():
                        cleaned = numeric.apply(
                            lambda x: int(x) if pd.notnull(x) else None
                        )
                    else:
                        cleaned = numeric.apply(
                            lambda x: float(x) if pd.notnull(x) else None
                        )
                # =====================================
                # OBJECT / STRING COLUMNS
                # =====================================
                else:
                    print(f"String/Object Column : {col}")
                    cleaned = [_clean_object_value(val) for val in series]
                # Assign once, at the end, so a failure part-way through
                # leaves the column untouched rather than half-converted.
                df[col] = cleaned
            except Exception as col_error:
                print("\n================================")
                print(f"COLUMN FAILED : {col}")
                print(str(col_error))
                print("================================")
        return df
    except Exception as clean_error:
        print("\n================================")
        print("DATA CLEAN FAILED")
        print(str(clean_error))
        print("================================")
        return df
# =========================================================
# MAIN PROCESS
# =========================================================
def _append_error_log(path, error, table=None):
    """Append one error entry to the log file at *path*.

    Must be called from inside an ``except`` block: the traceback text is
    taken from the exception currently being handled.

    Args:
        path: Log file to append to (created if it does not exist).
        error: The exception being reported.
        table: Optional table name, written as a ``TABLE :`` line.
    """
    entry = [
        "\n\n================================",
        f"\nTIME : {datetime.now()}",
    ]
    if table is not None:
        entry.append(f"\nTABLE : {table}")
    entry.append(f"\nERROR : {str(error)}")
    entry.append(f"\nTRACEBACK :\n{traceback.format_exc()}")
    entry.append("\n================================")
    with open(path, "a", encoding="utf-8") as log:
        log.writelines(entry)


# Start as None so the cleanup below can tell "never opened" from "open"
# instead of relying on a swallowed NameError.
sql_conn = None
ch_client = None
try:
    # =====================================================
    # CONNECT SQL SERVER
    # =====================================================
    sql_conn = pyodbc.connect(SQL_CONN_STR)
    print("Connected to SQL Server")
    # =====================================================
    # CONNECT CLICKHOUSE
    # =====================================================
    ch_client = clickhouse_connect.get_client(**CH_CONFIG)
    print("Connected to ClickHouse")
    # =====================================================
    # QUERY
    # =====================================================
    query = f"""
    SELECT *
    FROM dbo.[{TABLE_NAME}]
    WHERE Project_Id = {PROJECT_ID}
    """
    print("\nExecuting Query:")
    print(query)
    # =====================================================
    # CHUNK SIZE
    # =====================================================
    chunk_size = 100000
    total_rows = 0
    failed_chunks = 0
    # =====================================================
    # READ DATA
    # =====================================================
    for chunk in pd.read_sql(
        query,
        sql_conn,
        chunksize=chunk_size
    ):
        try:
            print("\n================================")
            print(f"Processing {len(chunk)} Rows")
            print("================================")
            # =================================================
            # CLEAN DATA
            # =================================================
            chunk = clean_dataframe(chunk)
            # =================================================
            # DEBUG COLUMN TYPES
            # =================================================
            print("\nCOLUMN TYPES")
            for col in chunk.columns:
                sample = chunk[col].dropna()
                if len(sample) > 0:
                    print(
                        col,
                        type(sample.iloc[0]),
                        sample.iloc[0]
                    )
            # =================================================
            # DEBUG DATE COLUMN
            # =================================================
            if 'visit_date' in chunk.columns:
                print("\nvisit_date Sample")
                print(chunk['visit_date'].head())
                sample = chunk['visit_date'].dropna()
                if len(sample) > 0:
                    print(
                        "visit_date datatype:",
                        type(sample.iloc[0])
                    )
            # =================================================
            # TRUNCATE TABLE
            # =================================================
            # Runs only after a first chunk has been read and cleaned
            # (presumably so an empty or failed source read does not
            # wipe the target - confirm with the owner).
            if TRUNCATE_BEFORE_LOAD and not table_truncated:
                print("\n================================")
                print(f"TRUNCATING TABLE : {TABLE_NAME}")
                print("================================")
                truncate_query = f"""
                TRUNCATE TABLE
                {CH_CONFIG['database']}.{TABLE_NAME}
                """
                ch_client.command(truncate_query)
                print("TABLE TRUNCATED SUCCESSFULLY")
                table_truncated = True
            # =================================================
            # INSERT INTO CLICKHOUSE
            # =================================================
            print("\nInserting into ClickHouse...")
            ch_client.insert_df(
                table=TABLE_NAME,
                df=chunk,
                database=CH_CONFIG['database']
            )
            total_rows += len(chunk)
            print(
                f"\nInserted Total Rows : {total_rows}"
            )
        except Exception as chunk_error:
            # A failed chunk is logged and skipped so the remaining
            # chunks still load; it is counted so the summary is honest.
            failed_chunks += 1
            print("\n================================")
            print("CHUNK INSERT FAILED")
            print("================================")
            print(str(chunk_error))
            traceback.print_exc()
            # =============================================
            # SAVE ERROR LOG
            # =============================================
            _append_error_log(
                "clickhouse_chunk_error.log",
                chunk_error,
                table=TABLE_NAME
            )
    print("\n================================")
    if failed_chunks:
        # The target may be truncated and only partly reloaded.
        print("ETL COMPLETED WITH ERRORS")
        print(f"FAILED CHUNKS : {failed_chunks}")
    else:
        print("ETL COMPLETED SUCCESSFULLY")
    print(f"TOTAL ROWS INSERTED : {total_rows}")
    print("================================")
# =========================================================
# MAIN ERROR
# =========================================================
except Exception as main_error:
    print("\n================================")
    print("MAIN ERROR")
    print("================================")
    print(str(main_error))
    traceback.print_exc()
    _append_error_log("clickhouse_main_error.log", main_error)
# =========================================================
# CLOSE CONNECTIONS
# =========================================================
finally:
    # Cleanup is best-effort: a close failure is reported, never raised.
    if sql_conn is not None:
        try:
            sql_conn.close()
            print("\nSQL Server Connection Closed")
        except Exception as close_error:
            print(f"\nSQL Server Connection Close Failed : {close_error}")
    if ch_client is not None:
        try:
            ch_client.close()
            print("ClickHouse Connection Closed")
        except Exception as close_error:
            print(f"ClickHouse Connection Close Failed : {close_error}")
print("\nETL Finished :", datetime.now())
+545
View File
@@ -0,0 +1,545 @@
import pyodbc
import pandas as pd
import clickhouse_connect
import numpy as np
from datetime import datetime
import traceback
import warnings
import os

# =========================================================
# IGNORE WARNINGS
# =========================================================
# Silence only the pandas warning whose message starts with this text.
warnings.filterwarnings(
    'ignore',
    'pandas only supports SQLAlchemy connectable'
)
print("ETL Started :", datetime.now())
# =========================================================
# SQL SERVER CONNECTION
# =========================================================
# NOTE(review): the passwords below are committed to source control.
# They can now be overridden with ETL_SQL_PASSWORD / ETL_CH_PASSWORD; the
# literals remain only as fallbacks so existing runs keep working and
# should be rotated and removed once the environment provides them.
SQL_CONN_STR = (
    'DRIVER={ODBC Driver 17 for SQL Server};'
    'SERVER=10.200.25.65;'
    'DATABASE=CPMIndiaBusinessInsight;'
    'UID=bsgteam_test;'
    f"PWD={os.environ.get('ETL_SQL_PASSWORD', 'B$gt3@m#00512')};"
    'TrustServerCertificate=yes;'
)
# =========================================================
# CLICKHOUSE CONFIG
# =========================================================
# Passed as keyword arguments to clickhouse_connect.get_client().
CH_CONFIG = {
    'host': '172.188.12.194',
    'port': 8123,
    'username': 'default',
    'password': os.environ.get('ETL_CH_PASSWORD', 'dipanshu_k'),
    'database': 'DaburIndia_BI'
}
# =========================================================
# TABLE NAME
# =========================================================
# The same table name is used for the SQL Server source and the
# ClickHouse target; PROJECT_ID filters the source rows.
TABLE_NAME = 'PaidVisibility'
PROJECT_ID = 41654
# =========================================================
# LOAD SETTINGS
# =========================================================
# When TRUNCATE_BEFORE_LOAD is set the target table is emptied once per
# run; table_truncated records that this has already happened.
TRUNCATE_BEFORE_LOAD = True
table_truncated = False
# =========================================================
# CLICKHOUSE DATE COLUMNS
# =========================================================
# Column names (matched case-insensitively) cleaned as dates.
DATE_COLUMNS = [
    'visit_date'
]
# =========================================================
# CLICKHOUSE DATETIME COLUMNS
# =========================================================
# Column names (matched case-insensitively) cleaned as datetimes.
DATETIME_COLUMNS = [
    'create_date',
    'update_date'
]
# =========================================================
# CLEAN DATAFRAME
# =========================================================
def _clean_object_value(val):
    """Convert one object-column value to a plain Python value.

    Mapping: null -> None; bool -> 0/1; integer -> int; float -> int when
    it has no fractional part, otherwise float; str -> stripped str;
    pandas Timestamp -> datetime.datetime; datetime -> unchanged;
    anything else -> ``str(val)``.
    """
    if pd.isnull(val):
        return None
    # bool is a subclass of int, so it must be tested before the integer
    # branch; np.bool_ is neither and used to be stringified as 'True'.
    if isinstance(val, (bool, np.bool_)):
        return int(val)
    if isinstance(val, (int, np.integer)):
        return int(val)
    if isinstance(val, (float, np.floating)):
        number = float(val)
        return int(number) if number.is_integer() else number
    if isinstance(val, str):
        return val.strip()
    if isinstance(val, pd.Timestamp):
        return val.to_pydatetime()
    if isinstance(val, datetime):
        return val
    return str(val)


def clean_dataframe(df):
    """Normalise a DataFrame chunk column by column.

    Columns named in DATE_COLUMNS / DATETIME_COLUMNS (case-insensitive)
    are parsed with ``pd.to_datetime``; values that fail to parse or whose
    year is outside 1970-2100 become null. Integer and float dtype columns
    are converted element-wise (whole-number float columns become ints).
    Every other column is cleaned value by value with
    ``_clean_object_value``.

    Args:
        df: The chunk to clean.

    Returns:
        The cleaned DataFrame. Cleaning is best-effort: a column that
        raises is reported and left as it was, and if the whole pass fails
        the frame is returned in whatever state it had reached.
    """
    try:
        # ---------------------------------------------
        # Replace NaN
        # ---------------------------------------------
        df = df.replace({np.nan: None})
        # Lower-cased once here instead of once per column.
        date_cols = {name.lower() for name in DATE_COLUMNS}
        datetime_cols = {name.lower() for name in DATETIME_COLUMNS}
        # ---------------------------------------------
        # Process Column Wise
        # ---------------------------------------------
        for col in df.columns:
            try:
                print(f"\nCleaning Column : {col}")
                series = df[col]
                # str() keeps non-string column labels from raising here.
                col_key = str(col).lower()
                # =====================================
                # DATE32 COLUMNS
                # =====================================
                if col_key in date_cols:
                    print(f"Date32 Column : {col}")
                    parsed = pd.to_datetime(series, errors='coerce')
                    # Remove invalid dates (NaT has no year, so it fails too)
                    parsed = parsed.where(
                        (parsed.dt.year >= 1970) &
                        (parsed.dt.year <= 2100)
                    )
                    # Convert to datetime.date
                    cleaned = parsed.apply(
                        lambda x: x.date() if pd.notnull(x) else None
                    )
                # =====================================
                # DATETIME64 COLUMNS
                # =====================================
                elif col_key in datetime_cols:
                    print(f"DateTime Column : {col}")
                    parsed = pd.to_datetime(series, errors='coerce')
                    # Remove invalid dates
                    parsed = parsed.where(
                        (parsed.dt.year >= 1970) &
                        (parsed.dt.year <= 2100)
                    )
                    # Convert to datetime.datetime
                    cleaned = parsed.apply(
                        lambda x: x.to_pydatetime() if pd.notnull(x) else None
                    )
                # =====================================
                # INTEGER COLUMNS
                # =====================================
                elif pd.api.types.is_integer_dtype(series):
                    print(f"Integer Column : {col}")
                    cleaned = pd.to_numeric(series, errors='coerce').apply(
                        lambda x: int(x) if pd.notnull(x) else None
                    )
                # =====================================
                # FLOAT COLUMNS
                # =====================================
                elif pd.api.types.is_float_dtype(series):
                    print(f"Float Column : {col}")
                    non_null = series.dropna()
                    # Convert whole float to int
                    if len(non_null) > 0 and (non_null % 1 == 0).all():
                        cleaned = series.apply(
                            lambda x: int(x) if pd.notnull(x) else None
                        )
                    else:
                        cleaned = series.apply(
                            lambda x: float(x) if pd.notnull(x) else None
                        )
                # =====================================
                # OBJECT / STRING COLUMNS
                # =====================================
                else:
                    print(f"String/Object Column : {col}")
                    cleaned = [_clean_object_value(val) for val in series]
                # Assign once, at the end, so a failure part-way through
                # leaves the column untouched rather than half-converted.
                df[col] = cleaned
            except Exception as col_error:
                print("\n================================")
                print(f"COLUMN FAILED : {col}")
                print(str(col_error))
                print("================================")
        return df
    except Exception as clean_error:
        print("\n================================")
        print("DATA CLEAN FAILED")
        print(str(clean_error))
        print("================================")
        return df
# =========================================================
# MAIN PROCESS
# =========================================================
def _append_error_log(path, error, table=None):
    """Append one error entry to the log file at *path*.

    Must be called from inside an ``except`` block: the traceback text is
    taken from the exception currently being handled.

    Args:
        path: Log file to append to (created if it does not exist).
        error: The exception being reported.
        table: Optional table name, written as a ``TABLE :`` line.
    """
    entry = [
        "\n\n================================",
        f"\nTIME : {datetime.now()}",
    ]
    if table is not None:
        entry.append(f"\nTABLE : {table}")
    entry.append(f"\nERROR : {str(error)}")
    entry.append(f"\nTRACEBACK :\n{traceback.format_exc()}")
    entry.append("\n================================")
    with open(path, "a", encoding="utf-8") as log:
        log.writelines(entry)


# Start as None so the cleanup below can tell "never opened" from "open"
# instead of relying on a swallowed NameError.
sql_conn = None
ch_client = None
try:
    # =====================================================
    # CONNECT SQL SERVER
    # =====================================================
    sql_conn = pyodbc.connect(SQL_CONN_STR)
    print("Connected to SQL Server")
    # =====================================================
    # CONNECT CLICKHOUSE
    # =====================================================
    ch_client = clickhouse_connect.get_client(**CH_CONFIG)
    print("Connected to ClickHouse")
    # =====================================================
    # QUERY
    # =====================================================
    query = f"""
    SELECT *
    FROM dbo.[{TABLE_NAME}]
    WHERE Project_Id = {PROJECT_ID}
    """
    print("\nExecuting Query:")
    print(query)
    # =====================================================
    # CHUNK SIZE
    # =====================================================
    chunk_size = 100000
    total_rows = 0
    failed_chunks = 0
    # =====================================================
    # READ DATA
    # =====================================================
    for chunk in pd.read_sql(
        query,
        sql_conn,
        chunksize=chunk_size
    ):
        try:
            print("\n================================")
            print(f"Processing {len(chunk)} Rows")
            print("================================")
            # =================================================
            # CLEAN DATA
            # =================================================
            chunk = clean_dataframe(chunk)
            # =================================================
            # DEBUG COLUMN TYPES
            # =================================================
            print("\nCOLUMN TYPES")
            for col in chunk.columns:
                sample = chunk[col].dropna()
                if len(sample) > 0:
                    print(
                        col,
                        type(sample.iloc[0]),
                        sample.iloc[0]
                    )
            # =================================================
            # DEBUG DATE COLUMN
            # =================================================
            if 'visit_date' in chunk.columns:
                print("\nvisit_date Sample")
                print(chunk['visit_date'].head())
                sample = chunk['visit_date'].dropna()
                if len(sample) > 0:
                    print(
                        "visit_date datatype:",
                        type(sample.iloc[0])
                    )
            # =================================================
            # TRUNCATE TABLE FIRST TIME ONLY
            # =================================================
            # Runs only after a first chunk has been read and cleaned
            # (presumably so an empty or failed source read does not
            # wipe the target - confirm with the owner).
            if TRUNCATE_BEFORE_LOAD and not table_truncated:
                print("\n================================")
                print(f"TRUNCATING TABLE : {TABLE_NAME}")
                print("================================")
                truncate_query = f"""
                TRUNCATE TABLE
                {CH_CONFIG['database']}.{TABLE_NAME}
                """
                ch_client.command(truncate_query)
                print("TABLE TRUNCATED SUCCESSFULLY")
                table_truncated = True
            # =================================================
            # INSERT INTO CLICKHOUSE
            # =================================================
            print("\nInserting into ClickHouse...")
            ch_client.insert_df(
                table=TABLE_NAME,
                df=chunk,
                database=CH_CONFIG['database']
            )
            total_rows += len(chunk)
            print(
                f"\nInserted Total Rows : {total_rows}"
            )
        except Exception as chunk_error:
            # A failed chunk is logged and skipped so the remaining
            # chunks still load; it is counted so the summary is honest.
            failed_chunks += 1
            print("\n================================")
            print("CHUNK INSERT FAILED")
            print("================================")
            print(str(chunk_error))
            traceback.print_exc()
            # =============================================
            # SAVE ERROR LOG
            # =============================================
            _append_error_log(
                "clickhouse_chunk_error.log",
                chunk_error,
                table=TABLE_NAME
            )
    print("\n================================")
    if failed_chunks:
        # The target may be truncated and only partly reloaded.
        print("ETL COMPLETED WITH ERRORS")
        print(f"FAILED CHUNKS : {failed_chunks}")
    else:
        print("ETL COMPLETED SUCCESSFULLY")
    print(f"TOTAL ROWS INSERTED : {total_rows}")
    print("================================")
# =========================================================
# MAIN ERROR
# =========================================================
except Exception as main_error:
    print("\n================================")
    print("MAIN ERROR")
    print("================================")
    print(str(main_error))
    traceback.print_exc()
    _append_error_log("clickhouse_main_error.log", main_error)
# =========================================================
# CLOSE CONNECTIONS
# =========================================================
finally:
    # Cleanup is best-effort: a close failure is reported, never raised.
    if sql_conn is not None:
        try:
            sql_conn.close()
            print("\nSQL Server Connection Closed")
        except Exception as close_error:
            print(f"\nSQL Server Connection Close Failed : {close_error}")
    if ch_client is not None:
        try:
            ch_client.close()
            print("ClickHouse Connection Closed")
        except Exception as close_error:
            print(f"ClickHouse Connection Close Failed : {close_error}")
print("\nETL Finished :", datetime.now())
+632
View File
@@ -0,0 +1,632 @@
import pyodbc
import pandas as pd
import clickhouse_connect
import numpy as np
from datetime import datetime
import traceback
import warnings
import os

# =========================================================
# IGNORE WARNINGS
# =========================================================
# Silence only the pandas warning whose message starts with this text.
warnings.filterwarnings(
    'ignore',
    'pandas only supports SQLAlchemy connectable'
)
print("\n====================================")
print("ETL Started :", datetime.now())
print("====================================")
# =========================================================
# SQL SERVER CONNECTION
# =========================================================
# NOTE(review): the passwords below are committed to source control.
# They can now be overridden with ETL_SQL_PASSWORD / ETL_CH_PASSWORD; the
# literals remain only as fallbacks so existing runs keep working and
# should be rotated and removed once the environment provides them.
SQL_CONN_STR = (
    'DRIVER={ODBC Driver 17 for SQL Server};'
    'SERVER=10.200.25.65;'
    'DATABASE=CPMIndiaBusinessInsight;'
    'UID=bsgteam_test;'
    f"PWD={os.environ.get('ETL_SQL_PASSWORD', 'B$gt3@m#00512')};"
    'TrustServerCertificate=yes;'
)
# =========================================================
# CLICKHOUSE CONFIG
# =========================================================
# NOTE(review): presumably unpacked into clickhouse_connect.get_client()
# like the sibling scripts - the consuming code is not visible here.
CH_CONFIG = {
    'host': '172.188.12.194',
    'port': 8123,
    'username': 'default',
    'password': os.environ.get('ETL_CH_PASSWORD', 'dipanshu_k'),
    'database': 'DaburIndia_BI'
}
# =========================================================
# TABLE DETAILS
# =========================================================
# NOTE(review): this table name contains a space; confirm that every SQL
# statement built from it quotes the identifier.
TABLE_NAME = 'SKU Master'
PROJECT_ID = 41654
# =========================================================
# SETTINGS
# =========================================================
TRUNCATE_BEFORE_LOAD = True
table_truncated = False
# =========================================================
# CLICKHOUSE DATE COLUMNS
# =========================================================
# Column names (matched case-insensitively) cleaned as dates.
DATE_COLUMNS = [
    'visit_date'
]
# =========================================================
# CLICKHOUSE DATETIME COLUMNS
# =========================================================
# Column names (matched case-insensitively) cleaned as datetimes.
DATETIME_COLUMNS = [
    'create_date',
    'update_date'
]
# =========================================================
# CLEAN DATAFRAME
# =========================================================
def _clean_object_value(val):
    """Convert one cell of an object/string column to a plain Python value.

    Nulls become None, strings are stripped, booleans and integers become
    int, floats become int when whole (else float), pandas Timestamps
    become datetime.datetime, other date-like objects are kept unchanged,
    and anything else is converted with str().
    """
    if pd.isnull(val):
        return None
    if isinstance(val, str):
        return val.strip()
    # bool is tested before int because bool is a subclass of int.
    if isinstance(val, bool):
        return int(val)
    if isinstance(val, (int, np.integer)):
        return int(val)
    if isinstance(val, (float, np.floating)):
        if np.isnan(val):
            return None
        return int(val) if val.is_integer() else float(val)
    # pd.Timestamp is tested before datetime because it subclasses it.
    if isinstance(val, pd.Timestamp):
        return val.to_pydatetime()
    if isinstance(val, datetime):
        return val
    # Remaining date-like objects (anything exposing .year) pass through.
    if hasattr(val, 'year'):
        return val
    return str(val)


def clean_dataframe(df, date_columns=None, datetime_columns=None):
    """Normalise every column of *df* before it is inserted into ClickHouse.

    Columns are handled by the first rule that matches:
      * name in *date_columns*      -> datetime.date values
      * name in *datetime_columns*  -> datetime.datetime values
      * integer dtype               -> int values
      * float dtype                 -> int when every non-null value is
                                       whole (12.0 -> 12), else float
      * anything else               -> cleaned cell by cell

    Column names are compared case-insensitively. Values that cannot be
    parsed as a date/number are coerced to null.

    Args:
        df: DataFrame to clean.
        date_columns: names of date columns; defaults to the module-level
            DATE_COLUMNS list.
        datetime_columns: names of datetime columns; defaults to the
            module-level DATETIME_COLUMNS list.

    Returns:
        The cleaned DataFrame. A column whose cleaning fails is reported on
        stdout and left as it was at the point of failure; if the whole
        step fails, the frame is returned as it was when the error occurred.
    """
    try:
        # ---------------------------------------------
        # Replace NaN with None
        # ---------------------------------------------
        df = df.replace({np.nan: None})
        if date_columns is None:
            date_columns = DATE_COLUMNS
        if datetime_columns is None:
            datetime_columns = DATETIME_COLUMNS
        # Lower-cased once here instead of once per column.
        date_keys = {name.lower() for name in date_columns}
        datetime_keys = {name.lower() for name in datetime_columns}
        # ---------------------------------------------
        # Process Each Column
        # ---------------------------------------------
        for col in df.columns:
            try:
                print(f"\nCleaning Column : {col}")
                col_key = col.lower()
                # =====================================
                # DATE COLUMNS
                # =====================================
                if col_key in date_keys:
                    print(f"Date Column : {col}")
                    df[col] = pd.to_datetime(df[col], errors='coerce')
                    df[col] = [
                        None if pd.isnull(val) else val.date()
                        for val in df[col]
                    ]
                # =====================================
                # DATETIME COLUMNS
                # =====================================
                elif col_key in datetime_keys:
                    print(f"DateTime Column : {col}")
                    df[col] = pd.to_datetime(df[col], errors='coerce')
                    df[col] = [
                        None if pd.isnull(val) else val.to_pydatetime()
                        for val in df[col]
                    ]
                # =====================================
                # INTEGER COLUMNS
                # =====================================
                elif pd.api.types.is_integer_dtype(df[col]):
                    print(f"Integer Column : {col}")
                    df[col] = pd.to_numeric(df[col], errors='coerce')
                    df[col] = df[col].apply(
                        lambda x: int(x) if pd.notnull(x) else None
                    )
                # =====================================
                # FLOAT COLUMNS
                # =====================================
                elif pd.api.types.is_float_dtype(df[col]):
                    print(f"Float Column : {col}")
                    df[col] = pd.to_numeric(df[col], errors='coerce')
                    non_null = df[col].dropna()
                    # Convert whole floats to int (12.0 -> 12) only when
                    # every non-null value in the column is whole.
                    all_whole = len(non_null) > 0 and (
                        (non_null % 1 == 0).all()
                    )
                    caster = int if all_whole else float
                    df[col] = df[col].apply(
                        lambda x: caster(x) if pd.notnull(x) else None
                    )
                # =====================================
                # OBJECT / STRING COLUMNS
                # =====================================
                else:
                    print(f"String/Object Column : {col}")
                    cleaned = []
                    for val in df[col]:
                        try:
                            cleaned.append(_clean_object_value(val))
                        except Exception as row_error:
                            # One bad cell must not lose the whole column:
                            # report it and store a null instead.
                            print(f"Row Cleaning Error in Column {col}")
                            print(str(row_error))
                            cleaned.append(None)
                    df[col] = cleaned
            except Exception as col_error:
                # Best effort: report the column and carry on with the rest.
                print("\n================================")
                print(f"COLUMN FAILED : {col}")
                print(str(col_error))
                print("================================")
        return df
    except Exception as clean_error:
        print("\n================================")
        print("DATA CLEAN FAILED")
        print(str(clean_error))
        print(traceback.format_exc())
        print("================================")
        return df
# =========================================================
# MAIN PROCESS
# =========================================================
# Both handles start as None so the cleanup in `finally` can tell
# "never connected" apart from "connected" without relying on a NameError.
sql_conn = None
ch_client = None
# Number of chunks that were read but could not be cleaned or inserted.
failed_chunks = 0
try:
    # =====================================================
    # CONNECT SQL SERVER
    # =====================================================
    print("\nConnecting SQL Server...")
    sql_conn = pyodbc.connect(SQL_CONN_STR)
    print("Connected to SQL Server")
    # =====================================================
    # CONNECT CLICKHOUSE
    # =====================================================
    print("\nConnecting ClickHouse...")
    ch_client = clickhouse_connect.get_client(**CH_CONFIG)
    print("Connected to ClickHouse")
    # =====================================================
    # QUERY
    # =====================================================
    # TABLE_NAME and PROJECT_ID are constants defined in this script,
    # not user input, so they are interpolated directly.
    query = f"""
    SELECT *
    FROM dbo.[{TABLE_NAME}]
    WHERE Project_Id = {PROJECT_ID}
    """
    print("\n====================================")
    print("Executing Query")
    print("====================================")
    print(query)
    # =====================================================
    # CHUNK SIZE
    # =====================================================
    chunk_size = 100000
    total_rows = 0
    # =====================================================
    # READ DATA
    # =====================================================
    # With chunksize set, read_sql yields DataFrames of at most
    # chunk_size rows. A failure while fetching is raised by the loop
    # itself and therefore lands in the MAIN ERROR handler below.
    for chunk in pd.read_sql(
        query,
        sql_conn,
        chunksize=chunk_size
    ):
        # =================================================
        # CLEAN DATA (a failure here skips only this chunk)
        # =================================================
        try:
            print("\n====================================")
            print(f"Processing Rows : {len(chunk)}")
            print("====================================")
            chunk = clean_dataframe(chunk)
            # =============================================
            # DEBUG COLUMN TYPES
            # =============================================
            print("\nCOLUMN TYPES")
            for col in chunk.columns:
                try:
                    sample = chunk[col].dropna()
                    if len(sample) > 0:
                        print(
                            col,
                            type(sample.iloc[0]),
                            sample.iloc[0]
                        )
                except Exception as debug_error:
                    # Diagnostic output only; never fail a chunk over it.
                    print(col, "type preview unavailable :", debug_error)
        except Exception as chunk_error:
            failed_chunks += 1
            print("\n====================================")
            print("CHUNK PROCESS FAILED")
            print("====================================")
            print(str(chunk_error))
            traceback.print_exc()
            continue
        # =================================================
        # TRUNCATE TABLE (once, right before the first insert)
        # =================================================
        # Deliberately outside the per-chunk handlers: if the truncate
        # fails, the re-raise must stop the whole run. Otherwise the
        # remaining chunks would be appended to a table that still holds
        # the previous load.
        if TRUNCATE_BEFORE_LOAD and not table_truncated:
            try:
                print("\n====================================")
                print(f"TRUNCATING : {TABLE_NAME}")
                print("====================================")
                truncate_query = f"""
                TRUNCATE TABLE
                `{CH_CONFIG['database']}`.`{TABLE_NAME}`
                """
                print(truncate_query)
                ch_client.command(truncate_query)
                print("TABLE TRUNCATED SUCCESSFULLY")
                table_truncated = True
            except Exception as truncate_error:
                print("\nTRUNCATE FAILED")
                print(str(truncate_error))
                raise
        # =================================================
        # INSERT DATA (a failure here skips only this chunk)
        # =================================================
        try:
            print("\n====================================")
            print("INSERTING DATA INTO CLICKHOUSE")
            print("====================================")
            ch_client.insert_df(
                table=f"`{TABLE_NAME}`",
                df=chunk,
                database=CH_CONFIG['database']
            )
            total_rows += len(chunk)
            print(f"\nTOTAL INSERTED : {total_rows}")
        except Exception as insert_error:
            failed_chunks += 1
            print("\nINSERT FAILED")
            print(str(insert_error))
            traceback.print_exc()
            insert_trace = traceback.format_exc()
            # =============================================
            # SAVE ERROR LOG
            # =============================================
            try:
                with open(
                    "clickhouse_insert_error.log",
                    "a",
                    encoding="utf-8"
                ) as log:
                    log.write("\n\n================================")
                    log.write(f"\nTIME : {datetime.now()}")
                    log.write(f"\nTABLE : {TABLE_NAME}")
                    log.write(f"\nERROR : {str(insert_error)}")
                    log.write(f"\nTRACEBACK :\n{insert_trace}")
                    log.write("\n================================")
            except OSError as log_error:
                # An unwritable log file must not abort the load.
                print(f"Could not write insert error log : {log_error}")
            continue
    # =====================================================
    # SUMMARY
    # =====================================================
    print("\n====================================")
    if failed_chunks:
        # Do not claim success when part of the data was skipped.
        print("ETL COMPLETED WITH ERRORS")
        print(f"FAILED CHUNKS : {failed_chunks}")
    else:
        print("ETL COMPLETED SUCCESSFULLY")
    print(f"TOTAL ROWS INSERTED : {total_rows}")
    print("====================================")
# =========================================================
# MAIN ERROR
# =========================================================
except Exception as main_error:
    print("\n====================================")
    print("MAIN ERROR")
    print("====================================")
    print(str(main_error))
    traceback.print_exc()
    main_trace = traceback.format_exc()
    try:
        with open(
            "clickhouse_main_error.log",
            "a",
            encoding="utf-8"
        ) as log:
            log.write("\n\n================================")
            log.write(f"\nTIME : {datetime.now()}")
            log.write(f"\nERROR : {str(main_error)}")
            log.write(f"\nTRACEBACK :\n{main_trace}")
            log.write("\n================================")
    except OSError as log_error:
        print(f"Could not write main error log : {log_error}")
# =========================================================
# CLOSE CONNECTIONS
# =========================================================
finally:
    # Close only what was actually opened; report (rather than hide) a
    # failing close, but never let it mask the outcome of the run.
    if sql_conn is not None:
        try:
            sql_conn.close()
            print("\nSQL Server Connection Closed")
        except Exception as close_error:
            print(f"\nSQL Server close failed : {close_error}")
    if ch_client is not None:
        try:
            ch_client.close()
            print("ClickHouse Connection Closed")
        except Exception as close_error:
            print(f"ClickHouse close failed : {close_error}")
print("\n====================================")
print("ETL Finished :", datetime.now())
print("====================================")
+632
View File
@@ -0,0 +1,632 @@
import pyodbc
import pandas as pd
import clickhouse_connect
import numpy as np
from datetime import datetime
import traceback
import warnings
# =========================================================
# IGNORE WARNINGS
# =========================================================
# Silence the pandas warning whose message starts with the text below.
# This script hands a raw pyodbc connection to pd.read_sql.
warnings.filterwarnings(
    'ignore',
    'pandas only supports SQLAlchemy connectable'
)
print("\n====================================")
print("ETL Started :", datetime.now())
print("====================================")
# =========================================================
# CONNECTION SETTINGS
# =========================================================
# Every connection value can be overridden with an ETL_* environment
# variable. When a variable is not set, the value this script has always
# used is kept, so existing runs behave exactly as before.
# NOTE(review): the fallback credentials below are still committed to
# source control - rotate them and drop the defaults once the environment
# variables are provisioned wherever this script runs.
import os  # needed only for the overrides in this section

# =========================================================
# SQL SERVER CONNECTION
# =========================================================
# NOTE: an overridden value containing ';' would have to be wrapped in
# {braces} by the caller to remain a valid ODBC connection string.
SQL_CONN_STR = (
    'DRIVER={ODBC Driver 17 for SQL Server};'
    f"SERVER={os.environ.get('ETL_SQL_SERVER', '10.200.25.65')};"
    f"DATABASE={os.environ.get('ETL_SQL_DATABASE', 'CPMIndiaBusinessInsight')};"
    f"UID={os.environ.get('ETL_SQL_UID', 'bsgteam_test')};"
    f"PWD={os.environ.get('ETL_SQL_PWD', 'B$gt3@m#00512')};"
    'TrustServerCertificate=yes;'
)
# =========================================================
# CLICKHOUSE CONFIG
# =========================================================
# Passed as keyword arguments to clickhouse_connect.get_client().
CH_CONFIG = {
    'host': os.environ.get('ETL_CH_HOST', '172.188.12.194'),
    'port': int(os.environ.get('ETL_CH_PORT', '8123')),
    'username': os.environ.get('ETL_CH_USERNAME', 'default'),
    'password': os.environ.get('ETL_CH_PASSWORD', 'dipanshu_k'),
    'database': os.environ.get('ETL_CH_DATABASE', 'DaburIndia_BI'),
}
# =========================================================
# TABLE DETAILS
# =========================================================
# The same name is used for the SQL Server source table (dbo.[...]) and
# for the ClickHouse target table.
TABLE_NAME = 'Web Logins'
# Value the source query filters Project_Id on.
PROJECT_ID = 41654
# =========================================================
# SETTINGS
# =========================================================
# When True the ClickHouse table is truncated once, before the first insert.
TRUNCATE_BEFORE_LOAD = True
# Set to True by the main loop after the one-time TRUNCATE has run.
table_truncated = False
# =========================================================
# CLICKHOUSE DATE COLUMNS
# =========================================================
# Columns clean_dataframe converts to datetime.date
# (names are matched case-insensitively).
DATE_COLUMNS = [
    'visit_date'
]
# =========================================================
# CLICKHOUSE DATETIME COLUMNS
# =========================================================
# Columns clean_dataframe converts to datetime.datetime
# (names are matched case-insensitively).
DATETIME_COLUMNS = [
    'create_date',
    'update_date'
]
# =========================================================
# CLEAN DATAFRAME
# =========================================================
def _clean_object_value(val):
    """Convert one cell of an object/string column to a plain Python value.

    Nulls become None, strings are stripped, booleans and integers become
    int, floats become int when whole (else float), pandas Timestamps
    become datetime.datetime, other date-like objects are kept unchanged,
    and anything else is converted with str().
    """
    if pd.isnull(val):
        return None
    if isinstance(val, str):
        return val.strip()
    # bool is tested before int because bool is a subclass of int.
    if isinstance(val, bool):
        return int(val)
    if isinstance(val, (int, np.integer)):
        return int(val)
    if isinstance(val, (float, np.floating)):
        if np.isnan(val):
            return None
        return int(val) if val.is_integer() else float(val)
    # pd.Timestamp is tested before datetime because it subclasses it.
    if isinstance(val, pd.Timestamp):
        return val.to_pydatetime()
    if isinstance(val, datetime):
        return val
    # Remaining date-like objects (anything exposing .year) pass through.
    if hasattr(val, 'year'):
        return val
    return str(val)


def clean_dataframe(df, date_columns=None, datetime_columns=None):
    """Normalise every column of *df* before it is inserted into ClickHouse.

    Columns are handled by the first rule that matches:
      * name in *date_columns*      -> datetime.date values
      * name in *datetime_columns*  -> datetime.datetime values
      * integer dtype               -> int values
      * float dtype                 -> int when every non-null value is
                                       whole (12.0 -> 12), else float
      * anything else               -> cleaned cell by cell

    Column names are compared case-insensitively. Values that cannot be
    parsed as a date/number are coerced to null.

    Args:
        df: DataFrame to clean.
        date_columns: names of date columns; defaults to the module-level
            DATE_COLUMNS list.
        datetime_columns: names of datetime columns; defaults to the
            module-level DATETIME_COLUMNS list.

    Returns:
        The cleaned DataFrame. A column whose cleaning fails is reported on
        stdout and left as it was at the point of failure; if the whole
        step fails, the frame is returned as it was when the error occurred.
    """
    try:
        # ---------------------------------------------
        # Replace NaN with None
        # ---------------------------------------------
        df = df.replace({np.nan: None})
        if date_columns is None:
            date_columns = DATE_COLUMNS
        if datetime_columns is None:
            datetime_columns = DATETIME_COLUMNS
        # Lower-cased once here instead of once per column.
        date_keys = {name.lower() for name in date_columns}
        datetime_keys = {name.lower() for name in datetime_columns}
        # ---------------------------------------------
        # Process Each Column
        # ---------------------------------------------
        for col in df.columns:
            try:
                print(f"\nCleaning Column : {col}")
                col_key = col.lower()
                # =====================================
                # DATE COLUMNS
                # =====================================
                if col_key in date_keys:
                    print(f"Date Column : {col}")
                    df[col] = pd.to_datetime(df[col], errors='coerce')
                    df[col] = [
                        None if pd.isnull(val) else val.date()
                        for val in df[col]
                    ]
                # =====================================
                # DATETIME COLUMNS
                # =====================================
                elif col_key in datetime_keys:
                    print(f"DateTime Column : {col}")
                    df[col] = pd.to_datetime(df[col], errors='coerce')
                    df[col] = [
                        None if pd.isnull(val) else val.to_pydatetime()
                        for val in df[col]
                    ]
                # =====================================
                # INTEGER COLUMNS
                # =====================================
                elif pd.api.types.is_integer_dtype(df[col]):
                    print(f"Integer Column : {col}")
                    df[col] = pd.to_numeric(df[col], errors='coerce')
                    df[col] = df[col].apply(
                        lambda x: int(x) if pd.notnull(x) else None
                    )
                # =====================================
                # FLOAT COLUMNS
                # =====================================
                elif pd.api.types.is_float_dtype(df[col]):
                    print(f"Float Column : {col}")
                    df[col] = pd.to_numeric(df[col], errors='coerce')
                    non_null = df[col].dropna()
                    # Convert whole floats to int (12.0 -> 12) only when
                    # every non-null value in the column is whole.
                    all_whole = len(non_null) > 0 and (
                        (non_null % 1 == 0).all()
                    )
                    caster = int if all_whole else float
                    df[col] = df[col].apply(
                        lambda x: caster(x) if pd.notnull(x) else None
                    )
                # =====================================
                # OBJECT / STRING COLUMNS
                # =====================================
                else:
                    print(f"String/Object Column : {col}")
                    cleaned = []
                    for val in df[col]:
                        try:
                            cleaned.append(_clean_object_value(val))
                        except Exception as row_error:
                            # One bad cell must not lose the whole column:
                            # report it and store a null instead.
                            print(f"Row Cleaning Error in Column {col}")
                            print(str(row_error))
                            cleaned.append(None)
                    df[col] = cleaned
            except Exception as col_error:
                # Best effort: report the column and carry on with the rest.
                print("\n================================")
                print(f"COLUMN FAILED : {col}")
                print(str(col_error))
                print("================================")
        return df
    except Exception as clean_error:
        print("\n================================")
        print("DATA CLEAN FAILED")
        print(str(clean_error))
        print(traceback.format_exc())
        print("================================")
        return df
# =========================================================
# MAIN PROCESS
# =========================================================
# Both handles start as None so the cleanup in `finally` can tell
# "never connected" apart from "connected" without relying on a NameError.
sql_conn = None
ch_client = None
# Number of chunks that were read but could not be cleaned or inserted.
failed_chunks = 0
try:
    # =====================================================
    # CONNECT SQL SERVER
    # =====================================================
    print("\nConnecting SQL Server...")
    sql_conn = pyodbc.connect(SQL_CONN_STR)
    print("Connected to SQL Server")
    # =====================================================
    # CONNECT CLICKHOUSE
    # =====================================================
    print("\nConnecting ClickHouse...")
    ch_client = clickhouse_connect.get_client(**CH_CONFIG)
    print("Connected to ClickHouse")
    # =====================================================
    # QUERY
    # =====================================================
    # TABLE_NAME and PROJECT_ID are constants defined in this script,
    # not user input, so they are interpolated directly.
    query = f"""
    SELECT *
    FROM dbo.[{TABLE_NAME}]
    WHERE Project_Id = {PROJECT_ID}
    """
    print("\n====================================")
    print("Executing Query")
    print("====================================")
    print(query)
    # =====================================================
    # CHUNK SIZE
    # =====================================================
    chunk_size = 100000
    total_rows = 0
    # =====================================================
    # READ DATA
    # =====================================================
    # With chunksize set, read_sql yields DataFrames of at most
    # chunk_size rows. A failure while fetching is raised by the loop
    # itself and therefore lands in the MAIN ERROR handler below.
    for chunk in pd.read_sql(
        query,
        sql_conn,
        chunksize=chunk_size
    ):
        # =================================================
        # CLEAN DATA (a failure here skips only this chunk)
        # =================================================
        try:
            print("\n====================================")
            print(f"Processing Rows : {len(chunk)}")
            print("====================================")
            chunk = clean_dataframe(chunk)
            # =============================================
            # DEBUG COLUMN TYPES
            # =============================================
            print("\nCOLUMN TYPES")
            for col in chunk.columns:
                try:
                    sample = chunk[col].dropna()
                    if len(sample) > 0:
                        print(
                            col,
                            type(sample.iloc[0]),
                            sample.iloc[0]
                        )
                except Exception as debug_error:
                    # Diagnostic output only; never fail a chunk over it.
                    print(col, "type preview unavailable :", debug_error)
        except Exception as chunk_error:
            failed_chunks += 1
            print("\n====================================")
            print("CHUNK PROCESS FAILED")
            print("====================================")
            print(str(chunk_error))
            traceback.print_exc()
            continue
        # =================================================
        # TRUNCATE TABLE (once, right before the first insert)
        # =================================================
        # Deliberately outside the per-chunk handlers: if the truncate
        # fails, the re-raise must stop the whole run. Otherwise the
        # remaining chunks would be appended to a table that still holds
        # the previous load.
        if TRUNCATE_BEFORE_LOAD and not table_truncated:
            try:
                print("\n====================================")
                print(f"TRUNCATING : {TABLE_NAME}")
                print("====================================")
                truncate_query = f"""
                TRUNCATE TABLE
                `{CH_CONFIG['database']}`.`{TABLE_NAME}`
                """
                print(truncate_query)
                ch_client.command(truncate_query)
                print("TABLE TRUNCATED SUCCESSFULLY")
                table_truncated = True
            except Exception as truncate_error:
                print("\nTRUNCATE FAILED")
                print(str(truncate_error))
                raise
        # =================================================
        # INSERT DATA (a failure here skips only this chunk)
        # =================================================
        try:
            print("\n====================================")
            print("INSERTING DATA INTO CLICKHOUSE")
            print("====================================")
            ch_client.insert_df(
                table=f"`{TABLE_NAME}`",
                df=chunk,
                database=CH_CONFIG['database']
            )
            total_rows += len(chunk)
            print(f"\nTOTAL INSERTED : {total_rows}")
        except Exception as insert_error:
            failed_chunks += 1
            print("\nINSERT FAILED")
            print(str(insert_error))
            traceback.print_exc()
            insert_trace = traceback.format_exc()
            # =============================================
            # SAVE ERROR LOG
            # =============================================
            try:
                with open(
                    "clickhouse_insert_error.log",
                    "a",
                    encoding="utf-8"
                ) as log:
                    log.write("\n\n================================")
                    log.write(f"\nTIME : {datetime.now()}")
                    log.write(f"\nTABLE : {TABLE_NAME}")
                    log.write(f"\nERROR : {str(insert_error)}")
                    log.write(f"\nTRACEBACK :\n{insert_trace}")
                    log.write("\n================================")
            except OSError as log_error:
                # An unwritable log file must not abort the load.
                print(f"Could not write insert error log : {log_error}")
            continue
    # =====================================================
    # SUMMARY
    # =====================================================
    print("\n====================================")
    if failed_chunks:
        # Do not claim success when part of the data was skipped.
        print("ETL COMPLETED WITH ERRORS")
        print(f"FAILED CHUNKS : {failed_chunks}")
    else:
        print("ETL COMPLETED SUCCESSFULLY")
    print(f"TOTAL ROWS INSERTED : {total_rows}")
    print("====================================")
# =========================================================
# MAIN ERROR
# =========================================================
except Exception as main_error:
    print("\n====================================")
    print("MAIN ERROR")
    print("====================================")
    print(str(main_error))
    traceback.print_exc()
    main_trace = traceback.format_exc()
    try:
        with open(
            "clickhouse_main_error.log",
            "a",
            encoding="utf-8"
        ) as log:
            log.write("\n\n================================")
            log.write(f"\nTIME : {datetime.now()}")
            log.write(f"\nERROR : {str(main_error)}")
            log.write(f"\nTRACEBACK :\n{main_trace}")
            log.write("\n================================")
    except OSError as log_error:
        print(f"Could not write main error log : {log_error}")
# =========================================================
# CLOSE CONNECTIONS
# =========================================================
finally:
    # Close only what was actually opened; report (rather than hide) a
    # failing close, but never let it mask the outcome of the run.
    if sql_conn is not None:
        try:
            sql_conn.close()
            print("\nSQL Server Connection Closed")
        except Exception as close_error:
            print(f"\nSQL Server close failed : {close_error}")
    if ch_client is not None:
        try:
            ch_client.close()
            print("ClickHouse Connection Closed")
        except Exception as close_error:
            print(f"ClickHouse close failed : {close_error}")
print("\n====================================")
print("ETL Finished :", datetime.now())
print("====================================")
File diff suppressed because it is too large Load Diff
+77
View File
@@ -90,3 +90,80 @@ Traceback (most recent call last):
NameError: name 'PROJECT_ID' is not defined. Did you mean: 'PROJECTID'? NameError: name 'PROJECT_ID' is not defined. Did you mean: 'PROJECTID'?
================================ ================================
================================
TIME : 2026-05-19 15:19:40.194819
ERROR : ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: An existing connection was forcibly closed by the remote host.\r\n (10054) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10054)')
TRACEBACK :
Traceback (most recent call last):
File "d:\Python Code\PaidVisibility_Import.py", line 345, in <module>
for chunk in pd.read_sql(
~~~~~~~~~~~^
query,
^^^^^^
sql_conn,
^^^^^^^^^
chunksize=chunk_size
^^^^^^^^^^^^^^^^^^^^
):
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\pandas\io\sql.py", line 2730, in _query_iterator
data = cursor.fetchmany(chunksize)
pyodbc.OperationalError: ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: An existing connection was forcibly closed by the remote host.\r\n (10054) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10054)')
================================
================================
TIME : 2026-05-19 15:26:07.910371
ERROR : ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]Named Pipes Provider: Could not open a connection to SQL Server [5]. (5) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (5)')
TRACEBACK :
Traceback (most recent call last):
File "d:\Python Code\OQaD Import.py", line 310, in <module>
sql_conn = pyodbc.connect(SQL_CONN_STR)
pyodbc.OperationalError: ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]Named Pipes Provider: Could not open a connection to SQL Server [5]. (5) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (5)')
================================
================================
TIME : 2026-05-19 16:01:03.630328
ERROR : ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.\r\n (10060) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10060)')
TRACEBACK :
Traceback (most recent call last):
File "d:\Python Code\PaidVisibility_Import.py", line 343, in <module>
for chunk in pd.read_sql(
~~~~~~~~~~~^
query,
^^^^^^
sql_conn,
^^^^^^^^^
chunksize=chunk_size
^^^^^^^^^^^^^^^^^^^^
):
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\pandas\io\sql.py", line 2730, in _query_iterator
data = cursor.fetchmany(chunksize)
pyodbc.OperationalError: ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.\r\n (10060) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10060)')
================================
================================
TIME : 2026-05-19 16:36:09.557213
ERROR : ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.\r\n (10060) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10060)')
TRACEBACK :
Traceback (most recent call last):
File "d:\Python Code\PaidVisibility_Compliance Import.py", line 371, in <module>
for chunk in pd.read_sql(
~~~~~~~~~~~^
query,
^^^^^^
sql_conn,
^^^^^^^^^
chunksize=chunk_size
^^^^^^^^^^^^^^^^^^^^
):
^
File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\pandas\io\sql.py", line 2730, in _query_iterator
data = cursor.fetchmany(chunksize)
pyodbc.OperationalError: ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.\r\n (10060) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10060)')
================================