Journey_Plan data Import

[Web Logins] Import
2026-05-20 12:40:47 +05:30 · 2026-05-19 16:58:54 +05:30
12 changed files with 25860 additions and 171 deletions
@@ -1,5 +1,3 @@
 import pyodbc
 import pandas as pd
 import clickhouse_connect
@@ -7,6 +5,7 @@ import numpy as np
 from datetime import datetime
 import traceback
 import warnings
 import time
 # =========================================================
 # IGNORE WARNINGS
@@ -16,10 +15,12 @@ warnings.filterwarnings(
    'pandas only supports SQLAlchemy connectable'
 )
-print("ETL Started :", datetime.now())
+print("\n================================================")
 print("ETL STARTED :", datetime.now())
 print("================================================")
 # =========================================================
-# SQL SERVER CONNECTION
+# SQL SERVER CONNECTION STRING
 # =========================================================
 SQL_CONN_STR = (
    'DRIVER={ODBC Driver 17 for SQL Server};'
@@ -28,6 +29,7 @@ SQL_CONN_STR = (
    'UID=bsgteam_test;'
    'PWD=B$gt3@m#00512;'
    'TrustServerCertificate=yes;'
    'Connection Timeout=60;'
 )
 # =========================================================
@@ -42,11 +44,67 @@ CH_CONFIG = {
 }
 # =========================================================
-# TABLE NAME
+# TABLE DETAILS
 # =========================================================
 TABLE_NAME = 'Journey_Plan'
 PROJECT_ID = 41654
 # =========================================================
 # SETTINGS
 # =========================================================
 TRUNCATE_BEFORE_LOAD = True
 table_truncated = False
 # =========================================================
 # CHUNK SIZE
 # =========================================================
 chunk_size = 20000
 # =========================================================
 # CONNECT SQL SERVER
 # =========================================================
 def connect_sql():
    """Open a pyodbc connection to SQL Server using SQL_CONN_STR.

    The connection is opened with autocommit enabled.

    Returns:
        The open pyodbc connection.

    Raises:
        Exception: any error raised while connecting is printed and then
            re-raised unchanged to the caller.
    """
    try:
        sql_connection = pyodbc.connect(SQL_CONN_STR, autocommit=True)
        print("Connected SQL Server")
    except Exception as failure:
        print("SQL CONNECTION FAILED")
        print(failure)
        raise
    return sql_connection
 # =========================================================
 # CONNECT CLICKHOUSE
 # =========================================================
 def connect_clickhouse():
    """Create a ClickHouse client from the CH_CONFIG keyword settings.

    Returns:
        The client returned by clickhouse_connect.get_client.

    Raises:
        Exception: any error raised while connecting is printed and then
            re-raised unchanged to the caller.
    """
    try:
        clickhouse_client = clickhouse_connect.get_client(**CH_CONFIG)
        print("Connected ClickHouse")
    except Exception as failure:
        print("CLICKHOUSE CONNECTION FAILED")
        print(failure)
        raise
    return clickhouse_client
 # =========================================================
 # CLEAN DATAFRAME
 # =========================================================
@@ -66,39 +124,46 @@ def clean_dataframe(df):
            try:
-                col_lower = col.lower()
+                print(f"\nCleaning Column : {col}")
                # =====================================
-                # DATE COLUMNS
+                # AUTO DETECT DATE / TIME COLUMNS
                # =====================================
-                if 'date' in col_lower:
+                if (
                    'date' in col.lower()
                    or 'time' in col.lower()
                ):
-                    print(f"Cleaning Date Column : {col}")
+                    print(f"Date Column : {col}")
                    df[col] = pd.to_datetime(
                        df[col],
                        errors='coerce'
                    )
-                    # Remove invalid dates
+                    cleaned_dates = []
                    df[col] = df[col].where(
                        (df[col].dt.year >= 1970) &
                        (df[col].dt.year <= 2100)
                    )
-                    # Convert to Python Date
+                    for val in df[col]:
-                    df[col] = df[col].apply(
+
-                        lambda x:
+                        if pd.isnull(val):
-                        x.date()
+
-                        if pd.notnull(x)
+                            cleaned_dates.append(None)
-                        else None
+
-                    )
+                        else:
                            cleaned_dates.append(
                                val.to_pydatetime()
                            )
                    df[col] = cleaned_dates
                # =====================================
                # INTEGER COLUMNS
                # =====================================
                elif pd.api.types.is_integer_dtype(df[col]):
                    print(f"Integer Column : {col}")
                    df[col] = pd.to_numeric(
                        df[col],
                        errors='coerce'
@@ -116,38 +181,27 @@ def clean_dataframe(df):
                # =====================================
                elif pd.api.types.is_float_dtype(df[col]):
-                    non_null = df[col].dropna()
+                    print(f"Float Column : {col}")
-                    # ---------------------------------
+                    df[col] = pd.to_numeric(
-                    # Convert whole float to int
+                        df[col],
-                    # Example:
+                        errors='coerce'
-                    # 3240.0 -> 3240
+                    )
                    # ---------------------------------
                    if len(non_null) > 0 and (
                        (non_null % 1 == 0).all()
                    ):
-                        df[col] = df[col].apply(
+                    df[col] = df[col].apply(
-                            lambda x:
+                        lambda x:
-                            int(x)
+                        float(x)
-                            if pd.notnull(x)
+                        if pd.notnull(x)
-                            else None
+                        else None
-                        )
+                    )
                    else:
                        df[col] = df[col].apply(
                            lambda x:
                            float(x)
                            if pd.notnull(x)
                            else None
                        )
                # =====================================
                # OBJECT / STRING COLUMNS
                # =====================================
                else:
                    print(f"String/Object Column : {col}")
                    cleaned = []
                    for val in df[col]:
@@ -157,6 +211,28 @@ def clean_dataframe(df):
                            cleaned.append(None)
                        # DATETIME
                        elif isinstance(
                            val,
                            (
                                datetime,
                                pd.Timestamp
                            )
                        ):
                            if isinstance(
                                val,
                                pd.Timestamp
                            ):
                                cleaned.append(
                                    val.to_pydatetime()
                                )
                            else:
                                cleaned.append(val)
                        # INTEGER
                        elif isinstance(
                            val,
@@ -166,7 +242,9 @@ def clean_dataframe(df):
                            )
                        ):
-                            cleaned.append(int(val))
+                            cleaned.append(
                                int(val)
                            )
                        # FLOAT
                        elif isinstance(
@@ -183,41 +261,26 @@ def clean_dataframe(df):
                            else:
-                                # IMPORTANT FIX
+                                cleaned.append(
-                                # Avoid '3240.0' string issue
+                                    float(val)
-                                if val.is_integer():
+                                )
                                    cleaned.append(int(val))
                                else:
                                    cleaned.append(float(val))
                        # STRING
                        elif isinstance(val, str):
                            cleaned.append(val.strip())
                        # BOOLEAN
                        elif isinstance(val, bool):
                            cleaned.append(int(val))
                        # DATETIME
                        elif isinstance(
                            val,
-                            (
+                            bool
                                datetime,
                                pd.Timestamp
                            )
                        ):
-                            cleaned.append(str(val))
+                            cleaned.append(
                                int(val)
                            )
-                        # OTHER
+                        # STRING
                        else:
-                            cleaned.append(str(val))
+                            cleaned.append(
                                str(val).strip()
                            )
                    df[col] = cleaned
@@ -247,16 +310,12 @@ try:
    # =====================================================
    # CONNECT SQL SERVER
    # =====================================================
-    sql_conn = pyodbc.connect(SQL_CONN_STR)
+    sql_conn = connect_sql()
    print("Connected to SQL Server")
    # =====================================================
    # CONNECT CLICKHOUSE
    # =====================================================
-    ch_client = clickhouse_connect.get_client(**CH_CONFIG)
+    ch_client = connect_clickhouse()
    print("Connected to ClickHouse")
    # =====================================================
    # QUERY
@@ -267,125 +326,214 @@ try:
    WHERE Project_Id = {PROJECT_ID}
    """
-    print("\nExecuting Query:")
+    print("\nExecuting Query")
    print(query)
    # =====================================================
-    # CHUNK SIZE
+    # RETRY SETTINGS
    # =====================================================
-    chunk_size = 100000
+    retry_count = 0
-
+    max_retry = 5
    total_rows = 0
    # =====================================================
-    # READ DATA
+    # MAIN RETRY LOOP
    # =====================================================
-    for chunk in pd.read_sql(
+    while retry_count < max_retry:
        query,
        sql_conn,
        chunksize=chunk_size
    ):
        try:
-            print("\n================================")
+            # =============================================
-            print(f"Processing {len(chunk)} Rows")
+            # READ SQL DATA
-            print("================================")
+            # =============================================
            for chunk in pd.read_sql(
                query,
                sql_conn,
                chunksize=chunk_size
            ):
-            # =================================================
+                try:
            # CLEAN DATA
            # =================================================
            chunk = clean_dataframe(chunk)
            # =================================================
            # DEBUG COLUMN TYPES
            # =================================================
            print("\nCOLUMN TYPES")
            for col in chunk.columns:
                sample = chunk[col].dropna()
                if len(sample) > 0:
                    print("\n================================")
                    print(
-                        col,
+                        f"Processing Rows : "
-                        type(sample.iloc[0]),
+                        f"{len(chunk)}"
-                        sample.iloc[0]
+                    )
                    print("================================")
                    # =====================================
                    # CLEAN DATA
                    # =====================================
                    chunk = clean_dataframe(chunk)
                    # =====================================
                    # DEBUG COLUMN TYPES
                    # =====================================
                    print("\nCOLUMN DATATYPES")
                    print(chunk.dtypes)
                    print("\nCOLUMN SAMPLE TYPES")
                    for col in chunk.columns:
                        sample = chunk[col].dropna()
                        if len(sample) > 0:
                            print(
                                col,
                                type(sample.iloc[0]),
                                sample.iloc[0]
                            )
                    # =====================================
                    # TRUNCATE TABLE FIRST TIME ONLY
                    # =====================================
                    if (
                        TRUNCATE_BEFORE_LOAD
                        and
                        not table_truncated
                    ):
                        print("\n================================")
                        print(
                            f"TRUNCATING TABLE : "
                            f"{TABLE_NAME}"
                        )
                        print("================================")
                        # IMPORTANT FIX
                        truncate_query = f"""
                        TRUNCATE TABLE
                        `{CH_CONFIG['database']}`.`{TABLE_NAME}`
                        """
                        ch_client.command(
                            truncate_query
                        )
                        print(
                            "TABLE TRUNCATED SUCCESSFULLY"
                        )
                        table_truncated = True
                    # =====================================
                    # INSERT INTO CLICKHOUSE
                    # =====================================
                    print(
                        "\nINSERTING INTO CLICKHOUSE..."
                    )
-            # =================================================
+                    ch_client.insert_df(
-            # SAMPLE DATA
+                        table=TABLE_NAME,
-            # =================================================
+                        df=chunk,
-            print("\nSAMPLE DATA")
+                        database=CH_CONFIG['database']
-            print(chunk.head(2))
+                    )
-            # =================================================
+                    print(
-            # INSERT INTO CLICKHOUSE
+                        f"INSERTED : "
-            # =================================================
+                        f"{len(chunk)} ROWS"
-            print("\nInserting into ClickHouse...")
+                    )
-            ch_client.insert_df(
+                except Exception as insert_error:
                table=TABLE_NAME,
                df=chunk,
                database=CH_CONFIG['database']
            )
-            total_rows += len(chunk)
+                    print("\n================================")
                    print("INSERT FAILED")
                    print("================================")
-            print(
+                    print(str(insert_error))
                f"\nInserted Total Rows : {total_rows}"
            )
-        except Exception as chunk_error:
+                    traceback.print_exc()
                    # =================================
                    # SAVE ERROR LOG
                    # =================================
                    with open(
                        "insert_error.log",
                        "a",
                        encoding="utf-8"
                    ) as log:
                        log.write(
                            "\n\n================================"
                        )
                        log.write(
                            f"\nTIME : "
                            f"{datetime.now()}"
                        )
                        log.write(
                            f"\nTABLE : "
                            f"{TABLE_NAME}"
                        )
                        log.write(
                            f"\nERROR : "
                            f"{str(insert_error)}"
                        )
                        log.write(
                            f"\nTRACEBACK :\n"
                            f"{traceback.format_exc()}"
                        )
                        log.write(
                            "\n================================"
                        )
                    continue
            # =============================================
            # SUCCESS
            # =============================================
            break
        # =================================================
        # SQL CONNECTION FAILURE
        # =================================================
        except pyodbc.OperationalError as op_error:
            retry_count += 1
            print("\n================================")
-            print("CHUNK INSERT FAILED")
+            print(
                f"SQL CONNECTION LOST "
                f"- RETRY {retry_count}"
            )
            print("================================")
-            print(str(chunk_error))
+            print(str(op_error))
            time.sleep(10)
            try:
                sql_conn.close()
            except:
                pass
            # RECONNECT SQL
            sql_conn = connect_sql()
        # =================================================
        # OTHER ERROR
        # =================================================
        except Exception as loop_error:
            print("\n================================")
            print("MAIN LOOP ERROR")
            print("================================")
            print(str(loop_error))
            traceback.print_exc()
-            # =============================================
+            break
            # SAVE ERROR LOG
            # =============================================
            with open(
                "clickhouse_chunk_error.log",
                "a",
                encoding="utf-8"
            ) as log:
                log.write(
                    "\n\n================================"
                )
                log.write(
                    f"\nTIME : {datetime.now()}"
                )
                log.write(
                    f"\nTABLE : {TABLE_NAME}"
                )
                log.write(
                    f"\nERROR : {str(chunk_error)}"
                )
                log.write(
                    f"\nTRACEBACK :\n"
                    f"{traceback.format_exc()}"
                )
                log.write(
                    "\n================================"
                )
            continue
    print("\n================================")
    print("ETL COMPLETED SUCCESSFULLY")
    print(f"TOTAL ROWS INSERTED : {total_rows}")
    print("================================")
 # =========================================================
@@ -402,7 +550,7 @@ except Exception as main_error:
    traceback.print_exc()
    with open(
-        "clickhouse_main_error.log",
+        "main_error.log",
        "a",
        encoding="utf-8"
    ) as log:
@@ -437,7 +585,7 @@ finally:
        sql_conn.close()
-        print("\nSQL Server Connection Closed")
+        print("\nSQL SERVER CONNECTION CLOSED")
    except:
        pass
@@ -446,9 +594,11 @@ finally:
        ch_client.close()
-        print("ClickHouse Connection Closed")
+        print("CLICKHOUSE CONNECTION CLOSED")
    except:
        pass
-print("\nETL Finished :", datetime.now())
+print("\n================================================")
 print("ETL FINISHED :", datetime.now())
 print("================================================")
@@ -0,0 +1,545 @@
 import pyodbc
 import pandas as pd
 import clickhouse_connect
 import numpy as np
 from datetime import datetime
 import traceback
 import warnings
 # =========================================================
 # IGNORE WARNINGS
 # =========================================================
 warnings.filterwarnings(
    'ignore',
    'pandas only supports SQLAlchemy connectable'
 )
 print("ETL Started :", datetime.now())
 # =========================================================
 # CREDENTIAL OVERRIDES
 # =========================================================
 # Credentials may be supplied through environment variables so that they do
 # not have to live in the script:
 #   ETL_SQL_UID / ETL_SQL_PWD          -> SQL Server login
 #   ETL_CH_USERNAME / ETL_CH_PASSWORD  -> ClickHouse login
 # The literals below are only fallbacks that keep existing runs working.
 # SECURITY: the fallback secrets are committed in plain text - rotate them
 # and remove the defaults once the environment variables are deployed.
 import os  # imported here so this configuration section is self-contained
 _SQL_UID = os.environ.get('ETL_SQL_UID', 'bsgteam_test')
 _SQL_PWD = os.environ.get('ETL_SQL_PWD', 'B$gt3@m#00512')
 _CH_USERNAME = os.environ.get('ETL_CH_USERNAME', 'default')
 _CH_PASSWORD = os.environ.get('ETL_CH_PASSWORD', 'dipanshu_k')
 # =========================================================
 # SQL SERVER CONNECTION
 # =========================================================
 SQL_CONN_STR = (
    'DRIVER={ODBC Driver 17 for SQL Server};'
    'SERVER=10.200.25.65;'
    'DATABASE=CPMIndiaBusinessInsight;'
    f'UID={_SQL_UID};'
    f'PWD={_SQL_PWD};'
    'TrustServerCertificate=yes;'
 )
 # =========================================================
 # CLICKHOUSE CONFIG
 # =========================================================
 # Passed as keyword arguments to clickhouse_connect.get_client.
 CH_CONFIG = {
    'host': '172.188.12.194',
    'port': 8123,
    'username': _CH_USERNAME,
    'password': _CH_PASSWORD,
    'database': 'DaburIndia_BI'
 }
 # =========================================================
 # TABLE NAME
 # =========================================================
 # The same name is used for the SQL Server source table and the
 # ClickHouse target table.
 TABLE_NAME = 'OQaD'
 PROJECT_ID = 41654
 # =========================================================
 # LOAD SETTINGS
 # =========================================================
 # When TRUNCATE_BEFORE_LOAD is set, the target table is emptied once per run;
 # table_truncated records that this has already happened.
 TRUNCATE_BEFORE_LOAD = True
 table_truncated = False
 # =========================================================
 # CLICKHOUSE DATE COLUMNS
 # =========================================================
 # Columns converted to datetime.date values (matched case-insensitively).
 DATE_COLUMNS = [
    'visit_date'
 ]
 # =========================================================
 # CLICKHOUSE DATETIME COLUMNS
 # =========================================================
 # Columns converted to datetime.datetime values (matched case-insensitively).
 DATETIME_COLUMNS = [
    'create_date',
    'update_date'
 ]
 # =========================================================
 # CLEAN DATAFRAME
 # =========================================================
 def _coerce_datetime_column(df, col):
    """Parse df[col] as datetimes in place, blanking values outside 1970-2100.

    Unparseable values become NaT (errors='coerce'); values whose year falls
    outside the 1970-2100 window are blanked by the ``where`` mask.
    """
    df[col] = pd.to_datetime(
        df[col],
        errors='coerce'
    )
    df[col] = df[col].where(
        (df[col].dt.year >= 1970) &
        (df[col].dt.year <= 2100)
    )


 def _to_python_scalar(val):
    """Convert one cell of an object/string column to a plain Python value.

    Nulls become None, numpy numbers become int/float (whole floats become
    int), strings are stripped, pandas Timestamps become datetime objects and
    anything unrecognised is stringified.
    """
    if pd.isnull(val):
        return None
    # bool is a subclass of int, so it has to be tested before the integer
    # check to be reachable at all. The result (0/1) is the same either way.
    if isinstance(val, bool):
        return int(val)
    if isinstance(val, (int, np.integer)):
        return int(val)
    if isinstance(val, (float, np.floating)):
        # NaN never reaches this point: the null check above already caught it.
        # Whole floats are emitted as int (3240.0 -> 3240).
        return int(val) if val.is_integer() else float(val)
    if isinstance(val, str):
        return val.strip()
    if isinstance(val, pd.Timestamp):
        return val.to_pydatetime()
    if isinstance(val, datetime):
        return val
    return str(val)


 def clean_dataframe(df):
    """Normalise a DataFrame chunk column by column and return it.

    NaN is replaced by None first. Each column is then handled by the first
    rule that matches:

    * name listed in DATE_COLUMNS (case-insensitive): parsed as datetime,
      limited to years 1970-2100, converted to ``datetime.date``;
    * name listed in DATETIME_COLUMNS (case-insensitive): same parsing,
      converted to ``datetime.datetime``;
    * integer dtype: values converted to Python ``int``;
    * float dtype: converted to ``int`` when every non-null value is whole,
      otherwise to ``float``;
    * anything else: converted cell by cell (see ``_to_python_scalar``).

    Args:
        df: the DataFrame chunk to clean.

    Returns:
        The cleaned DataFrame. Errors are reported with ``print`` and never
        raised: a column that fails is left as it was at the point of failure
        and the remaining columns are still processed.
    """
    try:
        # ---------------------------------------------
        # Replace NaN
        # ---------------------------------------------
        df = df.replace({np.nan: None})
        # ---------------------------------------------
        # Lower-cased lookups are the same for every column: build them once
        # ---------------------------------------------
        date_cols = {name.lower() for name in DATE_COLUMNS}
        datetime_cols = {name.lower() for name in DATETIME_COLUMNS}
        # ---------------------------------------------
        # Process Column Wise
        # ---------------------------------------------
        for col in df.columns:
            try:
                print(f"\nCleaning Column : {col}")
                col_key = col.lower()
                # =====================================
                # DATE32 COLUMNS
                # =====================================
                if col_key in date_cols:
                    print(f"Date32 Column : {col}")
                    _coerce_datetime_column(df, col)
                    # Convert to datetime.date
                    df[col] = df[col].apply(
                        lambda x:
                        x.date()
                        if pd.notnull(x)
                        else None
                    )
                # =====================================
                # DATETIME64 COLUMNS
                # =====================================
                elif col_key in datetime_cols:
                    print(f"DateTime Column : {col}")
                    _coerce_datetime_column(df, col)
                    # Convert to datetime.datetime
                    df[col] = df[col].apply(
                        lambda x:
                        x.to_pydatetime()
                        if pd.notnull(x)
                        else None
                    )
                # =====================================
                # INTEGER COLUMNS
                # =====================================
                elif pd.api.types.is_integer_dtype(df[col]):
                    print(f"Integer Column : {col}")
                    df[col] = pd.to_numeric(
                        df[col],
                        errors='coerce'
                    )
                    df[col] = df[col].apply(
                        lambda x:
                        int(x)
                        if pd.notnull(x)
                        else None
                    )
                # =====================================
                # FLOAT COLUMNS
                # =====================================
                elif pd.api.types.is_float_dtype(df[col]):
                    print(f"Float Column : {col}")
                    non_null = df[col].dropna()
                    # The int/float decision is made for the whole column so
                    # that all of its values share one type.
                    if len(non_null) > 0 and (
                        (non_null % 1 == 0).all()
                    ):
                        df[col] = df[col].apply(
                            lambda x:
                            int(x)
                            if pd.notnull(x)
                            else None
                        )
                    else:
                        df[col] = df[col].apply(
                            lambda x:
                            float(x)
                            if pd.notnull(x)
                            else None
                        )
                # =====================================
                # OBJECT / STRING COLUMNS
                # =====================================
                else:
                    print(f"String/Object Column : {col}")
                    # Build the full list first so the column is only
                    # replaced when every cell converted without error.
                    cleaned = [
                        _to_python_scalar(val)
                        for val in df[col]
                    ]
                    df[col] = cleaned
            except Exception as col_error:
                # Best effort: report the column and carry on with the rest.
                print("\n================================")
                print(f"COLUMN FAILED : {col}")
                print(str(col_error))
                print("================================")
        return df
    except Exception as clean_error:
        print("\n================================")
        print("DATA CLEAN FAILED")
        print(str(clean_error))
        print("================================")
        return df
 # =========================================================
 # MAIN PROCESS
 # =========================================================
 # Connections start as None so the cleanup section can tell which ones were
 # actually opened instead of hiding a NameError behind a bare except.
 sql_conn = None
 ch_client = None
 # Number of chunks that failed to clean/insert. It is reported at the end so
 # that a partial load is never announced as a full success.
 failed_chunks = 0


 def _append_error_log(log_path, error, table=None):
    """Append one error entry (time, optional table, error, traceback) to a log.

    Must be called from inside an ``except`` block: the traceback text is
    taken from the exception that is currently being handled.

    Args:
        log_path: file the entry is appended to (created if missing).
        error: the exception being reported.
        table: table name to include in the entry, or None to omit that line.
    """
    entry = [
        "\n\n================================",
        f"\nTIME : {datetime.now()}",
    ]
    if table is not None:
        entry.append(f"\nTABLE : {table}")
    entry.extend([
        f"\nERROR : {str(error)}",
        f"\nTRACEBACK :\n{traceback.format_exc()}",
        "\n================================",
    ])
    with open(log_path, "a", encoding="utf-8") as log:
        log.writelines(entry)


 try:
    # =====================================================
    # CONNECT SQL SERVER
    # =====================================================
    sql_conn = pyodbc.connect(SQL_CONN_STR)
    print("Connected to SQL Server")
    # =====================================================
    # CONNECT CLICKHOUSE
    # =====================================================
    ch_client = clickhouse_connect.get_client(**CH_CONFIG)
    print("Connected to ClickHouse")
    # =====================================================
    # QUERY
    # =====================================================
    # TABLE_NAME and PROJECT_ID are constants from this script, not user input.
    query = f"""
    SELECT *
    FROM dbo.[{TABLE_NAME}]
    WHERE Project_Id = {PROJECT_ID}
    """
    print("\nExecuting Query:")
    print(query)
    # =====================================================
    # CHUNK SIZE
    # =====================================================
    chunk_size = 100000
    total_rows = 0
    # =====================================================
    # READ DATA
    # =====================================================
    for chunk in pd.read_sql(
        query,
        sql_conn,
        chunksize=chunk_size
    ):
        try:
            print("\n================================")
            print(f"Processing {len(chunk)} Rows")
            print("================================")
            # =================================================
            # CLEAN DATA
            # =================================================
            chunk = clean_dataframe(chunk)
            # =================================================
            # DEBUG COLUMN TYPES
            # =================================================
            print("\nCOLUMN TYPES")
            for col in chunk.columns:
                sample = chunk[col].dropna()
                if len(sample) > 0:
                    print(
                        col,
                        type(sample.iloc[0]),
                        sample.iloc[0]
                    )
            # =================================================
            # DEBUG DATE COLUMN
            # =================================================
            if 'visit_date' in chunk.columns:
                print("\nvisit_date Sample")
                print(chunk['visit_date'].head())
                sample = chunk['visit_date'].dropna()
                if len(sample) > 0:
                    print(
                        "visit_date datatype:",
                        type(sample.iloc[0])
                    )
            # =================================================
            # TRUNCATE TABLE FIRST TIME ONLY
            # =================================================
            # NOTE: truncation only happens once a first chunk has been read,
            # so a query that returns no rows leaves the table untouched.
            if TRUNCATE_BEFORE_LOAD and not table_truncated:
                print("\n================================")
                print(f"TRUNCATING TABLE : {TABLE_NAME}")
                print("================================")
                # Backticks keep the statement valid even if a name needs
                # quoting.
                truncate_query = f"""
                TRUNCATE TABLE
                `{CH_CONFIG['database']}`.`{TABLE_NAME}`
                """
                ch_client.command(truncate_query)
                print("TABLE TRUNCATED SUCCESSFULLY")
                table_truncated = True
            # =================================================
            # INSERT INTO CLICKHOUSE
            # =================================================
            print("\nInserting into ClickHouse...")
            ch_client.insert_df(
                table=TABLE_NAME,
                df=chunk,
                database=CH_CONFIG['database']
            )
            total_rows += len(chunk)
            print(
                f"\nInserted Total Rows : {total_rows}"
            )
        except Exception as chunk_error:
            # A failed chunk is logged and skipped so the rest still loads,
            # but it is counted so the final summary stays honest.
            failed_chunks += 1
            print("\n================================")
            print("CHUNK INSERT FAILED")
            print("================================")
            print(str(chunk_error))
            traceback.print_exc()
            # =============================================
            # SAVE ERROR LOG
            # =============================================
            _append_error_log(
                "clickhouse_chunk_error.log",
                chunk_error,
                table=TABLE_NAME
            )
            continue
    print("\n================================")
    if failed_chunks:
        print("ETL COMPLETED WITH ERRORS")
        print(f"FAILED CHUNKS : {failed_chunks}")
    else:
        print("ETL COMPLETED SUCCESSFULLY")
    print(f"TOTAL ROWS INSERTED : {total_rows}")
    print("================================")
 # =========================================================
 # MAIN ERROR
 # =========================================================
 except Exception as main_error:
    # Top-level boundary: connection or read failures end the run here.
    print("\n================================")
    print("MAIN ERROR")
    print("================================")
    print(str(main_error))
    traceback.print_exc()
    _append_error_log(
        "clickhouse_main_error.log",
        main_error
    )
 # =========================================================
 # CLOSE CONNECTIONS
 # =========================================================
 finally:
    # Only close what was opened; a failing close is reported rather than
    # silently ignored, and never prevents the other connection from closing.
    if sql_conn is not None:
        try:
            sql_conn.close()
            print("\nSQL Server Connection Closed")
        except Exception as close_error:
            print(f"\nSQL Server Connection Close Failed : {close_error}")
    if ch_client is not None:
        try:
            ch_client.close()
            print("ClickHouse Connection Closed")
        except Exception as close_error:
            print(f"ClickHouse Connection Close Failed : {close_error}")
 print("\nETL Finished :", datetime.now())
@@ -0,0 +1,604 @@
 import pyodbc
 import pandas as pd
 import clickhouse_connect
 import numpy as np
 from datetime import datetime
 import traceback
 import warnings
 import time
 # =========================================================
 # IGNORE WARNINGS
 # =========================================================
 import os  # imported here so connection secrets can come from the environment
 warnings.filterwarnings(
    'ignore',
    'pandas only supports SQLAlchemy connectable'
 )
 print("\n================================================")
 print("ETL STARTED :", datetime.now())
 print("================================================")
 # =========================================================
 # SQL SERVER CONNECTION STRING
 # =========================================================
 # Every value can be overridden with an ETL_SQL_* environment variable.
 # The literals are kept only as fallbacks so existing runs behave the same.
 # NOTE(review): the fallback still keeps a password in source control --
 # rotate it and drop the fallback once the environment is provisioned.
 SQL_CONN_STR = (
    'DRIVER={ODBC Driver 17 for SQL Server};'
    f"SERVER={os.environ.get('ETL_SQL_SERVER', '10.200.25.65')};"
    f"DATABASE={os.environ.get('ETL_SQL_DATABASE', 'CPMIndiaBusinessInsight')};"
    f"UID={os.environ.get('ETL_SQL_UID', 'bsgteam_test')};"
    f"PWD={os.environ.get('ETL_SQL_PWD', 'B$gt3@m#00512')};"
    'TrustServerCertificate=yes;'
    'Connection Timeout=60;'
 )
 # =========================================================
 # CLICKHOUSE CONFIG
 # =========================================================
 # Same override scheme as above, using ETL_CH_* environment variables.
 CH_CONFIG = {
    'host': os.environ.get('ETL_CH_HOST', '172.188.12.194'),
    'port': int(os.environ.get('ETL_CH_PORT', 8123)),
    'username': os.environ.get('ETL_CH_USERNAME', 'default'),
    'password': os.environ.get('ETL_CH_PASSWORD', 'dipanshu_k'),
    'database': os.environ.get('ETL_CH_DATABASE', 'DaburIndia_BI')
 }
 # =========================================================
 # TABLE DETAILS
 # =========================================================
 # TABLE_NAME is used for both the SQL Server source and the ClickHouse
 # target; PROJECT_ID filters the source rows.
 TABLE_NAME = 'Sales'
 PROJECT_ID = 41654
 # =========================================================
 # SETTINGS
 # =========================================================
 # When TRUNCATE_BEFORE_LOAD is set, the target table is emptied once,
 # right before the first insert; table_truncated records that it happened.
 TRUNCATE_BEFORE_LOAD = True
 table_truncated = False
 # =========================================================
 # CHUNK SIZE
 # =========================================================
 # Number of rows fetched from SQL Server per pandas chunk.
 chunk_size = 20000
 # =========================================================
 # CONNECT SQL SERVER
 # =========================================================
 def connect_sql():
    """Open an autocommit pyodbc connection described by SQL_CONN_STR.

    Prints a confirmation and returns the connection. On any failure the
    error text is printed and the original exception is re-raised.
    """
    try:
        connection = pyodbc.connect(SQL_CONN_STR, autocommit=True)
        print("Connected SQL Server")
        return connection
    except Exception as exc:
        # Report, then let the caller decide how to react.
        for line in ("SQL CONNECTION FAILED", str(exc)):
            print(line)
        raise
 # =========================================================
 # CONNECT CLICKHOUSE
 # =========================================================
 def connect_clickhouse():
    """Create a ClickHouse client from the CH_CONFIG keyword arguments.

    Prints a confirmation and returns the client. On any failure the
    error text is printed and the original exception is re-raised.
    """
    try:
        ch = clickhouse_connect.get_client(**CH_CONFIG)
        print("Connected ClickHouse")
        return ch
    except Exception as exc:
        # Report, then let the caller decide how to react.
        for line in ("CLICKHOUSE CONNECTION FAILED", str(exc)):
            print(line)
        raise
 # =========================================================
 # CLEAN DATAFRAME
 # =========================================================
 def _parse_named_datetime_column(col, series):
    """Return ``series`` parsed to datetimes, or None if it is not date-like.

    A column is a candidate only when its name contains 'date' or 'time'.
    The name alone is not trusted: names such as 'UpdatedBy' or
    'lead_time' also match, so the contents are checked as well.

    Returns None (meaning "use the generic handling") when:
      * the name does not match,
      * the column is numeric/boolean (a measure, not a timestamp),
      * every non-null value fails to parse as a date.
    Otherwise returns the parsed Series, with unparseable values as NaT.
    """
    lowered = col.lower()
    if 'date' not in lowered and 'time' not in lowered:
        return None
    if pd.api.types.is_datetime64_any_dtype(series):
        return pd.to_datetime(series, errors='coerce')
    # Numbers would be read as epoch offsets by to_datetime; leave them alone.
    if pd.api.types.is_numeric_dtype(series):
        return None
    present = series.dropna()
    if len(present) > 0 and all(
        isinstance(val, (bool, int, float, np.number, np.bool_))
        for val in present
    ):
        return None
    parsed = pd.to_datetime(series, errors='coerce')
    # Data exists but nothing parsed: this is text, not dates.
    if len(present) > 0 and not parsed.notna().any():
        return None
    return parsed


 def _clean_object_value(val):
    """Convert one object-column value to None/datetime/int/float/str."""
    if pd.isnull(val):
        return None
    if isinstance(val, pd.Timestamp):
        return val.to_pydatetime()
    if isinstance(val, datetime):
        return val
    # bool must be tested before int: bool is a subclass of int.
    if isinstance(val, (bool, np.bool_)):
        return int(val)
    if isinstance(val, (int, np.integer)):
        return int(val)
    if isinstance(val, (float, np.floating)):
        # NaN was already handled by the pd.isnull check above.
        return float(val)
    return str(val).strip()


 def clean_dataframe(df):
    """Normalise a chunk so its values are plain Python objects.

    NaN is replaced with None, then each column is handled by the first
    matching rule: date-like column, integer dtype, float dtype, or
    generic object/string handling (strings are stripped).

    Parameters:
        df: pandas DataFrame holding one chunk read from SQL Server.

    Returns:
        The cleaned DataFrame. Errors are printed rather than raised: a
        failing column is left as it was and the remaining columns are
        still processed.
    """
    try:
        df = df.replace({np.nan: None})
        for col in df.columns:
            try:
                print(f"\nCleaning Column : {col}")
                parsed = _parse_named_datetime_column(col, df[col])
                # =====================================
                # DATE / TIME COLUMNS
                # =====================================
                if parsed is not None:
                    print(f"Date Column : {col}")
                    df[col] = [
                        None if pd.isnull(val) else val.to_pydatetime()
                        for val in parsed
                    ]
                # =====================================
                # INTEGER COLUMNS
                # =====================================
                elif pd.api.types.is_integer_dtype(df[col]):
                    print(f"Integer Column : {col}")
                    df[col] = pd.to_numeric(df[col], errors='coerce')
                    df[col] = df[col].apply(
                        lambda x: int(x) if pd.notnull(x) else None
                    )
                # =====================================
                # FLOAT COLUMNS
                # =====================================
                elif pd.api.types.is_float_dtype(df[col]):
                    print(f"Float Column : {col}")
                    df[col] = pd.to_numeric(df[col], errors='coerce')
                    df[col] = df[col].apply(
                        lambda x: float(x) if pd.notnull(x) else None
                    )
                # =====================================
                # OBJECT / STRING COLUMNS
                # =====================================
                else:
                    print(f"String/Object Column : {col}")
                    df[col] = [
                        _clean_object_value(val) for val in df[col]
                    ]
            except Exception as col_error:
                # Best effort: report the column and carry on with the rest.
                print("\n================================")
                print(f"COLUMN FAILED : {col}")
                print(str(col_error))
                print("================================")
        return df
    except Exception as clean_error:
        print("\n================================")
        print("DATA CLEAN FAILED")
        print(str(clean_error))
        print("================================")
        return df
 # =========================================================
 # MAIN PROCESS
 # =========================================================
 try:
    # =====================================================
    # CONNECT SQL SERVER / CLICKHOUSE
    # =====================================================
    # Both helpers re-raise on failure, which lands in MAIN ERROR below.
    sql_conn = connect_sql()
    ch_client = connect_clickhouse()
    # =====================================================
    # QUERY
    # =====================================================
    query = f"""
    SELECT *
    FROM dbo.[{TABLE_NAME}]
    WHERE Project_Id = {PROJECT_ID}
    """
    print("\nExecuting Query")
    print(query)
    # =====================================================
    # RETRY SETTINGS
    # =====================================================
    retry_count = 0
    max_retry = 5
    # Outcome tracking, so the final banner reports what actually happened.
    load_completed = False
    failed_chunks = 0
    # =====================================================
    # MAIN RETRY LOOP
    # =====================================================
    while retry_count < max_retry:
        # Each attempt re-reads the query from the start.
        failed_chunks = 0
        try:
            # =============================================
            # READ SQL DATA
            # =============================================
            for chunk in pd.read_sql(
                query,
                sql_conn,
                chunksize=chunk_size
            ):
                try:
                    print("\n================================")
                    print(
                        f"Processing Rows : "
                        f"{len(chunk)}"
                    )
                    print("================================")
                    # =====================================
                    # CLEAN DATA
                    # =====================================
                    chunk = clean_dataframe(chunk)
                    # =====================================
                    # DEBUG COLUMN TYPES
                    # =====================================
                    print("\nCOLUMN DATATYPES")
                    print(chunk.dtypes)
                    print("\nCOLUMN SAMPLE TYPES")
                    for col in chunk.columns:
                        sample = chunk[col].dropna()
                        if len(sample) > 0:
                            print(
                                col,
                                type(sample.iloc[0]),
                                sample.iloc[0]
                            )
                    # =====================================
                    # TRUNCATE TABLE FIRST TIME ONLY
                    # =====================================
                    if (
                        TRUNCATE_BEFORE_LOAD
                        and
                        not table_truncated
                    ):
                        print("\n================================")
                        print(
                            f"TRUNCATING TABLE : "
                            f"{TABLE_NAME}"
                        )
                        print("================================")
                        # Backticks keep names with spaces or mixed case valid.
                        truncate_query = f"""
                        TRUNCATE TABLE
                        `{CH_CONFIG['database']}`.`{TABLE_NAME}`
                        """
                        ch_client.command(
                            truncate_query
                        )
                        print(
                            "TABLE TRUNCATED SUCCESSFULLY"
                        )
                        table_truncated = True
                    # =====================================
                    # INSERT INTO CLICKHOUSE
                    # =====================================
                    print(
                        "\nINSERTING INTO CLICKHOUSE..."
                    )
                    ch_client.insert_df(
                        table=TABLE_NAME,
                        df=chunk,
                        database=CH_CONFIG['database']
                    )
                    print(
                        f"INSERTED : "
                        f"{len(chunk)} ROWS"
                    )
                except Exception as insert_error:
                    # A bad chunk is logged and skipped, but it is counted
                    # so the run is not reported as a clean success.
                    failed_chunks += 1
                    print("\n================================")
                    print("INSERT FAILED")
                    print("================================")
                    print(str(insert_error))
                    traceback.print_exc()
                    # =================================
                    # SAVE ERROR LOG
                    # =================================
                    with open(
                        "insert_error.log",
                        "a",
                        encoding="utf-8"
                    ) as log:
                        log.write("\n\n================================")
                        log.write(f"\nTIME : {datetime.now()}")
                        log.write(f"\nTABLE : {TABLE_NAME}")
                        log.write(f"\nERROR : {str(insert_error)}")
                        log.write(
                            f"\nTRACEBACK :\n"
                            f"{traceback.format_exc()}"
                        )
                        log.write("\n================================")
                    continue
            # =============================================
            # SUCCESS: the whole result set was streamed
            # =============================================
            load_completed = True
            break
        # =================================================
        # SQL CONNECTION FAILURE
        # =================================================
        except pyodbc.OperationalError as op_error:
            retry_count += 1
            print("\n================================")
            print(
                f"SQL CONNECTION LOST "
                f"- RETRY {retry_count}"
            )
            print("================================")
            print(str(op_error))
            time.sleep(10)
            try:
                sql_conn.close()
            except Exception:
                # The connection is already broken; closing is best effort.
                pass
            # RECONNECT SQL
            sql_conn = connect_sql()
            # The next attempt restarts the query from the first row, so
            # rows inserted by the aborted attempt must be cleared first
            # or they would be loaded twice.
            if TRUNCATE_BEFORE_LOAD:
                table_truncated = False
            else:
                print(
                    "WARNING : RETRY WITHOUT TRUNCATE "
                    "- ROWS MAY BE DUPLICATED"
                )
        # =================================================
        # OTHER ERROR
        # =================================================
        except Exception as loop_error:
            print("\n================================")
            print("MAIN LOOP ERROR")
            print("================================")
            print(str(loop_error))
            traceback.print_exc()
            break
    # =====================================================
    # FINAL STATUS
    # =====================================================
    print("\n================================")
    if load_completed and failed_chunks == 0:
        print("ETL COMPLETED SUCCESSFULLY")
    elif load_completed:
        print("ETL COMPLETED WITH ERRORS")
        print(f"FAILED CHUNKS : {failed_chunks}")
    else:
        print("ETL FAILED")
    print("================================")
 # =========================================================
 # MAIN ERROR
 # =========================================================
 except Exception as main_error:
    print("\n================================")
    print("MAIN ERROR")
    print("================================")
    print(str(main_error))
    traceback.print_exc()
    with open(
        "main_error.log",
        "a",
        encoding="utf-8"
    ) as log:
        log.write("\n\n================================")
        log.write(f"\nTIME : {datetime.now()}")
        log.write(f"\nERROR : {str(main_error)}")
        log.write(
            f"\nTRACEBACK :\n"
            f"{traceback.format_exc()}"
        )
        log.write("\n================================")
 # =========================================================
 # CLOSE CONNECTIONS
 # =========================================================
 finally:
    # Either name may be unbound if its connect call failed, hence the
    # broad (but not bare) except around each close.
    try:
        sql_conn.close()
        print("\nSQL SERVER CONNECTION CLOSED")
    except Exception:
        pass
    try:
        ch_client.close()
        print("CLICKHOUSE CONNECTION CLOSED")
    except Exception:
        pass
 print("\n================================================")
 print("ETL FINISHED :", datetime.now())
 print("================================================")
@@ -0,0 +1,545 @@
 import pyodbc
 import pandas as pd
 import clickhouse_connect
 import numpy as np
 from datetime import datetime
 import traceback
 import warnings
 # =========================================================
 # IGNORE WARNINGS
 # =========================================================
 import os  # imported here so connection secrets can come from the environment
 warnings.filterwarnings(
    'ignore',
    'pandas only supports SQLAlchemy connectable'
 )
 print("ETL Started :", datetime.now())
 # =========================================================
 # SQL SERVER CONNECTION
 # =========================================================
 # Every value can be overridden with an ETL_SQL_* environment variable.
 # The literals are kept only as fallbacks so existing runs behave the same.
 # NOTE(review): the fallback still keeps a password in source control --
 # rotate it and drop the fallback once the environment is provisioned.
 SQL_CONN_STR = (
    'DRIVER={ODBC Driver 17 for SQL Server};'
    f"SERVER={os.environ.get('ETL_SQL_SERVER', '10.200.25.65')};"
    f"DATABASE={os.environ.get('ETL_SQL_DATABASE', 'CPMIndiaBusinessInsight')};"
    f"UID={os.environ.get('ETL_SQL_UID', 'bsgteam_test')};"
    f"PWD={os.environ.get('ETL_SQL_PWD', 'B$gt3@m#00512')};"
    'TrustServerCertificate=yes;'
 )
 # =========================================================
 # CLICKHOUSE CONFIG
 # =========================================================
 # Same override scheme as above, using ETL_CH_* environment variables.
 CH_CONFIG = {
    'host': os.environ.get('ETL_CH_HOST', '172.188.12.194'),
    'port': int(os.environ.get('ETL_CH_PORT', 8123)),
    'username': os.environ.get('ETL_CH_USERNAME', 'default'),
    'password': os.environ.get('ETL_CH_PASSWORD', 'dipanshu_k'),
    'database': os.environ.get('ETL_CH_DATABASE', 'DaburIndia_BI')
 }
 # =========================================================
 # TABLE NAME
 # =========================================================
 # TABLE_NAME is used for both the SQL Server source and the ClickHouse
 # target; PROJECT_ID filters the source rows.
 TABLE_NAME = 'PaidVisibility'
 PROJECT_ID = 41654
 # =========================================================
 # LOAD SETTINGS
 # =========================================================
 # When TRUNCATE_BEFORE_LOAD is set, the target table is emptied once,
 # right before the first insert; table_truncated records that it happened.
 TRUNCATE_BEFORE_LOAD = True
 table_truncated = False
 # =========================================================
 # CLICKHOUSE DATE COLUMNS
 # =========================================================
 # Columns converted to datetime.date by clean_dataframe
 # (names are matched case-insensitively).
 DATE_COLUMNS = [
    'visit_date'
 ]
 # =========================================================
 # CLICKHOUSE DATETIME COLUMNS
 # =========================================================
 # Columns converted to datetime.datetime by clean_dataframe.
 DATETIME_COLUMNS = [
    'create_date',
    'update_date'
 ]
 # =========================================================
 # CLEAN DATAFRAME
 # =========================================================
 def clean_dataframe(df):
    """Normalise a chunk so its values are plain Python objects.

    NaN is replaced with None, then each column is handled by the first
    matching rule:
      1. name listed in DATE_COLUMNS      -> datetime.date values
      2. name listed in DATETIME_COLUMNS  -> datetime.datetime values
      3. integer dtype                    -> int values
      4. float dtype                      -> int if every non-null value
                                             is whole, otherwise float
      5. anything else                    -> per-value conversion
    For rules 1 and 2, values whose year is outside 1970-2100 become null.

    Parameters:
        df: pandas DataFrame holding one chunk read from SQL Server.

    Returns:
        The cleaned DataFrame. Errors are printed rather than raised; a
        failing column keeps whatever state it had reached.
    """

    def _bound_datetimes(frame, name):
        """Parse a column as datetimes and null years outside 1970-2100."""
        frame[name] = pd.to_datetime(frame[name], errors='coerce')
        year = frame[name].dt.year
        frame[name] = frame[name].where((year >= 1970) & (year <= 2100))

    def _plain(val):
        """Convert one object-column value to a plain Python value."""
        if pd.isnull(val):
            return None
        if isinstance(val, (int, np.integer)):
            return int(val)
        if isinstance(val, (float, np.floating)):
            if np.isnan(val):
                return None
            return int(val) if val.is_integer() else float(val)
        if isinstance(val, str):
            return val.strip()
        # bool is a subclass of int, so Python bools are already handled
        # by the integer check above; the branch is kept from the original.
        if isinstance(val, bool):
            return int(val)
        if isinstance(val, pd.Timestamp):
            return val.to_pydatetime()
        if isinstance(val, datetime):
            return val
        return str(val)

    try:
        df = df.replace({np.nan: None})
        for col in df.columns:
            try:
                print(f"\nCleaning Column : {col}")
                if col.lower() in [name.lower() for name in DATE_COLUMNS]:
                    # ---- Date32 target: keep the calendar date only ----
                    print(f"Date32 Column : {col}")
                    _bound_datetimes(df, col)
                    df[col] = df[col].apply(
                        lambda ts: None if not pd.notnull(ts) else ts.date()
                    )
                elif col.lower() in [
                    name.lower() for name in DATETIME_COLUMNS
                ]:
                    # ---- DateTime64 target: full timestamp ----
                    print(f"DateTime Column : {col}")
                    _bound_datetimes(df, col)
                    df[col] = df[col].apply(
                        lambda ts:
                        None if not pd.notnull(ts) else ts.to_pydatetime()
                    )
                elif pd.api.types.is_integer_dtype(df[col]):
                    # ---- Integer dtype ----
                    print(f"Integer Column : {col}")
                    df[col] = pd.to_numeric(df[col], errors='coerce')
                    df[col] = df[col].apply(
                        lambda num: None if not pd.notnull(num) else int(num)
                    )
                elif pd.api.types.is_float_dtype(df[col]):
                    # ---- Float dtype: whole-valued columns become int ----
                    print(f"Float Column : {col}")
                    present = df[col].dropna()
                    if len(present) > 0 and (present % 1 == 0).all():
                        caster = int
                    else:
                        caster = float
                    df[col] = df[col].apply(
                        lambda num:
                        None if not pd.notnull(num) else caster(num)
                    )
                else:
                    # ---- Object / string: convert value by value ----
                    print(f"String/Object Column : {col}")
                    df[col] = [_plain(item) for item in df[col]]
            except Exception as col_error:
                # Best effort: report the column and carry on with the rest.
                print("\n================================")
                print(f"COLUMN FAILED : {col}")
                print(str(col_error))
                print("================================")
        return df
    except Exception as clean_error:
        print("\n================================")
        print("DATA CLEAN FAILED")
        print(str(clean_error))
        print("================================")
        return df
 # =========================================================
 # MAIN PROCESS
 # =========================================================
 try:
    # =====================================================
    # CONNECT SQL SERVER
    # =====================================================
    sql_conn = pyodbc.connect(SQL_CONN_STR)
    print("Connected to SQL Server")
    # =====================================================
    # CONNECT CLICKHOUSE
    # =====================================================
    ch_client = clickhouse_connect.get_client(**CH_CONFIG)
    print("Connected to ClickHouse")
    # =====================================================
    # QUERY
    # =====================================================
    query = f"""
    SELECT *
    FROM dbo.[{TABLE_NAME}]
    WHERE Project_Id = {PROJECT_ID}
    """
    print("\nExecuting Query:")
    print(query)
    # =====================================================
    # CHUNK SIZE
    # =====================================================
    chunk_size = 100000
    total_rows = 0
    # Counted so the final banner does not claim success after failures.
    failed_chunks = 0
    # =====================================================
    # READ DATA
    # =====================================================
    for chunk in pd.read_sql(
        query,
        sql_conn,
        chunksize=chunk_size
    ):
        try:
            print("\n================================")
            print(f"Processing {len(chunk)} Rows")
            print("================================")
            # =================================================
            # CLEAN DATA
            # =================================================
            chunk = clean_dataframe(chunk)
            # =================================================
            # DEBUG COLUMN TYPES
            # =================================================
            print("\nCOLUMN TYPES")
            for col in chunk.columns:
                sample = chunk[col].dropna()
                if len(sample) > 0:
                    print(
                        col,
                        type(sample.iloc[0]),
                        sample.iloc[0]
                    )
            # =================================================
            # DEBUG DATE COLUMN
            # =================================================
            if 'visit_date' in chunk.columns:
                print("\nvisit_date Sample")
                print(chunk['visit_date'].head())
                sample = chunk['visit_date'].dropna()
                if len(sample) > 0:
                    print(
                        "visit_date datatype:",
                        type(sample.iloc[0])
                    )
            # =================================================
            # TRUNCATE TABLE FIRST TIME ONLY
            # =================================================
            if TRUNCATE_BEFORE_LOAD and not table_truncated:
                print("\n================================")
                print(f"TRUNCATING TABLE : {TABLE_NAME}")
                print("================================")
                # Backticks keep names with spaces or mixed case valid.
                truncate_query = f"""
                TRUNCATE TABLE
                `{CH_CONFIG['database']}`.`{TABLE_NAME}`
                """
                ch_client.command(truncate_query)
                print("TABLE TRUNCATED SUCCESSFULLY")
                table_truncated = True
            # =================================================
            # INSERT INTO CLICKHOUSE
            # =================================================
            print("\nInserting into ClickHouse...")
            ch_client.insert_df(
                table=TABLE_NAME,
                df=chunk,
                database=CH_CONFIG['database']
            )
            total_rows += len(chunk)
            print(
                f"\nInserted Total Rows : {total_rows}"
            )
        except Exception as chunk_error:
            # A bad chunk is logged and skipped, but remembered.
            failed_chunks += 1
            print("\n================================")
            print("CHUNK INSERT FAILED")
            print("================================")
            print(str(chunk_error))
            traceback.print_exc()
            # =============================================
            # SAVE ERROR LOG
            # =============================================
            with open(
                "clickhouse_chunk_error.log",
                "a",
                encoding="utf-8"
            ) as log:
                log.write("\n\n================================")
                log.write(f"\nTIME : {datetime.now()}")
                log.write(f"\nTABLE : {TABLE_NAME}")
                log.write(f"\nERROR : {str(chunk_error)}")
                log.write(
                    f"\nTRACEBACK :\n"
                    f"{traceback.format_exc()}"
                )
                log.write("\n================================")
            continue
    # =====================================================
    # FINAL STATUS
    # =====================================================
    print("\n================================")
    if failed_chunks:
        print("ETL COMPLETED WITH ERRORS")
        print(f"FAILED CHUNKS : {failed_chunks}")
    else:
        print("ETL COMPLETED SUCCESSFULLY")
    print(f"TOTAL ROWS INSERTED : {total_rows}")
    print("================================")
 # =========================================================
 # MAIN ERROR
 # =========================================================
 except Exception as main_error:
    print("\n================================")
    print("MAIN ERROR")
    print("================================")
    print(str(main_error))
    traceback.print_exc()
    with open(
        "clickhouse_main_error.log",
        "a",
        encoding="utf-8"
    ) as log:
        log.write("\n\n================================")
        log.write(f"\nTIME : {datetime.now()}")
        log.write(f"\nERROR : {str(main_error)}")
        log.write(
            f"\nTRACEBACK :\n"
            f"{traceback.format_exc()}"
        )
        log.write("\n================================")
 # =========================================================
 # CLOSE CONNECTIONS
 # =========================================================
 finally:
    # Either name may be unbound if its connect call failed, hence the
    # broad (but not bare) except around each close.
    try:
        sql_conn.close()
        print("\nSQL Server Connection Closed")
    except Exception:
        pass
    try:
        ch_client.close()
        print("ClickHouse Connection Closed")
    except Exception:
        pass
 print("\nETL Finished :", datetime.now())
@@ -0,0 +1,632 @@
 import pyodbc
 import pandas as pd
 import clickhouse_connect
 import numpy as np
 from datetime import datetime
 import traceback
 import warnings
 # ---------------------------------------------------------
 # Warning filter
 # Suppresses warnings whose message starts with the text below.
 # ---------------------------------------------------------
 warnings.filterwarnings(
    action='ignore',
    message='pandas only supports SQLAlchemy connectable',
 )
 # ---------------------------------------------------------
 # Start banner
 # ---------------------------------------------------------
 print("\n====================================")
 print("ETL Started :", datetime.now())
 print("====================================")
 # ---------------------------------------------------------
 # Source: SQL Server ODBC connection string
 # NOTE(review): the login and password are stored in plain text
 # in this script; consider loading them from the environment or
 # a secrets store instead of committing them.
 # ---------------------------------------------------------
 SQL_CONN_STR = "".join([
    'DRIVER={ODBC Driver 17 for SQL Server};',
    'SERVER=10.200.25.65;',
    'DATABASE=CPMIndiaBusinessInsight;',
    'UID=bsgteam_test;',
    'PWD=B$gt3@m#00512;',
    'TrustServerCertificate=yes;',
 ])
 # ---------------------------------------------------------
 # Target: keyword arguments for clickhouse_connect.get_client
 # NOTE(review): same plain-text credential concern as above.
 # ---------------------------------------------------------
 CH_CONFIG = dict(
    host='172.188.12.194',
    port=8123,
    username='default',
    password='dipanshu_k',
    database='DaburIndia_BI',
 )
 # ---------------------------------------------------------
 # Table to copy and the Project_Id used to filter it
 # ---------------------------------------------------------
 TABLE_NAME = 'SKU Master'
 PROJECT_ID = 41654
 # ---------------------------------------------------------
 # Load behaviour
 # TRUNCATE_BEFORE_LOAD : empty the target table before the first insert
 # table_truncated      : flips to True once that truncate has run
 # ---------------------------------------------------------
 TRUNCATE_BEFORE_LOAD = True
 table_truncated = False
 # ---------------------------------------------------------
 # Columns coerced to date / datetime by clean_dataframe
 # (matched case-insensitively against the column names)
 # NOTE(review): confirm these names exist in this table.
 # ---------------------------------------------------------
 DATE_COLUMNS = ['visit_date']
 DATETIME_COLUMNS = ['create_date', 'update_date']
 # =========================================================
 # CLEAN DATAFRAME
 # =========================================================
 def _clean_object_value(val):
    """Convert one cell of an object column into a plain Python value.
    Nulls become None, strings are stripped, bools and integers become
    int, whole-number floats become int, other floats stay float, pandas
    Timestamps become datetime, datetime/date-like objects are returned
    unchanged and anything else is converted with str().
    """
    if pd.isnull(val):
        return None
    if isinstance(val, str):
        return val.strip()
    # bool is a subclass of int, so one test covers both; both map to int.
    if isinstance(val, (bool, int, np.integer)):
        return int(val)
    if isinstance(val, (float, np.floating)):
        # NaN was already caught by pd.isnull above. Going through float()
        # keeps is_integer() available for every NumPy float width.
        as_float = float(val)
        return int(as_float) if as_float.is_integer() else as_float
    if isinstance(val, pd.Timestamp):
        return val.to_pydatetime()
    # Plain datetime objects and anything date-like (has a ``year``).
    if isinstance(val, datetime) or hasattr(val, 'year'):
        return val
    return str(val)
 def clean_dataframe(df):
    """Normalise every column of ``df`` to plain Python values.
    Columns named in DATE_COLUMNS / DATETIME_COLUMNS (case-insensitive)
    are parsed with pd.to_datetime (unparseable values become null);
    integer and float dtype columns are re-cast value by value; all other
    columns are cleaned cell by cell with _clean_object_value.
    A column that fails is reported and left as it was; the function
    never raises and always returns a DataFrame.
    """
    try:
        # ---------------------------------------------
        # Replace NaN with None
        # ---------------------------------------------
        df = df.replace({np.nan: None})
        # ---------------------------------------------
        # Lower-cased lookups, built once instead of once per column
        # ---------------------------------------------
        date_cols = {name.lower() for name in DATE_COLUMNS}
        datetime_cols = {name.lower() for name in DATETIME_COLUMNS}
        for col in df.columns:
            try:
                print(f"\nCleaning Column : {col}")
                col_key = col.lower()
                # =====================================
                # DATE COLUMNS
                # =====================================
                if col_key in date_cols:
                    print(f"Date Column : {col}")
                    df[col] = pd.to_datetime(df[col], errors='coerce')
                    df[col] = [
                        None if pd.isnull(stamp) else stamp.date()
                        for stamp in df[col]
                    ]
                # =====================================
                # DATETIME COLUMNS
                # =====================================
                elif col_key in datetime_cols:
                    print(f"DateTime Column : {col}")
                    df[col] = pd.to_datetime(df[col], errors='coerce')
                    df[col] = [
                        None if pd.isnull(stamp) else stamp.to_pydatetime()
                        for stamp in df[col]
                    ]
                # =====================================
                # INTEGER COLUMNS
                # =====================================
                elif pd.api.types.is_integer_dtype(df[col]):
                    print(f"Integer Column : {col}")
                    df[col] = pd.to_numeric(df[col], errors='coerce')
                    df[col] = df[col].apply(
                        lambda x: int(x) if pd.notnull(x) else None
                    )
                # =====================================
                # FLOAT COLUMNS
                # =====================================
                elif pd.api.types.is_float_dtype(df[col]):
                    print(f"Float Column : {col}")
                    df[col] = pd.to_numeric(df[col], errors='coerce')
                    non_null = df[col].dropna()
                    # A column holding only whole numbers (12.0 -> 12)
                    # is cast to int; otherwise values stay float.
                    all_whole = len(non_null) > 0 and (
                        (non_null % 1 == 0).all()
                    )
                    caster = int if all_whole else float
                    df[col] = df[col].apply(
                        lambda x: caster(x) if pd.notnull(x) else None
                    )
                # =====================================
                # OBJECT / STRING COLUMNS
                # =====================================
                else:
                    print(f"String/Object Column : {col}")
                    cleaned = []
                    for val in df[col]:
                        try:
                            cleaned.append(_clean_object_value(val))
                        except Exception as row_error:
                            # A cell that cannot be cleaned is reported
                            # and stored as None; the rest is kept.
                            print(f"Row Cleaning Error in Column {col}")
                            print(str(row_error))
                            cleaned.append(None)
                    df[col] = cleaned
            except Exception as col_error:
                # Best effort: report and move on to the next column.
                print("\n================================")
                print(f"COLUMN FAILED : {col}")
                print(str(col_error))
                print("================================")
        return df
    except Exception as clean_error:
        print("\n================================")
        print("DATA CLEAN FAILED")
        print(str(clean_error))
        print(traceback.format_exc())
        print("================================")
        return df
 # =========================================================
 # MAIN PROCESS
 # =========================================================
 def _append_error_log(path, error, table=None):
    """Append one timestamped error entry to the log file at ``path``.
    The entry holds the error message and the traceback of the exception
    currently being handled, so call this from inside an ``except`` block.
    ``table`` adds a TABLE line when it is given.
    """
    with open(path, "a", encoding="utf-8") as log:
        log.write("\n\n================================")
        log.write(f"\nTIME : {datetime.now()}")
        if table is not None:
            log.write(f"\nTABLE : {table}")
        log.write(f"\nERROR : {str(error)}")
        log.write(f"\nTRACEBACK :\n{traceback.format_exc()}")
        log.write("\n================================")
 # Pre-bound so the cleanup below can tell "never connected" apart from
 # a failing close instead of hiding a NameError.
 sql_conn = None
 ch_client = None
 try:
    # =====================================================
    # CONNECT SQL SERVER
    # =====================================================
    print("\nConnecting SQL Server...")
    sql_conn = pyodbc.connect(SQL_CONN_STR)
    print("Connected to SQL Server")
    # =====================================================
    # CONNECT CLICKHOUSE
    # =====================================================
    print("\nConnecting ClickHouse...")
    ch_client = clickhouse_connect.get_client(**CH_CONFIG)
    print("Connected to ClickHouse")
    # =====================================================
    # QUERY
    # TABLE_NAME and PROJECT_ID are constants of this script,
    # not user input.
    # =====================================================
    query = f"""
    SELECT *
    FROM dbo.[{TABLE_NAME}]
    WHERE Project_Id = {PROJECT_ID}
    """
    print("\n====================================")
    print("Executing Query")
    print("====================================")
    print(query)
    # =====================================================
    # CHUNK SIZE AND COUNTERS
    # =====================================================
    chunk_size = 100000
    total_rows = 0
    # Chunks that could not be loaded; drives the final status line.
    failed_chunks = 0
    # =====================================================
    # READ DATA
    # =====================================================
    for chunk in pd.read_sql(query, sql_conn, chunksize=chunk_size):
        try:
            print("\n====================================")
            print(f"Processing Rows : {len(chunk)}")
            print("====================================")
            # =================================================
            # CLEAN DATA
            # =================================================
            chunk = clean_dataframe(chunk)
            # =================================================
            # DEBUG COLUMN TYPES
            # =================================================
            print("\nCOLUMN TYPES")
            for col in chunk.columns:
                try:
                    sample = chunk[col].dropna()
                    if len(sample) > 0:
                        print(col, type(sample.iloc[0]), sample.iloc[0])
                except Exception:
                    # Debug output only; never let it stop the load.
                    continue
            # =================================================
            # TRUNCATE TABLE (once, before the first insert)
            # =================================================
            if TRUNCATE_BEFORE_LOAD and not table_truncated:
                try:
                    print("\n====================================")
                    print(f"TRUNCATING : {TABLE_NAME}")
                    print("====================================")
                    truncate_query = f"""
                    TRUNCATE TABLE
                    `{CH_CONFIG['database']}`.`{TABLE_NAME}`
                    """
                    print(truncate_query)
                    ch_client.command(truncate_query)
                    print("TABLE TRUNCATED SUCCESSFULLY")
                    table_truncated = True
                except Exception as truncate_error:
                    print("\nTRUNCATE FAILED")
                    print(str(truncate_error))
                    # Re-raised into the chunk handler below, so this
                    # chunk is not inserted into an untruncated table.
                    raise
            # =================================================
            # INSERT DATA
            # =================================================
            try:
                print("\n====================================")
                print("INSERTING DATA INTO CLICKHOUSE")
                print("====================================")
                ch_client.insert_df(
                    table=f"`{TABLE_NAME}`",
                    df=chunk,
                    database=CH_CONFIG['database']
                )
                total_rows += len(chunk)
                print(f"\nTOTAL INSERTED : {total_rows}")
            except Exception as insert_error:
                failed_chunks += 1
                print("\nINSERT FAILED")
                print(str(insert_error))
                traceback.print_exc()
                _append_error_log(
                    "clickhouse_insert_error.log",
                    insert_error,
                    TABLE_NAME
                )
                continue
        except Exception as chunk_error:
            failed_chunks += 1
            print("\n====================================")
            print("CHUNK PROCESS FAILED")
            print("====================================")
            print(str(chunk_error))
            traceback.print_exc()
            continue
    # =====================================================
    # FINAL STATUS
    # Success is only reported when every chunk was loaded.
    # =====================================================
    print("\n====================================")
    if failed_chunks:
        print("ETL COMPLETED WITH ERRORS")
        print(f"FAILED CHUNKS : {failed_chunks}")
    else:
        print("ETL COMPLETED SUCCESSFULLY")
    print(f"TOTAL ROWS INSERTED : {total_rows}")
    print("====================================")
 # =========================================================
 # MAIN ERROR
 # =========================================================
 except Exception as main_error:
    print("\n====================================")
    print("MAIN ERROR")
    print("====================================")
    print(str(main_error))
    traceback.print_exc()
    _append_error_log("clickhouse_main_error.log", main_error)
 # =========================================================
 # CLOSE CONNECTIONS
 # =========================================================
 finally:
    if sql_conn is not None:
        try:
            sql_conn.close()
            print("\nSQL Server Connection Closed")
        except Exception as close_error:
            print(f"\nSQL Server Connection Close Failed : {close_error}")
    if ch_client is not None:
        try:
            ch_client.close()
            print("ClickHouse Connection Closed")
        except Exception as close_error:
            print(f"ClickHouse Connection Close Failed : {close_error}")
 print("\n====================================")
 print("ETL Finished :", datetime.now())
 print("====================================")
@@ -0,0 +1,604 @@
 import pyodbc
 import pandas as pd
 import clickhouse_connect
 import numpy as np
 from datetime import datetime
 import traceback
 import warnings
 import time
 # ---------------------------------------------------------
 # Warning filter
 # Suppresses warnings whose message starts with the text below.
 # ---------------------------------------------------------
 warnings.filterwarnings(
    action='ignore',
    message='pandas only supports SQLAlchemy connectable',
 )
 # ---------------------------------------------------------
 # Start banner
 # ---------------------------------------------------------
 print("\n================================================")
 print("ETL STARTED :", datetime.now())
 print("================================================")
 # ---------------------------------------------------------
 # Source: SQL Server ODBC connection string
 # NOTE(review): the login and password are stored in plain text
 # in this script; consider loading them from the environment or
 # a secrets store instead of committing them.
 # ---------------------------------------------------------
 SQL_CONN_STR = "".join([
    'DRIVER={ODBC Driver 17 for SQL Server};',
    'SERVER=10.200.25.65;',
    'DATABASE=CPMIndiaBusinessInsight;',
    'UID=bsgteam_test;',
    'PWD=B$gt3@m#00512;',
    'TrustServerCertificate=yes;',
    'Connection Timeout=60;',
 ])
 # ---------------------------------------------------------
 # Target: keyword arguments for clickhouse_connect.get_client
 # NOTE(review): same plain-text credential concern as above.
 # ---------------------------------------------------------
 CH_CONFIG = dict(
    host='172.188.12.194',
    port=8123,
    username='default',
    password='dipanshu_k',
    database='DaburIndia_BI',
 )
 # ---------------------------------------------------------
 # Table to copy and the Project_Id used to filter it
 # ---------------------------------------------------------
 TABLE_NAME = 'Sales'
 PROJECT_ID = 41654
 # ---------------------------------------------------------
 # Load behaviour
 # TRUNCATE_BEFORE_LOAD : empty the target table before the first insert
 # table_truncated      : flips to True once that truncate has run
 # ---------------------------------------------------------
 TRUNCATE_BEFORE_LOAD = True
 table_truncated = False
 # ---------------------------------------------------------
 # Rows fetched from SQL Server per pd.read_sql chunk
 # ---------------------------------------------------------
 chunk_size = 10000
 # ---------------------------------------------------------
 # SQL Server connection helper
 # ---------------------------------------------------------
 def connect_sql():
    """Open and return a pyodbc connection built from SQL_CONN_STR.
    The connection is opened with autocommit enabled. A failure is
    printed and then re-raised to the caller unchanged.
    """
    try:
        connection = pyodbc.connect(SQL_CONN_STR, autocommit=True)
        print("Connected SQL Server")
    except Exception as exc:
        print("SQL CONNECTION FAILED")
        print(str(exc))
        raise
    return connection
 # ---------------------------------------------------------
 # ClickHouse connection helper
 # ---------------------------------------------------------
 def connect_clickhouse():
    """Create and return a clickhouse_connect client from CH_CONFIG.
    CH_CONFIG is expanded into keyword arguments for get_client. A
    failure is printed and then re-raised to the caller unchanged.
    """
    try:
        ch = clickhouse_connect.get_client(**CH_CONFIG)
        print("Connected ClickHouse")
    except Exception as exc:
        print("CLICKHOUSE CONNECTION FAILED")
        print(str(exc))
        raise
    return ch
 # =========================================================
 # CLEAN DATAFRAME
 # =========================================================
 def _holds_numbers(series):
    """Return True when the non-null values of ``series`` are numeric.
    Used to keep numeric columns whose name merely contains "date" or
    "time" (e.g. a duration or an amount) out of the datetime parser.
    """
    kind = pd.api.types.infer_dtype(series, skipna=True)
    return kind in ('integer', 'floating', 'mixed-integer-float', 'decimal')
 def _normalize_cell(val):
    """Convert one cell of an object column into a plain Python value.
    Nulls become None, pandas Timestamps become datetime, datetimes are
    returned unchanged, bools and integers become int, floats become
    float and anything else becomes its stripped string form.
    """
    if pd.isnull(val):
        return None
    if isinstance(val, pd.Timestamp):
        return val.to_pydatetime()
    if isinstance(val, datetime):
        return val
    # bool is a subclass of int, so a separate bool test placed after the
    # int test can never run; both are handled here and map to int.
    if isinstance(val, (bool, int, np.integer)):
        return int(val)
    if isinstance(val, (float, np.floating)):
        # NaN was already caught by pd.isnull above.
        return float(val)
    return str(val).strip()
 def clean_dataframe(df):
    """Normalise every column of ``df`` to plain Python values.
    A column whose name contains "date" or "time" (case-insensitive) is
    parsed with pd.to_datetime, unless its values are numeric: numbers
    would otherwise be read as nanoseconds since 1970 and silently turn
    into timestamps. Integer and float dtype columns are re-cast value by
    value; all other columns are cleaned cell by cell.
    A column that fails is reported and left as it was; the function
    never raises and always returns a DataFrame.
    """
    try:
        # ---------------------------------------------
        # Replace NaN
        # ---------------------------------------------
        df = df.replace({np.nan: None})
        # ---------------------------------------------
        # Process Column Wise
        # ---------------------------------------------
        for col in df.columns:
            try:
                print(f"\nCleaning Column : {col}")
                col_key = col.lower()
                name_hints_date = 'date' in col_key or 'time' in col_key
                # =====================================
                # AUTO DETECT DATE / TIME COLUMNS
                # =====================================
                if name_hints_date and not _holds_numbers(df[col]):
                    print(f"Date Column : {col}")
                    df[col] = pd.to_datetime(df[col], errors='coerce')
                    df[col] = [
                        None if pd.isnull(stamp) else stamp.to_pydatetime()
                        for stamp in df[col]
                    ]
                # =====================================
                # INTEGER COLUMNS
                # =====================================
                elif pd.api.types.is_integer_dtype(df[col]):
                    print(f"Integer Column : {col}")
                    df[col] = pd.to_numeric(df[col], errors='coerce')
                    df[col] = df[col].apply(
                        lambda x: int(x) if pd.notnull(x) else None
                    )
                # =====================================
                # FLOAT COLUMNS
                # =====================================
                elif pd.api.types.is_float_dtype(df[col]):
                    print(f"Float Column : {col}")
                    df[col] = pd.to_numeric(df[col], errors='coerce')
                    df[col] = df[col].apply(
                        lambda x: float(x) if pd.notnull(x) else None
                    )
                # =====================================
                # OBJECT / STRING COLUMNS
                # =====================================
                else:
                    print(f"String/Object Column : {col}")
                    df[col] = [_normalize_cell(val) for val in df[col]]
            except Exception as col_error:
                # Best effort: report and move on to the next column.
                print("\n================================")
                print(f"COLUMN FAILED : {col}")
                print(str(col_error))
                print("================================")
        return df
    except Exception as clean_error:
        print("\n================================")
        print("DATA CLEAN FAILED")
        print(str(clean_error))
        print("================================")
        return df
 # =========================================================
 # MAIN PROCESS
 # =========================================================
 def _append_error_log(path, error, table=None):
    """Append one timestamped error entry to the log file at ``path``.
    The entry holds the error message and the traceback of the exception
    currently being handled, so call this from inside an ``except`` block.
    ``table`` adds a TABLE line when it is given.
    """
    with open(path, "a", encoding="utf-8") as log:
        log.write("\n\n================================")
        log.write(f"\nTIME : {datetime.now()}")
        if table is not None:
            log.write(f"\nTABLE : {table}")
        log.write(f"\nERROR : {str(error)}")
        log.write(f"\nTRACEBACK :\n{traceback.format_exc()}")
        log.write("\n================================")
 # Pre-bound so the cleanup below can tell "never connected" apart from
 # a failing close instead of hiding a NameError.
 sql_conn = None
 ch_client = None
 try:
    # =====================================================
    # CONNECT SQL SERVER / CLICKHOUSE
    # =====================================================
    sql_conn = connect_sql()
    ch_client = connect_clickhouse()
    # =====================================================
    # QUERY
    # TABLE_NAME and PROJECT_ID are constants of this script,
    # not user input.
    # =====================================================
    query = f"""
    SELECT *
    FROM dbo.[{TABLE_NAME}]
    WHERE Project_Id = {PROJECT_ID}
    """
    print("\nExecuting Query")
    print(query)
    # =====================================================
    # RETRY SETTINGS AND STATUS
    # =====================================================
    retry_count = 0
    max_retry = 5
    # True only once a full pass over the query result has finished.
    load_finished = False
    # Chunks that could not be cleaned / truncated / inserted.
    failed_chunks = 0
    # =====================================================
    # MAIN RETRY LOOP
    # =====================================================
    while retry_count < max_retry:
        try:
            # =============================================
            # READ SQL DATA
            # =============================================
            for chunk in pd.read_sql(
                query,
                sql_conn,
                chunksize=chunk_size
            ):
                try:
                    print("\n================================")
                    print(f"Processing Rows : {len(chunk)}")
                    print("================================")
                    # =====================================
                    # CLEAN DATA
                    # =====================================
                    chunk = clean_dataframe(chunk)
                    # =====================================
                    # DEBUG COLUMN TYPES
                    # =====================================
                    print("\nCOLUMN DATATYPES")
                    print(chunk.dtypes)
                    print("\nCOLUMN SAMPLE TYPES")
                    for col in chunk.columns:
                        sample = chunk[col].dropna()
                        if len(sample) > 0:
                            print(
                                col,
                                type(sample.iloc[0]),
                                sample.iloc[0]
                            )
                    # =====================================
                    # TRUNCATE TABLE (once per load pass)
                    # =====================================
                    if TRUNCATE_BEFORE_LOAD and not table_truncated:
                        print("\n================================")
                        print(f"TRUNCATING TABLE : {TABLE_NAME}")
                        print("================================")
                        truncate_query = f"""
                        TRUNCATE TABLE
                        `{CH_CONFIG['database']}`.`{TABLE_NAME}`
                        """
                        ch_client.command(truncate_query)
                        print("TABLE TRUNCATED SUCCESSFULLY")
                        table_truncated = True
                    # =====================================
                    # INSERT INTO CLICKHOUSE
                    # =====================================
                    print("\nINSERTING INTO CLICKHOUSE...")
                    ch_client.insert_df(
                        table=TABLE_NAME,
                        df=chunk,
                        database=CH_CONFIG['database']
                    )
                    print(f"INSERTED : {len(chunk)} ROWS")
                except Exception as insert_error:
                    failed_chunks += 1
                    print("\n================================")
                    print("INSERT FAILED")
                    print("================================")
                    print(str(insert_error))
                    traceback.print_exc()
                    _append_error_log(
                        "insert_error.log",
                        insert_error,
                        TABLE_NAME
                    )
                    continue
            # =============================================
            # SUCCESS: the whole result set was read
            # =============================================
            load_finished = True
            break
        # =================================================
        # SQL CONNECTION FAILURE
        # =================================================
        except pyodbc.OperationalError as op_error:
            retry_count += 1
            print("\n================================")
            print(f"SQL CONNECTION LOST - RETRY {retry_count}")
            print("================================")
            print(str(op_error))
            time.sleep(10)
            try:
                sql_conn.close()
            except Exception:
                # The connection is already broken; nothing to release.
                pass
            # RECONNECT SQL
            sql_conn = connect_sql()
            # The query is re-read from its first row after a reconnect.
            # Without a fresh truncate every chunk inserted before the
            # failure would be inserted a second time.
            if TRUNCATE_BEFORE_LOAD:
                table_truncated = False
                failed_chunks = 0
            else:
                print(
                    "WARNING : RELOAD WITHOUT TRUNCATE "
                    "MAY DUPLICATE ROWS"
                )
        # =================================================
        # OTHER ERROR
        # =================================================
        except Exception as loop_error:
            print("\n================================")
            print("MAIN LOOP ERROR")
            print("================================")
            print(str(loop_error))
            traceback.print_exc()
            break
    # =====================================================
    # FINAL STATUS
    # Success is only reported when a full pass finished and
    # every chunk of that pass was loaded.
    # =====================================================
    print("\n================================")
    if load_finished and not failed_chunks:
        print("ETL COMPLETED SUCCESSFULLY")
    elif load_finished:
        print("ETL COMPLETED WITH ERRORS")
        print(f"FAILED CHUNKS : {failed_chunks}")
    else:
        print("ETL FAILED")
        print(f"RETRIES USED : {retry_count} OF {max_retry}")
    print("================================")
 # =========================================================
 # MAIN ERROR
 # =========================================================
 except Exception as main_error:
    print("\n================================")
    print("MAIN ERROR")
    print("================================")
    print(str(main_error))
    traceback.print_exc()
    _append_error_log("main_error.log", main_error)
 # =========================================================
 # CLOSE CONNECTIONS
 # =========================================================
 finally:
    if sql_conn is not None:
        try:
            sql_conn.close()
            print("\nSQL SERVER CONNECTION CLOSED")
        except Exception as close_error:
            print(f"\nSQL SERVER CONNECTION CLOSE FAILED : {close_error}")
    if ch_client is not None:
        try:
            ch_client.close()
            print("CLICKHOUSE CONNECTION CLOSED")
        except Exception as close_error:
            print(f"CLICKHOUSE CONNECTION CLOSE FAILED : {close_error}")
 print("\n================================================")
 print("ETL FINISHED :", datetime.now())
 print("================================================")
@@ -0,0 +1,632 @@
 import pyodbc
 import pandas as pd
 import clickhouse_connect
 import numpy as np
 from datetime import datetime
 import traceback
 import warnings
 # =========================================================
 # IGNORE WARNINGS
 # =========================================================
 # The second argument is a regex matched against the start of the warning
 # message; it targets the pandas warning about non-SQLAlchemy connections.
 warnings.filterwarnings(
    'ignore',
    'pandas only supports SQLAlchemy connectable'
 )
 print("\n====================================")
 print("ETL Started :", datetime.now())
 print("====================================")
 # =========================================================
 # CREDENTIALS
 # =========================================================
 # Credentials can be supplied through environment variables so that they
 # do not have to live in source control:
 #   SQL_SERVER_UID / SQL_SERVER_PWD          -> SQL Server login
 #   CLICKHOUSE_USER / CLICKHOUSE_PASSWORD    -> ClickHouse login
 # NOTE(review): the literal fallbacks below are kept only so existing runs
 # keep working unchanged. They are committed in plain text and should be
 # rotated and removed once the environment variables are in place.
 # NOTE(review): an environment-supplied password is inserted into the ODBC
 # string as-is; a value containing ';' or '}' would need ODBC brace quoting.
 import os
 # =========================================================
 # SQL SERVER CONNECTION
 # =========================================================
 SQL_CONN_STR = (
    'DRIVER={ODBC Driver 17 for SQL Server};'
    'SERVER=10.200.25.65;'
    'DATABASE=CPMIndiaBusinessInsight;'
    f"UID={os.environ.get('SQL_SERVER_UID', 'bsgteam_test')};"
    f"PWD={os.environ.get('SQL_SERVER_PWD', 'B$gt3@m#00512')};"
    'TrustServerCertificate=yes;'
 )
 # =========================================================
 # CLICKHOUSE CONFIG
 # =========================================================
 # Passed as keyword arguments to clickhouse_connect.get_client().
 CH_CONFIG = {
    'host': '172.188.12.194',
    'port': 8123,
    'username': os.environ.get('CLICKHOUSE_USER', 'default'),
    'password': os.environ.get('CLICKHOUSE_PASSWORD', 'dipanshu_k'),
    'database': 'DaburIndia_BI'
 }
 # =========================================================
 # TABLE DETAILS
 # =========================================================
 # TABLE_NAME is used for both the SQL Server source table (dbo schema)
 # and the ClickHouse destination table.
 TABLE_NAME = 'Web Logins'
 PROJECT_ID = 41654
 # =========================================================
 # SETTINGS
 # =========================================================
 # When True the destination table is truncated once, before the first
 # insert; table_truncated records whether that has already happened.
 TRUNCATE_BEFORE_LOAD = True
 table_truncated = False
 # =========================================================
 # CLICKHOUSE DATE COLUMNS
 # =========================================================
 # Columns converted to datetime.date values (matched case-insensitively).
 DATE_COLUMNS = [
    'visit_date'
 ]
 # =========================================================
 # CLICKHOUSE DATETIME COLUMNS
 # =========================================================
 # Columns converted to datetime.datetime values (matched case-insensitively).
 DATETIME_COLUMNS = [
    'create_date',
    'update_date'
 ]
 # =========================================================
 # CLEAN DATAFRAME
 # =========================================================
 def _int_or_none(value):
    """Return ``int(value)``, or None when ``value`` is null."""
    return int(value) if pd.notnull(value) else None


 def _float_or_none(value):
    """Return ``float(value)``, or None when ``value`` is null."""
    return float(value) if pd.notnull(value) else None


 def _clean_object_value(val):
    """Convert one cell of an object/string column to a plain Python value.

    Nulls become None, strings are stripped, booleans become 0/1, whole
    floats become ints, pandas Timestamps become ``datetime`` objects,
    date-like objects pass through, and anything else is stringified.
    """
    if pd.isnull(val):
        return None
    if isinstance(val, str):
        return val.strip()
    # bool must be tested before int (bool is an int subclass). np.bool_ is
    # listed explicitly because it is neither a bool nor an np.integer and
    # would otherwise be stringified to 'True' / 'False'.
    if isinstance(val, (bool, np.bool_)):
        return int(val)
    if isinstance(val, (int, np.integer)):
        return int(val)
    if isinstance(val, (float, np.floating)):
        # NaN was already handled by the null check above.
        as_float = float(val)
        return int(as_float) if as_float.is_integer() else as_float
    # pd.Timestamp subclasses datetime, so it has to be checked first.
    if isinstance(val, pd.Timestamp):
        return val.to_pydatetime()
    if isinstance(val, datetime):
        return val
    # Remaining date-like objects (e.g. datetime.date) pass through as-is.
    if hasattr(val, 'year'):
        return val
    return str(val)


 def clean_dataframe(df, *, date_columns=None, datetime_columns=None):
    """Normalise every column of ``df`` to plain Python values.

    Args:
        df: The DataFrame to clean. It is not modified; a cleaned copy is
            returned.
        date_columns: Column names to convert to ``datetime.date`` values
            (case-insensitive). Defaults to the module-level DATE_COLUMNS.
        datetime_columns: Column names to convert to ``datetime.datetime``
            values (case-insensitive). Defaults to the module-level
            DATETIME_COLUMNS.

    Returns:
        The cleaned DataFrame. Cleaning is best-effort: a failing value is
        replaced by None, a failing column is reported and left as it was,
        and a failure of the whole pass returns the frame as far as it got.
        Problems are printed, never raised.
    """
    # Resolve and lower-case the column lists once instead of per column.
    date_names = {
        str(name).lower()
        for name in (DATE_COLUMNS if date_columns is None else date_columns)
    }
    datetime_names = {
        str(name).lower()
        for name in (
            DATETIME_COLUMNS if datetime_columns is None else datetime_columns
        )
    }
    try:
        # ---------------------------------------------
        # Replace NaN with None
        # ---------------------------------------------
        df = df.replace({np.nan: None})
        # ---------------------------------------------
        # Process Each Column
        # ---------------------------------------------
        for col in df.columns:
            try:
                print(f"\nCleaning Column : {col}")
                # str() keeps non-string column labels from failing here.
                col_key = str(col).lower()
                # =====================================
                # DATE COLUMNS
                # =====================================
                if col_key in date_names:
                    print(f"Date Column : {col}")
                    # Unparseable values are coerced to NaT and end up None.
                    parsed = pd.to_datetime(df[col], errors='coerce')
                    df[col] = [
                        None if pd.isnull(val) else val.date()
                        for val in parsed
                    ]
                # =====================================
                # DATETIME COLUMNS
                # =====================================
                elif col_key in datetime_names:
                    print(f"DateTime Column : {col}")
                    parsed = pd.to_datetime(df[col], errors='coerce')
                    df[col] = [
                        None if pd.isnull(val) else val.to_pydatetime()
                        for val in parsed
                    ]
                # =====================================
                # INTEGER COLUMNS
                # =====================================
                elif pd.api.types.is_integer_dtype(df[col]):
                    print(f"Integer Column : {col}")
                    numeric = pd.to_numeric(df[col], errors='coerce')
                    df[col] = numeric.apply(_int_or_none)
                # =====================================
                # FLOAT COLUMNS
                # =====================================
                elif pd.api.types.is_float_dtype(df[col]):
                    print(f"Float Column : {col}")
                    numeric = pd.to_numeric(df[col], errors='coerce')
                    non_null = numeric.dropna()
                    # ---------------------------------
                    # Convert whole float to int
                    # Example:
                    # 12.0 -> 12
                    # Only when every non-null value in this frame is whole.
                    # ---------------------------------
                    all_whole = len(non_null) > 0 and (
                        (non_null % 1 == 0).all()
                    )
                    df[col] = numeric.apply(
                        _int_or_none if all_whole else _float_or_none
                    )
                # =====================================
                # OBJECT / STRING COLUMNS
                # =====================================
                else:
                    print(f"String/Object Column : {col}")
                    cleaned = []
                    for val in df[col]:
                        try:
                            cleaned.append(_clean_object_value(val))
                        except Exception as row_error:
                            # A single bad value must not lose the column.
                            print(
                                f"Row Cleaning Error "
                                f"in Column {col}"
                            )
                            print(str(row_error))
                            cleaned.append(None)
                    df[col] = cleaned
            except Exception as col_error:
                # Best-effort: report the column and carry on with the rest.
                print("\n================================")
                print(f"COLUMN FAILED : {col}")
                print(str(col_error))
                print("================================")
        return df
    except Exception as clean_error:
        print("\n================================")
        print("DATA CLEAN FAILED")
        print(str(clean_error))
        print(traceback.format_exc())
        print("================================")
        return df
 # =========================================================
 # MAIN PROCESS
 # =========================================================
 # Reads dbo.[TABLE_NAME] for PROJECT_ID from SQL Server in chunks, cleans
 # each chunk and inserts it into the ClickHouse table of the same name.
 # The destination is truncated once, right before the first insert.
 #
 # Failure policy:
 #   * a chunk that fails to clean or insert is logged, counted and skipped;
 #   * a TRUNCATE failure aborts the whole run (handled under MAIN ERROR),
 #     so the load never appends to a table that was meant to be emptied;
 #   * the final banner reports skipped chunks instead of always claiming
 #     success.
 #
 # Both handles are pre-bound so the cleanup below can tell "never opened"
 # from "failed to close".
 sql_conn = None
 ch_client = None
 try:
    # =====================================================
    # CONNECT SQL SERVER
    # =====================================================
    print("\nConnecting SQL Server...")
    sql_conn = pyodbc.connect(SQL_CONN_STR)
    print("Connected to SQL Server")
    # =====================================================
    # CONNECT CLICKHOUSE
    # =====================================================
    print("\nConnecting ClickHouse...")
    ch_client = clickhouse_connect.get_client(**CH_CONFIG)
    print("Connected to ClickHouse")
    # =====================================================
    # QUERY
    # =====================================================
    # TABLE_NAME and PROJECT_ID are module constants, not user input.
    query = f"""
    SELECT *
    FROM dbo.[{TABLE_NAME}]
    WHERE Project_Id = {PROJECT_ID}
    """
    print("\n====================================")
    print("Executing Query")
    print("====================================")
    print(query)
    # =====================================================
    # CHUNK SIZE / COUNTERS
    # =====================================================
    chunk_size = 100000
    total_rows = 0
    failed_chunks = 0
    failed_rows = 0
    # =====================================================
    # READ DATA
    # =====================================================
    for chunk in pd.read_sql(
        query,
        sql_conn,
        chunksize=chunk_size
    ):
        chunk_rows = len(chunk)
        # =================================================
        # CLEAN DATA
        # =================================================
        try:
            print("\n====================================")
            print(f"Processing Rows : {chunk_rows}")
            print("====================================")
            chunk = clean_dataframe(chunk)
            # =============================================
            # DEBUG COLUMN TYPES
            # =============================================
            print("\nCOLUMN TYPES")
            for col in chunk.columns:
                try:
                    sample = chunk[col].dropna()
                    if len(sample) > 0:
                        print(
                            col,
                            type(sample.iloc[0]),
                            sample.iloc[0]
                        )
                except Exception as debug_error:
                    # Debug output only; never let it fail the chunk.
                    print(
                        f"Could not sample column {col} : "
                        f"{debug_error}"
                    )
        except Exception as chunk_error:
            print("\n====================================")
            print("CHUNK PROCESS FAILED")
            print("====================================")
            print(str(chunk_error))
            traceback.print_exc()
            failed_chunks += 1
            failed_rows += chunk_rows
            continue
        # =================================================
        # TRUNCATE TABLE
        # =================================================
        # Kept outside the per-chunk error handling on purpose: if the
        # table cannot be emptied the run must stop, not skip this chunk
        # and retry the truncate on the next one.
        if TRUNCATE_BEFORE_LOAD and not table_truncated:
            print("\n====================================")
            print(f"TRUNCATING : {TABLE_NAME}")
            print("====================================")
            truncate_query = (
                f"TRUNCATE TABLE "
                f"`{CH_CONFIG['database']}`.`{TABLE_NAME}`"
            )
            print(truncate_query)
            try:
                ch_client.command(
                    truncate_query
                )
            except Exception as truncate_error:
                print("\nTRUNCATE FAILED")
                print(str(truncate_error))
                raise
            print(
                "TABLE TRUNCATED SUCCESSFULLY"
            )
            table_truncated = True
        # =================================================
        # INSERT DATA
        # =================================================
        try:
            print("\n====================================")
            print("INSERTING DATA INTO CLICKHOUSE")
            print("====================================")
            ch_client.insert_df(
                table=f"`{TABLE_NAME}`",
                df=chunk,
                database=CH_CONFIG['database']
            )
            total_rows += chunk_rows
            print(
                f"\nTOTAL INSERTED : "
                f"{total_rows}"
            )
        except Exception as insert_error:
            print("\nINSERT FAILED")
            print(str(insert_error))
            traceback.print_exc()
            failed_chunks += 1
            failed_rows += chunk_rows
            # =============================================
            # SAVE ERROR LOG
            # =============================================
            with open(
                "clickhouse_insert_error.log",
                "a",
                encoding="utf-8"
            ) as log:
                log.write(
                    "\n\n================================"
                    f"\nTIME : {datetime.now()}"
                    f"\nTABLE : {TABLE_NAME}"
                    f"\nERROR : {str(insert_error)}"
                    f"\nTRACEBACK :\n"
                    f"{traceback.format_exc()}"
                    "\n================================"
                )
            continue
    # =====================================================
    # SUMMARY
    # =====================================================
    print("\n====================================")
    if failed_chunks:
        # Some chunks were skipped, so the destination is incomplete.
        print("ETL COMPLETED WITH ERRORS")
        print(f"FAILED CHUNKS : {failed_chunks}")
        print(f"ROWS NOT INSERTED : {failed_rows}")
    else:
        print("ETL COMPLETED SUCCESSFULLY")
    print(f"TOTAL ROWS INSERTED : {total_rows}")
    print("====================================")
 # =========================================================
 # MAIN ERROR
 # =========================================================
 # Reached for connection failures, errors raised while reading from SQL
 # Server, and a failed TRUNCATE.
 except Exception as main_error:
    print("\n====================================")
    print("MAIN ERROR")
    print("====================================")
    print(str(main_error))
    traceback.print_exc()
    with open(
        "clickhouse_main_error.log",
        "a",
        encoding="utf-8"
    ) as log:
        log.write(
            "\n\n================================"
            f"\nTIME : {datetime.now()}"
            f"\nERROR : {str(main_error)}"
            f"\nTRACEBACK :\n"
            f"{traceback.format_exc()}"
            "\n================================"
        )
 # =========================================================
 # CLOSE CONNECTIONS
 # =========================================================
 finally:
    if sql_conn is not None:
        try:
            sql_conn.close()
            print("\nSQL Server Connection Closed")
        except Exception as close_error:
            # Cleanup is best-effort, but the failure is reported.
            print(f"\nSQL Server Connection Close Failed : {close_error}")
    if ch_client is not None:
        try:
            ch_client.close()
            print("ClickHouse Connection Closed")
        except Exception as close_error:
            print(f"ClickHouse Connection Close Failed : {close_error}")
 print("\n====================================")
 print("ETL Finished :", datetime.now())
 print("====================================")
@@ -0,0 +1,231 @@
 ================================
 TIME : 2026-05-19 17:13:53.695971
 TABLE : Sales
 ERROR : Error ('Connection aborted.', TimeoutError('timed out')) executing HTTP request attempt 1 (http://172.188.12.194:8123)
 TRACEBACK :
 Traceback (most recent call last):
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 788, in urlopen
    response = self._make_request(
        conn,
    ...<10 lines>...
        **response_kw,
    )
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 493, in _make_request
    conn.request(
    ~~~~~~~~~~~~^
        method,
        ^^^^^^^
    ...<6 lines>...
        enforce_content_length=enforce_content_length,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connection.py", line 512, in request
    self.send(b"%x\r\n%b\r\n" % (len(chunk), chunk))
    ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\http\client.py", line 1086, in send
    self.sock.sendall(data)
    ~~~~~~~~~~~~~~~~~^^^^^^
 TimeoutError: timed out
 During handling of the above exception, another exception occurred:
 Traceback (most recent call last):
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 546, in _raw_request
    response = self.http.request(method, url, **kwargs)
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\_request_methods.py", line 143, in request
    return self.request_encode_body(
           ~~~~~~~~~~~~~~~~~~~~~~~~^
        method, url, fields=fields, headers=headers, **urlopen_kw
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\_request_methods.py", line 278, in request_encode_body
    return self.urlopen(method, url, **extra_kw)
           ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\poolmanager.py", line 457, in urlopen
    response = conn.urlopen(method, u.request_uri, **kw)
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 842, in urlopen
    retries = retries.increment(
        method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2]
    )
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\util\retry.py", line 498, in increment
    raise reraise(type(error), error, _stacktrace)
          ~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\util\util.py", line 38, in reraise
    raise value.with_traceback(tb)
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 788, in urlopen
    response = self._make_request(
        conn,
    ...<10 lines>...
        **response_kw,
    )
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 493, in _make_request
    conn.request(
    ~~~~~~~~~~~~^
        method,
        ^^^^^^^
    ...<6 lines>...
        enforce_content_length=enforce_content_length,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connection.py", line 512, in request
    self.send(b"%x\r\n%b\r\n" % (len(chunk), chunk))
    ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\http\client.py", line 1086, in send
    self.sock.sendall(data)
    ~~~~~~~~~~~~~~~~~^^^^^^
 urllib3.exceptions.ProtocolError: ('Connection aborted.', TimeoutError('timed out'))
 The above exception was the direct cause of the following exception:
 Traceback (most recent call last):
  File "d:\Python Code\Sales_Import.py", line 494, in <module>
    ch_client.insert_df(
    ~~~~~~~~~~~~~~~~~~~^
        table=f"`{TABLE_NAME}`",
        ^^^^^^^^^^^^^^^^^^^^^^^^
        df=chunk,
        ^^^^^^^^^
        database=CH_CONFIG['database']
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 1013, in insert_df
    return self.insert(table,
           ~~~~~~~~~~~^^^^^^^
                       df,
                       ^^^
    ...<5 lines>...
                       transport_settings=transport_settings,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                       context=context)
                       ^^^^^^^^^^^^^^^^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 978, in insert
    return self.data_insert(context)
           ~~~~~~~~~~~~~~~~^^^^^^^^^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 347, in data_insert
    response = self._raw_request(block_gen, params, headers, error_handler=error_handler, server_wait=False)
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 558, in _raw_request
    raise OperationalError(f'Error {ex} executing HTTP request attempt {attempts}{err_url}') from ex
 clickhouse_connect.driver.exceptions.OperationalError: Error ('Connection aborted.', TimeoutError('timed out')) executing HTTP request attempt 1 (http://172.188.12.194:8123)
 ================================
 ================================
 TIME : 2026-05-19 17:15:26.425862
 TABLE : Sales
 ERROR : Error ('Connection aborted.', TimeoutError('timed out')) executing HTTP request attempt 1 (http://172.188.12.194:8123)
 TRACEBACK :
 Traceback (most recent call last):
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 788, in urlopen
    response = self._make_request(
        conn,
    ...<10 lines>...
        **response_kw,
    )
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 493, in _make_request
    conn.request(
    ~~~~~~~~~~~~^
        method,
        ^^^^^^^
    ...<6 lines>...
        enforce_content_length=enforce_content_length,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connection.py", line 512, in request
    self.send(b"%x\r\n%b\r\n" % (len(chunk), chunk))
    ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\http\client.py", line 1086, in send
    self.sock.sendall(data)
    ~~~~~~~~~~~~~~~~~^^^^^^
 TimeoutError: timed out
 During handling of the above exception, another exception occurred:
 Traceback (most recent call last):
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 546, in _raw_request
    response = self.http.request(method, url, **kwargs)
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\_request_methods.py", line 143, in request
    return self.request_encode_body(
           ~~~~~~~~~~~~~~~~~~~~~~~~^
        method, url, fields=fields, headers=headers, **urlopen_kw
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\_request_methods.py", line 278, in request_encode_body
    return self.urlopen(method, url, **extra_kw)
           ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\poolmanager.py", line 457, in urlopen
    response = conn.urlopen(method, u.request_uri, **kw)
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 842, in urlopen
    retries = retries.increment(
        method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2]
    )
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\util\retry.py", line 498, in increment
    raise reraise(type(error), error, _stacktrace)
          ~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\util\util.py", line 38, in reraise
    raise value.with_traceback(tb)
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 788, in urlopen
    response = self._make_request(
        conn,
    ...<10 lines>...
        **response_kw,
    )
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connectionpool.py", line 493, in _make_request
    conn.request(
    ~~~~~~~~~~~~^
        method,
        ^^^^^^^
    ...<6 lines>...
        enforce_content_length=enforce_content_length,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\urllib3\connection.py", line 512, in request
    self.send(b"%x\r\n%b\r\n" % (len(chunk), chunk))
    ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\http\client.py", line 1086, in send
    self.sock.sendall(data)
    ~~~~~~~~~~~~~~~~~^^^^^^
 urllib3.exceptions.ProtocolError: ('Connection aborted.', TimeoutError('timed out'))
 The above exception was the direct cause of the following exception:
 Traceback (most recent call last):
  File "d:\Python Code\Sales_Import.py", line 494, in <module>
    ch_client.insert_df(
    ~~~~~~~~~~~~~~~~~~~^
        table=f"`{TABLE_NAME}`",
        ^^^^^^^^^^^^^^^^^^^^^^^^
        df=chunk,
        ^^^^^^^^^
        database=CH_CONFIG['database']
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 1013, in insert_df
    return self.insert(table,
           ~~~~~~~~~~~^^^^^^^
                       df,
                       ^^^
    ...<5 lines>...
                       transport_settings=transport_settings,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                       context=context)
                       ^^^^^^^^^^^^^^^^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\client.py", line 978, in insert
    return self.data_insert(context)
           ~~~~~~~~~~~~~~~~^^^^^^^^^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 347, in data_insert
    response = self._raw_request(block_gen, params, headers, error_handler=error_handler, server_wait=False)
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\clickhouse_connect\driver\httpclient.py", line 558, in _raw_request
    raise OperationalError(f'Error {ex} executing HTTP request attempt {attempts}{err_url}') from ex
 clickhouse_connect.driver.exceptions.OperationalError: Error ('Connection aborted.', TimeoutError('timed out')) executing HTTP request attempt 1 (http://172.188.12.194:8123)
 ================================
@@ -89,4 +89,103 @@ Traceback (most recent call last):
                        ^^^^^^^^^^
 NameError: name 'PROJECT_ID' is not defined. Did you mean: 'PROJECTID'?
 ================================
 ================================
 TIME : 2026-05-19 15:19:40.194819
 ERROR : ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: An existing connection was forcibly closed by the remote host.\r\n (10054) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10054)')
 TRACEBACK :
 Traceback (most recent call last):
  File "d:\Python Code\PaidVisibility_Import.py", line 345, in <module>
    for chunk in pd.read_sql(
                 ~~~~~~~~~~~^
        query,
        ^^^^^^
        sql_conn,
        ^^^^^^^^^
        chunksize=chunk_size
        ^^^^^^^^^^^^^^^^^^^^
    ):
    ^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\pandas\io\sql.py", line 2730, in _query_iterator
    data = cursor.fetchmany(chunksize)
 pyodbc.OperationalError: ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: An existing connection was forcibly closed by the remote host.\r\n (10054) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10054)')
 ================================
 ================================
 TIME : 2026-05-19 15:26:07.910371
 ERROR : ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]Named Pipes Provider: Could not open a connection to SQL Server [5].  (5) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (5)')
 TRACEBACK :
 Traceback (most recent call last):
  File "d:\Python Code\OQaD Import.py", line 310, in <module>
    sql_conn = pyodbc.connect(SQL_CONN_STR)
 pyodbc.OperationalError: ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]Named Pipes Provider: Could not open a connection to SQL Server [5].  (5) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (5)')
 ================================
 ================================
 TIME : 2026-05-19 16:01:03.630328
 ERROR : ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.\r\n (10060) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10060)')
 TRACEBACK :
 Traceback (most recent call last):
  File "d:\Python Code\PaidVisibility_Import.py", line 343, in <module>
    for chunk in pd.read_sql(
                 ~~~~~~~~~~~^
        query,
        ^^^^^^
        sql_conn,
        ^^^^^^^^^
        chunksize=chunk_size
        ^^^^^^^^^^^^^^^^^^^^
    ):
    ^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\pandas\io\sql.py", line 2730, in _query_iterator
    data = cursor.fetchmany(chunksize)
 pyodbc.OperationalError: ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.\r\n (10060) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10060)')
 ================================
 ================================
 TIME : 2026-05-19 16:36:09.557213
 ERROR : ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.\r\n (10060) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10060)')
 TRACEBACK :
 Traceback (most recent call last):
  File "d:\Python Code\PaidVisibility_Compliance Import.py", line 371, in <module>
    for chunk in pd.read_sql(
                 ~~~~~~~~~~~^
        query,
        ^^^^^^
        sql_conn,
        ^^^^^^^^^
        chunksize=chunk_size
        ^^^^^^^^^^^^^^^^^^^^
    ):
    ^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\pandas\io\sql.py", line 2730, in _query_iterator
    data = cursor.fetchmany(chunksize)
 pyodbc.OperationalError: ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.\r\n (10060) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10060)')
 ================================
 ================================
 TIME : 2026-05-19 17:16:08.366780
 ERROR : ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: An existing connection was forcibly closed by the remote host.\r\n (10054) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10054)')
 TRACEBACK :
 Traceback (most recent call last):
  File "d:\Python Code\Sales_Import.py", line 411, in <module>
    for chunk in pd.read_sql(
                 ~~~~~~~~~~~^
        query,
        ^^^^^^
        sql_conn,
        ^^^^^^^^^
        chunksize=chunk_size
        ^^^^^^^^^^^^^^^^^^^^
    ):
    ^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\pandas\io\sql.py", line 2730, in _query_iterator
    data = cursor.fetchmany(chunksize)
 pyodbc.OperationalError: ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: An existing connection was forcibly closed by the remote host.\r\n (10054) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10054)')
 ================================
@@ -0,0 +1,85 @@
 ================================
 TIME : 2026-05-20 09:30:27.532149
 ERROR : ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]Named Pipes Provider: Could not open a connection to SQL Server [2].  (2) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Invalid connection string attribute (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (2)')
 TRACEBACK :
 Traceback (most recent call last):
  File "d:\Python Code\PaidVisibility_Compliance Import.py", line 313, in <module>
    sql_conn = connect_sql()
  File "d:\Python Code\PaidVisibility_Compliance Import.py", line 70, in connect_sql
    conn = pyodbc.connect(
        SQL_CONN_STR,
        autocommit=True
    )
 pyodbc.OperationalError: ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]Named Pipes Provider: Could not open a connection to SQL Server [2].  (2) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Invalid connection string attribute (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (2)')
 ================================
 ================================
 TIME : 2026-05-20 10:38:12.600484
 ERROR : ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]Named Pipes Provider: Could not open a connection to SQL Server [64].  (64) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Invalid connection string attribute (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (64)')
 TRACEBACK :
 Traceback (most recent call last):
  File "d:\Python Code\PaidVisibility_Compliance Import.py", line 348, in <module>
    for chunk in pd.read_sql(
                 ~~~~~~~~~~~^
        query,
        ^^^^^^
        sql_conn,
        ^^^^^^^^^
        chunksize=chunk_size
        ^^^^^^^^^^^^^^^^^^^^
    ):
    ^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\pandas\io\sql.py", line 2730, in _query_iterator
    data = cursor.fetchmany(chunksize)
 pyodbc.OperationalError: ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: An existing connection was forcibly closed by the remote host.\r\n (10054) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10054)')
 During handling of the above exception, another exception occurred:
 Traceback (most recent call last):
  File "d:\Python Code\PaidVisibility_Compliance Import.py", line 518, in <module>
    sql_conn = connect_sql()
  File "d:\Python Code\PaidVisibility_Compliance Import.py", line 70, in connect_sql
    conn = pyodbc.connect(
        SQL_CONN_STR,
        autocommit=True
    )
 pyodbc.OperationalError: ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]Named Pipes Provider: Could not open a connection to SQL Server [64].  (64) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Invalid connection string attribute (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (64)')
 ================================
 ================================
 TIME : 2026-05-20 12:28:25.483277
 ERROR : ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]Named Pipes Provider: Could not open a connection to SQL Server [53].  (53) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Invalid connection string attribute (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (53)')
 TRACEBACK :
 Traceback (most recent call last):
  File "d:\Python Code\PaidVisibility_Compliance Import.py", line 348, in <module>
    for chunk in pd.read_sql(
                 ~~~~~~~~~~~^
        query,
        ^^^^^^
        sql_conn,
        ^^^^^^^^^
        chunksize=chunk_size
        ^^^^^^^^^^^^^^^^^^^^
    ):
    ^
  File "C:\Users\dipanshuk\AppData\Local\Python\pythoncore-3.14-64\Lib\site-packages\pandas\io\sql.py", line 2730, in _query_iterator
    data = cursor.fetchmany(chunksize)
 pyodbc.OperationalError: ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]TCP Provider: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.\r\n (10060) (SQLGetData); [08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (10060)')
 During handling of the above exception, another exception occurred:
 Traceback (most recent call last):
  File "d:\Python Code\PaidVisibility_Compliance Import.py", line 518, in <module>
    sql_conn = connect_sql()
  File "d:\Python Code\PaidVisibility_Compliance Import.py", line 70, in connect_sql
    conn = pyodbc.connect(
        SQL_CONN_STR,
        autocommit=True
    )
 pyodbc.OperationalError: ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]Named Pipes Provider: Could not open a connection to SQL Server [53].  (53) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Invalid connection string attribute (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (53)')
 ================================
Author	SHA1	Message	Date
Dipanshu Kumar	579d59e1b0	Journey_Plan data Import	2026-05-20 12:40:47 +05:30
Dipanshu Kumar	3be8cd7259	[Web Logins] Import	2026-05-19 16:58:54 +05:30