final commit

2026-06-23 18:23:58 +05:30
parent e218aafc26
commit 6b2d754981
15 changed files with 2803 additions and 323 deletions
@@ -25,3 +25,7 @@ RUN uv sync --frozen
 COPY . .
 ENV PATH="/opt/airflow/project/.venv/bin:${PATH}"
 # Airflow DAG folder
 ENV AIRFLOW__CORE__DAGS_FOLDER=/opt/airflow/dags
@@ -2,4 +2,3 @@ pipeline:
  error_message: null
  run_date: null
  status: null
  last_successful_run_date: '2026-06-22'
@@ -9,4 +9,4 @@ CH_HOST=172.188.12.194
 CH_PORT=8123
 CH_USER=default
 CH_PASS=dipanshu_k
-CH_DB=kelloggs_1
+CH_DB=kelloggs_z
@@ -0,0 +1,5 @@
 pipeline:
  start_date: '2026-06-01'
  end_date: '2026-06-20'
  flag: N
  Note: 'Set flag to Y (yes) to run the pipeline for start_date through end_date; set it to N (no) to ignore these dates.'
@@ -0,0 +1,3 @@
 - pipeline_trigeered_on_date: '2026-06-23'
  failed_run_date: none
  attempt: none
@@ -52,6 +52,45 @@ from src.dim import *
 # Helpers
 # ==========================================================
 def get_dates_from_yaml(filename: str):
    """Read the custom date range and its on/off flag from a YAML file.

    The file is expected to hold a top-level ``pipeline`` mapping with
    ``start_date``, ``end_date`` (ISO ``YYYY-MM-DD``) and ``flag`` entries.

    Args:
        filename: Path of the YAML file to read.

    Returns:
        A ``(start_date, end_date, flag)`` tuple. The two dates are
        ``datetime.date`` objects; ``flag`` is an upper-cased, stripped
        string ("Y" / "N" for the documented values).

    Raises:
        KeyError: If ``pipeline`` or one of its three entries is missing.
        ValueError: If a date value is not a valid ISO date.
    """
    # Explicit encoding so the file is decoded the same way on every host.
    with open(filename, "r", encoding="utf-8") as file:
        data = yaml.safe_load(file)

    pipeline = data["pipeline"]

    # str() accepts both a quoted date string and a value the YAML loader
    # has already turned into a date object.
    start_date = date.fromisoformat(str(pipeline["start_date"]))
    end_date = date.fromisoformat(str(pipeline["end_date"]))

    raw_flag = pipeline["flag"]
    if isinstance(raw_flag, bool):
        # An unquoted yes/no/true/false in YAML is loaded as a boolean;
        # map it back to the Y/N convention the callers compare against.
        flag = "Y" if raw_flag else "N"
    else:
        # Tolerate lower-case letters and stray whitespace around the flag.
        flag = str(raw_flag).strip().upper()

    return start_date, end_date, flag
 def write_table_to_yaml(
    data: dict,
    run_date: date,
    filename: str | None = None
 ):
    """Dump ``data`` to a YAML file in block style without sorting keys.

    Args:
        data: Mapping to serialise.
        run_date: Used only to build the default file name.
        filename: Output path; when omitted the file is named
            ``elt_pipeline_<run_date>.yml``.
    """
    # Fall back to the per-run default name only when no path was supplied.
    target = f"elt_pipeline_{run_date}.yml" if filename is None else filename

    dump_options = {"default_flow_style": False, "sort_keys": False}
    with open(target, "w") as stream:
        yaml.dump(data, stream, **dump_options)

    print(f"Table written to {target}")
 def table_exists(
    client,
    table_name: str,
@@ -68,7 +107,7 @@ def table_exists(
 # Main
 # ==========================================================
-def main():
+def elt(run_date : date):
    log.info("=" * 80)
    log.info("Hello from data-move Python data pipeline!")
@@ -77,13 +116,7 @@ def main():
    # Run Date
    # ------------------------------------------------------
-    if len(sys.argv) > 1:
+
        run_date = datetime.strptime(
            sys.argv[1],
            "%Y-%m-%d",
        ).date()
    else:
        run_date = date.today() - timedelta(days=1)
    log.info(
        "Pipeline Run Date: %s",
@@ -126,7 +159,7 @@ def main():
    # ------------------------------------------------------
    with open(
-        "t.yml",
+        "y.yml",
        "r",
    ) as file:
@@ -211,8 +244,6 @@ def main():
                        table_name,
                    )
                elif   operation =="ONLY_INSERT" :
                    continue
                else:
                    delete_existing_data(
@@ -253,7 +284,7 @@ def main():
    log.info("=" * 80)
-if __name__ == "__main__":
+def main() :
    config_file = Path("Pipeline_config.yml")
@@ -274,34 +305,62 @@ if __name__ == "__main__":
    p_start_date, p_end_date , flag= get_dates_from_yaml("elt_pipeline_custom_dates.yml")
    if flag =="Y" :
         start_date=p_start_date
         end_date=p_end_date
    elif len(sys.argv) > 1:
                        start_date = datetime.strptime(
                            sys.argv[1],
                            "%Y-%m-%d",
                        ).date()
                        end_date=start_date + timedelta(days=1)
    else:
            start_date = date.today() - timedelta(days=1)
            end_date=start_date
    log.info(
                "Pipeline Start Date: %s",
                start_date,
            )
    failed_dates=[]
    successful_dates=[]
    filename_successful = "successful_Pipeline_dates_config.yml"
    filename_failed = "failed_Pipeline_dates_config.yml"
    while start_date <=end_date:
            run_date = start_date
            for attempt in range(3):
                    try:
-                main()
+                        elt(run_date)
-                with open("Pipeline_config.yml", "r") as f:
+                        successful_dates.append({
-                    config = yaml.safe_load(f)
+                                            'pipeline_trigeered_on_date': str(date.today()),
-
+                                            'last_successful_run_date': run_date,
-                config["pipeline"]["last_successful_run_date"] = str(date.today())
+                                        })
                with open("Pipeline_config.yml", "w") as f:
                    yaml.safe_dump(config, f, sort_keys=False)
                        log.info(
                            f"Pipeline completed successfully. "
-                    f"last_successful_run_date={date.today()}"
+                            f"pipeline_trigeered_on_date={date.today()}"
                            f"last_successful_run_date={run_date}"
                        )
                        break
                    except Exception as e:
                with open("Pipeline_config.yml", "r") as f:
                    config = yaml.safe_load(f)
                config["pipeline"]["run_date"] = str(date.today())
-                with open("Pipeline_config.yml", "w") as f:
+                        failed_dates.append({
-                    yaml.safe_dump(config, f, sort_keys=False)
+                                            'pipeline_trigeered_on_date': str(date.today()),
                                            'failed_run_date': run_date,
                                            "attempt" : attempt
                                        })
                        if attempt == 2:
                            raise
@@ -311,3 +370,31 @@ if __name__ == "__main__":
                        )
                        sleep(5)
            start_date=start_date + timedelta(days=1)
    with open(filename_successful, "w") as f:
                yaml.dump(
                    successful_dates,
                    f,
                    default_flow_style=False,
                    sort_keys=False,
                )
    if len(failed_dates) == 0 : 
                                  failed_dates.append({
                                            'pipeline_trigeered_on_date': str(date.today()),
                                            'failed_run_date': "none",
                                            "attempt" : "none"
                                        })
    with open(filename_failed, "w") as f:
                            yaml.dump(failed_dates,
                                       f, default_flow_style=False,
                                         sort_keys=False)    
 if __name__ == "__main__":
    main()
@@ -14,9 +14,6 @@ from db_con.connection import (
 )
 def fetch_mapping_storevisibility(
    sql_engine: Engine,
    table_name: str,
@@ -25,86 +22,117 @@ def fetch_mapping_storevisibility(
     run_date: date
 ) -> pl.DataFrame:
    run_date = run_date + timedelta(days=1)
    client= get_clickhouse_client()          
    def table_exists(
                client,
                table_name: str,
            ) -> bool:
                return bool(
                    client.command(
                        f"EXISTS TABLE {table_name}"
                    )
                )
    def get_reason_ids_mapping_storevisibility(
                client,
                run_date: date,
                table_name: str = "mapping_storevisibility",
            ) -> list[int] :
                if not table_exists(client, table_name):
                    log.warning(f"Table '{table_name}' does not exist. During collecting store_ids")
                    return [0]
-                query = f"""
+    log.info(f"Fetching data from sql server for {table_type} table......")
                SELECT DISTINCT StoreId
                FROM mapping_storevisibility 
                WHERE toDate(Fromdate) <= toDate('{run_date + timedelta(days= 1)}')
                AND toDate(Todate) >= toDate('{run_date + timedelta(days= 1)}')
                AND project_Id = '40148'
                """
                # ClickHouse -> PyArrow -> Polars
                arrow_table = client.query_arrow(query)
                df= pl.from_arrow(arrow_table)
                list=df["reason_id"].to_list()
                return list
    def fetch_data(    
        engine: Engine,
        table_name: str,
        table_type: str,
        run_date: date,
        store_id: list[int]
 ) -> pl.DataFrame:                           
                    log.info(f"Fetching data from sql server for Master table......")
                    store_id_list = ",".join(str(sid) for sid in store_id)
    sql_file = Path("src")  / "sql" / f"bridge" / f"{table_name}.sql"
    with open(sql_file, "r", encoding="utf-8") as f:
        sql_template = f.read()
-                    sql = sql_template.format( 
+    sql = sql_template.format(  )
                            store_id_list=store_id_list,
                            run_date=run_date.strftime("%Y-%m-%d")
                    )
    log.info(f"Fetching in progress .... ")
    df = pl.read_database(
        query=sql,
-                        connection=engine
+        connection=sql_engine
    )
    log.info(f"Fetched {len(df):,} rows from SQL Server")
    return df
    store_id=get_reason_ids_mapping_storevisibility(client, run_date, "mapping_storevisibility")
    df=fetch_data(engine=sql_engine,
                                table_name=table_name,
                                table_type=table_type,
                                run_date=run_date,
                                store_id=store_id,
                                )
    log.info(f"Fetched {len(df):,} rows from SQL Server")
-    return df
+# def fetch_mapping_storevisibility(
 #     sql_engine: Engine,
 #     table_name: str,
 #     table_type: str,
 #     mids: list[int],
 #     run_date: date
 # ) -> pl.DataFrame:
 #     run_date = run_date + timedelta(days=1)
 #     client= get_clickhouse_client()          
 #     def table_exists(
 #                 client,
 #                 table_name: str,
 #             ) -> bool:
 #                 return bool(
 #                     client.command(
 #                         f"EXISTS TABLE {table_name}"
 #                     )
 #                 )
 #     def get_reason_ids_mapping_storevisibility(
 #                 client,
 #                 run_date: date,
 #                 table_name: str = "mapping_storevisibility",
 #             ) -> list[int] :
 #                 if not table_exists(client, table_name):
 #                     log.warning(f"Table '{table_name}' does not exist. During collecting store_ids")
 #                     return [0]
 #                 query = f"""
 #                 SELECT DISTINCT StoreId
 #                 FROM mapping_storevisibility 
 #                 WHERE toDate(Fromdate) <= toDate('{run_date + timedelta(days= 1)}')
 #                 AND toDate(Todate) >= toDate('{run_date + timedelta(days= 1)}')
 #                 AND project_Id = '40148'
 #                 """
 #                 # ClickHouse -> PyArrow -> Polars
 #                 arrow_table = client.query_arrow(query)
 #                 df= pl.from_arrow(arrow_table)
 #                 list=df["reason_id"].to_list()
 #                 return list
 #     def fetch_data(    
 #         engine: Engine,
 #         table_name: str,
 #         table_type: str,
 #         run_date: date,
 #         store_id: list[int]
 # ) -> pl.DataFrame:                           
 #                     log.info(f"Fetching data from sql server for Master table......")
 #                     store_id_list = ",".join(str(sid) for sid in store_id)
 #                     sql_file = Path("src")  / "sql" / f"bridge" / f"{table_name}.sql"
 #                     with open(sql_file, "r", encoding="utf-8") as f:
 #                         sql_template = f.read()
 #                     sql = sql_template.format( 
 #                             store_id_list=store_id_list,
 #                             run_date=run_date.strftime("%Y-%m-%d")
 #                     )
 #                     log.info(f"Fetching in progress .... ")
 #                     df = pl.read_database(
 #                         query=sql,
 #                         connection=engine
 #                     )
 #                     log.info(f"Fetched {len(df):,} rows from SQL Server")
 #                     return df
 #     store_id=get_reason_ids_mapping_storevisibility(client, run_date, "mapping_storevisibility")
 #     df=fetch_data(engine=sql_engine,
 #                                 table_name=table_name,
 #                                 table_type=table_type,
 #                                 run_date=run_date,
 #                                 store_id=store_id,
 #                                 )
 #     log.info(f"Fetched {len(df):,} rows from SQL Server")
 #     return df
@@ -134,6 +134,7 @@ def fetch_additional_visibility( engine: Engine,
    return df
 def fetch_OQaD(
    sql_engine: Engine,
    table_name: str,
@@ -142,83 +143,85 @@ def fetch_OQaD(
    run_date: date
 ) -> pl.DataFrame:
    # ─────────────────────────────────────────────
    # INNER HELPERS  (defined once, used below)
    # ─────────────────────────────────────────────
-            client= get_clickhouse_client()          
+    client = get_clickhouse_client()
            def table_exists(
                client,
                table_name: str,
            ) -> bool:
-                return bool(
+    # ── Does a ClickHouse table exist? ────────────
-                    client.command(
+    def table_exists(client, table_name: str) -> bool:
-                        f"EXISTS TABLE {table_name}"
+
-                    )
+        return bool(client.command(f"EXISTS TABLE {table_name}"))
-                )
+
    # ── STEP 1: Who submitted yesterday in SQL Server? ───
    def fetch_quiz_empids(engine: Engine, run_date: date) -> pl.DataFrame:
        # Format date ONCE safely — avoids f-string injection bugs
        run_date_str  = run_date.strftime("%Y-%m-%d")
        next_date_str = (run_date + timedelta(days=1)).strftime("%Y-%m-%d")
-            def fetch_quiz_empids(engine: Engine, run_date : date) -> pl.DataFrame:
+        sql = f"""
                sql_template = f"""
            WITH MID_TABLE_COV1 AS
            (
-                SELECT EmpId, VisitDate
+                -- Records CREATED yesterday
                SELECT EmpId, CAST(VisitDate AS DATE) AS VisitDate
                FROM   OneApp_KelloggsMT.dbo.T_OQAD
-                WHERE CreateDate >= {run_date}
+                WHERE  CreateDate >= '{run_date_str}'
-                AND CreateDate < DATEADD(DAY,1,'{run_date}')
+                  AND  CreateDate <  '{next_date_str}'
                UNION ALL
-                SELECT EmpId, VisitDate
+                -- Records UPDATED yesterday (different rows, safe to UNION ALL)
                SELECT EmpId, CAST(VisitDate AS DATE) AS VisitDate
                FROM   OneApp_KelloggsMT.dbo.T_OQAD
-                WHERE UpdateDate >= {run_date}
+                WHERE  UpdateDate >= '{run_date_str}'
-                AND UpdateDate < DATEADD(DAY,1, '{run_date}')
+                  AND  UpdateDate <  '{next_date_str}'
            ),
            QUIZ AS
            (
-            SELECT Distinct E.EmpId as empid                          
+                SELECT DISTINCT
-            , CONVERT(date,DQ.VisitDate) AS visitdate                                                                                             
+                       E.EmpId   AS empid,
-            FROM OneApp_KelloggsMT.dbo.T_OQAD DQ INNER JOIN                                                                   
+                       CAST(DQ.VisitDate AS DATE) AS visitdate
-            OneApp_KelloggsMT.dbo.vw_Employee_Detail E ON DQ.EmpId = E.EmpId inner join                                        
+                FROM       OneApp_KelloggsMT.dbo.T_OQAD              DQ
-            OneApp_KelloggsMT.dbo.Master_OQAD_Question QU on DQ.QuestionId= qu.QuestionId inner join                            
+                INNER JOIN OneApp_KelloggsMT.dbo.vw_Employee_Detail   E
-            OneApp_KelloggsMT.dbo.Master_OQAD_Category qc on qu.QuestionCategoryId= qc.QuestionCategoryId                                       
+                        ON DQ.EmpId = E.EmpId
-            where e.EmpName not like 'test%' and e.RightId  in (6)   
+                INNER JOIN OneApp_KelloggsMT.dbo.Master_OQAD_Question QU
-            and (E.ResignDate is null or E.ResignDate>=''+CONVERT(VARCHAR,'{run_date}')+'')     AND E.EmpName NOT LIKE '%TEST%'
+                        ON DQ.QuestionId = QU.QuestionId
-            AND DQ.EmpId IN (SELECT EmpId FROM MID_TABLE_COV1 A  WHERE       
+                INNER JOIN OneApp_KelloggsMT.dbo.Master_OQAD_Category QC
-            DQ.EmpId=A.EmpId AND CONVERT(date,VisitDate)=CONVERT(date,A.VisitDate) )                          
+                        ON QU.QuestionCategoryId = QC.QuestionCategoryId
-            ) select * from quiz 
+                WHERE  E.EmpName NOT LIKE '%TEST%'        -- exclude test employees
                  AND  E.RightId = 6                      -- only field reps
                  AND  (
                           E.ResignDate IS NULL
                           OR CAST(E.ResignDate AS DATE) >= '{run_date_str}'
                       )
                  AND EXISTS (                            -- ✅ EXISTS beats IN for large sets
                          SELECT 1
                          FROM   MID_TABLE_COV1 A
                          WHERE  A.EmpId = DQ.EmpId
                            AND  A.VisitDate = CAST(DQ.VisitDate AS DATE)
                      )
            )
            SELECT * FROM QUIZ
        """
                sql = sql_template.format(
                    run_date=run_date.strftime("%Y-%m-%d")
                )  
                log.info(f"Fetching  quiz_empids data for  EMPID and Visitid")
                df = pl.read_database(
                    query=sql,
                    connection=engine
                )
                log.info(f"Fetched {len(df):,} total empid and visitdate fetched for OQAD from SQL Server")
        log.info("Fetching quiz empids for run_date=%s", run_date_str)
        df = pl.read_database(query=sql, connection=engine)
        log.info("Fetched %s (EmpId, VisitDate) pairs from SQL Server", len(df))
        return df
-
+    # ── STEP 2: Who do we ALREADY have in ClickHouse? ───
    def get_empids_clickhouse_OQAD(
        client,
        table_name: str = "OQaD",
    ) -> pl.DataFrame:
        if not table_exists(client, table_name):
-                    log.warning(f"Table '{table_name}' does not exist.")
+            log.warning("Table '%s' does not exist in ClickHouse.", table_name)
-                    return pl.DataFrame(
+            return pl.DataFrame(schema={"empid": pl.Int64, "visitdate": pl.Date})
                        schema={
                            "empid": pl.Int64,
                            "visitdate": pl.Date,
                        }
                    )
        query = f"""
            SELECT DISTINCT
@@ -227,82 +230,249 @@ def fetch_OQaD(
            FROM   {table_name}
        """
                # ClickHouse -> PyArrow -> Polars
        arrow_table = client.query_arrow(query)
        df = pl.from_arrow(arrow_table)
        log.info("Fetched %s existing (EmpId, VisitDate) pairs from ClickHouse", len(df))
        return df
-                return pl.from_arrow(arrow_table)
+    # ── STEP 3: Who is NEW? (in SQL Server but NOT yet in ClickHouse) ───
    def find_new_empids(
        sql_df: pl.DataFrame,
        ch_df: pl.DataFrame,
    ) -> list[int]:
-
+        new_df = sql_df.join(
-            qf=fetch_quiz_empids(sql_engine,run_date)
+            ch_df,
            db_df = get_empids_clickhouse_OQAD(client)
            matched = qf.join(
            db_df,
            on=["empid", "visitdate"],
-            how="inner",
+            how="anti",          # ✅ anti = keep rows NOT found in ch_df
        )
-            if matched.is_empty():
+        if new_df.is_empty():
            log.warning("No new EmpIds found for table=%s — nothing to fetch.", table_name)
            return [0]  # sentinel value — the .sql WHERE will return 0 rows safely
-                empids=[0]
+        empids = new_df["empid"].unique().to_list()
-                log.warning(
+        log.info("Found %s NEW empids to fetch for %s", len(empids), table_name)
-                    "%s  Matched df  in OQaD returned no rows",
+        return empids
                    table_name,
                )
            else:
                empids=matched["empid"].to_list()
            log.info(f"Fetched {len(empids):,} matched empids fetched for OQAD ")
    # ── STEP 4: Fetch full quiz data for new empids ───
    def fetch_data(
        engine: Engine,
        table_name: str,
        table_type: str,
        empids: list[int],
-                run_date: date
+        run_date: date,
    ) -> pl.DataFrame:
                empid_list = ",".join(str(empid) for empid in empids)
        run_date_str = run_date.strftime("%Y-%m-%d")
        empid_list   = ", ".join(str(e) for e in empids)  # "101, 102, 103"
        sql_file = Path("src") / "sql" / "fact" / f"{table_name}.sql"
-
+        log.info("Loading SQL from: %s (exists=%s)", sql_file.resolve(), sql_file.exists())
                log.info(f"Exists: {sql_file.exists()}")
                log.info(f"Path: {sql_file.resolve()}")
        with open(sql_file, "r", encoding="utf-8") as f:
            sql_template = f.read()
        sql = sql_template.format(
            empid_list=empid_list,
-                    run_date=run_date.strftime("%Y-%m-%d")
+            run_date=run_date_str,
        )
-                log.info(f"Fetching data for {len(empids):,} EMPIDs")
+        log.info("Fetching full OQaD data for %s empids, run_date=%s", len(empids), run_date_str)
-
+        df = pl.read_database(query=sql, connection=engine)
-                log.info("Fetching OQaD data for run_date=%s", run_date)
+        log.info("Fetched %s rows from SQL Server for table=%s", len(df), table_name)
                df = pl.read_database(
                    query=sql,
                    connection=engine,
                )
                log.info("fn name is fetch_OQad ------Fetched %s rows", len(df))
        return df
-            df=fetch_data(  engine=sql_engine,
+
    # ─────────────────────────────────────────────
    # MAIN FLOW  (the 4 steps, clearly sequenced)
    # ─────────────────────────────────────────────
    qf     = fetch_quiz_empids(sql_engine, run_date)       # Step 1
    db_df  = get_empids_clickhouse_OQAD(client, table_name) # Step 2
    empids = find_new_empids(qf, db_df)                     # Step 3
    df = fetch_data(                                         # Step 4
        engine=sql_engine,
        table_name=table_name,
        table_type=table_type,
        empids=empids,
-                                run_date=run_date
+        run_date=run_date,
    )
            log.info(f"Fetched {len(df):,} rows from SQL Server")
    log.info("fetch_OQaD complete — returning %s rows", len(df))
    return df
 # def fetch_OQaD(
 #     sql_engine: Engine,
 #     table_name: str,
 #     table_type: str,
 #     mids: list[int],
 #     run_date: date
 # ) -> pl.DataFrame:
 #             client= get_clickhouse_client()          
 #             def table_exists(
 #                 client,
 #                 table_name: str,
 #             ) -> bool:
 #                 return bool(
 #                     client.command(
 #                         f"EXISTS TABLE {table_name}"
 #                     )
 #                 )
 #             def fetch_quiz_empids(engine: Engine, run_date : date) -> pl.DataFrame:
 #                 sql_template = f"""
 #                         WITH MID_TABLE_COV1 AS
 #             (
 #                 SELECT EmpId, VisitDate
 #                 FROM OneApp_KelloggsMT.dbo.T_OQAD
 #                 WHERE CreateDate >= {run_date}
 #                 AND CreateDate < DATEADD(DAY,1,'{run_date}')
 #                 UNION
 #                 SELECT EmpId, VisitDate
 #                 FROM OneApp_KelloggsMT.dbo.T_OQAD
 #                 WHERE UpdateDate >= {run_date}
 #                 AND UpdateDate < DATEADD(DAY,1, '{run_date}')
 #             ),
 #             QUIZ AS
 #             (                                                                  
 #             SELECT Distinct E.EmpId as empid                          
 #             , CONVERT(date,DQ.VisitDate) AS visitdate                                                                                             
 #             FROM OneApp_KelloggsMT.dbo.T_OQAD DQ INNER JOIN                                                                   
 #             OneApp_KelloggsMT.dbo.vw_Employee_Detail E ON DQ.EmpId = E.EmpId inner join                                        
 #             OneApp_KelloggsMT.dbo.Master_OQAD_Question QU on DQ.QuestionId= qu.QuestionId inner join                            
 #             OneApp_KelloggsMT.dbo.Master_OQAD_Category qc on qu.QuestionCategoryId= qc.QuestionCategoryId                                       
 #             where e.EmpName not like 'test%' and e.RightId  in (6)   
 #             and (E.ResignDate is null or E.ResignDate>=''+CONVERT(VARCHAR,'{run_date}')+'')     AND E.EmpName NOT LIKE '%TEST%'
 #             AND DQ.EmpId IN (SELECT EmpId FROM MID_TABLE_COV1 A  WHERE       
 #             DQ.EmpId=A.EmpId AND CONVERT(date,VisitDate)=CONVERT(date,A.VisitDate) )                          
 #             ) select * from quiz 
 #             """
 #                 sql = sql_template.format(
 #                     run_date=run_date.strftime("%Y-%m-%d")
 #                 )  
 #                 log.info(f"Fetching  quiz_empids data for  EMPID and Visitid")
 #                 df = pl.read_database(
 #                     query=sql,
 #                     connection=engine
 #                 )
 #                 log.info(f"Fetched {len(df):,} total empid and visitdate fetched for OQAD from SQL Server")
 #                 return df
 #             def get_empids_clickhouse_OQAD(
 #                 client,
 #                 table_name: str = "OQaD",
 #             ) -> pl.DataFrame:
 #                 if not table_exists(client, table_name):
 #                     log.warning(f"Table '{table_name}' does not exist.")
 #                     return pl.DataFrame(
 #                         schema={
 #                             "empid": pl.Int64,
 #                             "visitdate": pl.Date,
 #                         }
 #                     )
 #                 query = f"""
 #                 SELECT DISTINCT
 #                     employee_id AS empid,
 #                     visit_date AS visitdate
 #                 FROM {table_name}
 #                 """
 #                 # ClickHouse -> PyArrow -> Polars
 #                 arrow_table = client.query_arrow(query)
 #                 return pl.from_arrow(arrow_table)
 #             qf=fetch_quiz_empids(sql_engine,run_date)
 #             db_df = get_empids_clickhouse_OQAD(client)
 #             matched = qf.join(
 #             db_df,
 #             on=["empid", "visitdate"],
 #             how="inner",
 #         )
 #             if matched.is_empty():
 #                 empids=[0]
 #                 log.warning(
 #                     "%s  Matched df  in OQaD returned no rows",
 #                     table_name,
 #                 )
 #             else:
 #                 empids=matched["empid"].to_list()
 #             log.info(f"Fetched {len(empids):,} matched empids fetched for OQAD ")
 #             def fetch_data(
 #                 engine: Engine,
 #                 table_name: str,
 #                 table_type: str,
 #                 empids: list[int],
 #                 run_date: date
 #             ) -> pl.DataFrame:
 #                 empid_list = ",".join(str(empid) for empid in empids)
 #                 sql_file = Path("src") / "sql" / "fact"  / f"{table_name}.sql"
 #                 log.info(f"Exists: {sql_file.exists()}")
 #                 log.info(f"Path: {sql_file.resolve()}")
 #                 with open(sql_file, "r", encoding="utf-8") as f:
 #                     sql_template = f.read()
 #                 sql = sql_template.format(
 #                     empid_list=empid_list,
 #                     run_date=run_date.strftime("%Y-%m-%d")
 #                 )
 #                 log.info(f"Fetching data for {len(empids):,} EMPIDs")
 #                 log.info("Fetching OQaD data for run_date=%s", run_date)
 #                 df = pl.read_database(
 #                     query=sql,
 #                     connection=engine,
 #                 )
 #                 log.info("fn name is fetch_OQad ------Fetched %s rows", len(df))
 #                 return df
 #             df=fetch_data(  engine=sql_engine,
 #                                 table_name=table_name,
 #                                 table_type=table_type,
 #                                 empids=empids,
 #                                 run_date=run_date
 #                                 )
 #             log.info(f"Fetched {len(df):,} rows from SQL Server")
 #             return df
 # def fetch_OQaD(
 #     engine: Engine,
@@ -1,11 +1,10 @@
 with  mapping_storevisibility
 (Project_Id,StoreId,VisibilityDefinitionid,Fromdate,Todate,CreateDate,CreateBy)
 AS (
 select DISTINCT '40148' as Project_Id,StoreId,VisibilityDefinitionid,Fromdate,Todate,getdate(),'SP-Pius'
-FROM OneApp_KelloggsMT.dbo.mapping_storevisibility z WHERE 
+FROM OneApp_KelloggsMT.dbo.mapping_storevisibility 
 convert(date,FROMDATE,101)<=convert(Date,getdate(),101)  AND CONVERT(DATE,ToDate,101)>=convert(Date,getdate(),101)
 AND z.VisibilityDefinitionid IN 
 (SELECT DISTINCT VisibilityDefinitionid FROM OneApp_KelloggsMT.dbo.MASTER_VISIBILITYDEFINITION WHERE MENUID=22 )
 AND  z.StoreId NOT IN ({store_id_list})
 )
 select * from mapping_storevisibility
@@ -1,19 +1,19 @@
 WITH MID_TABLE_COV1 AS
 (
-    SELECT EmpId, VisitDate
+    
    SELECT EmpId, CAST(VisitDate AS DATE) AS VisitDate
    FROM   OneApp_KelloggsMT.dbo.T_OQAD
-    WHERE CreateDate >= {run_date}
+    WHERE  CreateDate >= '{run_date}'
-      AND CreateDate < DATEADD(DAY,1,'{run_date}')
+      AND  CreateDate <  DATEADD(DAY, 1, '{run_date}')
    UNION ALL
-    SELECT EmpId, VisitDate
+    SELECT EmpId, CAST(VisitDate AS DATE) AS VisitDate
    FROM   OneApp_KelloggsMT.dbo.T_OQAD
-    WHERE UpdateDate >= {run_date}
+    WHERE  UpdateDate >= '{run_date}'
-      AND UpdateDate < DATEADD(DAY,1, '{run_date}')
+      AND  UpdateDate <  DATEADD(DAY, 1, '{run_date}')
 ),
 QUIZ AS
 (
    SELECT DISTINCT
@@ -39,17 +39,15 @@ QUIZ AS
            ON QU.QuestionCategoryId = QC.QuestionCategoryId
    WHERE  E.EmpName NOT LIKE '%TEST%'
      AND  E.RightId = 6
-      AND (
+      AND  (E.ResignDate IS NULL OR CAST(E.ResignDate AS DATE) >= '{run_date}')
-              E.ResignDate IS NULL
+      AND  EXISTS (
              OR CAST(E.ResignDate AS DATE) >= '{run_date}'
          )
      AND EXISTS
      (
               SELECT 1
               FROM   MID_TABLE_COV1 A
               WHERE  A.EmpId    = DQ.EmpId
-            AND CAST(A.VisitDate AS DATE) = CAST(DQ.VisitDate AS DATE)
+                 AND  A.VisitDate = CAST(DQ.VisitDate AS DATE)
           )
      -- ✅ Exclude EmpIds already loaded into ClickHouse
      AND  E.EmpId NOT IN ({empid_list})
 )
 SELECT
@@ -61,8 +59,8 @@ SELECT
    Q.QuestionCategory              AS question_category,
    QM.QuestionId                   AS question_id,
    QM.Question                     AS question,
-       ISNULL(QA.AnswerId,0) AS answer_id,
+    ISNULL(QA.AnswerId, 0)          AS answer_id,
-       ISNULL(QA.Answer,'') AS answer,
+    ISNULL(QA.Answer,  '')          AS answer,
    CASE
        WHEN QA.AnswerId    IS NULL THEN 'Not Answer'
        WHEN QA.RightAnswer = 1     THEN 'Y'
@@ -76,6 +74,3 @@ INNER JOIN OneApp_KelloggsMT.dbo.Master_OQAD_Question QM
        ON Q.QuestionId = QM.QuestionId
 LEFT  JOIN OneApp_KelloggsMT.dbo.Master_OQAD_Answer   QA
        ON Q.AnswerId   = QA.AnswerId
 where Q.EmpId not in ({empid_list})
@@ -0,0 +1,2 @@
 - pipeline_trigeered_on_date: '2026-06-23'
  last_successful_run_date: 2026-06-22
@@ -4,11 +4,6 @@ tables:
    operation: INSERT
    fetch_by: mids
  - name: OQaD
    type: FACT
    operation: INSERT
    fetch_by: run_date
  - name: Survey
    type: FACT
    operation: INSERT
@@ -87,7 +82,7 @@ tables:
  - name: mapping_storevisibility
    type: BRIDGE
-    operation: ONLY_INSERT
+    operation: DELETE+INSERT
    fetch_by: run_date
  - name: Master_VisibilityReason
@@ -105,3 +100,9 @@ tables:
    type: DIMENSION
    operation: DELETE+INSERT
    fetch_by: none
  - name: OQaD
    type: FACT
    operation: INSERT
    fetch_by: run_date
@@ -4,10 +4,10 @@ tables:
  #   operation: INSERT
  #   fetch_by: mids
-  # # - name: OQaD
+  - name: OQaD
-  # #   type: FACT
+    type: FACT
-  # #   operation: INSERT
+    operation: INSERT
-  # #   fetch_by: run_date
+    fetch_by: run_date
  # - name: additional_visibility
  #   type: FACT
@@ -102,8 +102,8 @@ tables:
  #   operation: DELETE+INSERT
  #   fetch_by: none
-  - name: Promotion
+  # - name: Promotion
-    type: FACT
+  #   type: FACT
-    operation: INSERT
+  #   operation: INSERT
-    fetch_by: mids
+  #   fetch_by: mids
		`@@ -0,0 +1,2 @@`
							`- pipeline_trigeered_on_date: '2026-06-23'`
							`last_successful_run_date: 2026-06-22`