final commit

This commit is contained in:
Ankit Malik
2026-06-23 18:23:58 +05:30
parent e218aafc26
commit 6b2d754981
15 changed files with 2803 additions and 323 deletions
+4
View File
@@ -25,3 +25,7 @@ RUN uv sync --frozen
COPY . .
ENV PATH="/opt/airflow/project/.venv/bin:${PATH}"
# Airflow DAG folder
ENV AIRFLOW__CORE__DAGS_FOLDER=/opt/airflow/dags
-1
View File
@@ -2,4 +2,3 @@ pipeline:
error_message: null
run_date: null
status: null
last_successful_run_date: '2026-06-22'
+1 -1
View File
@@ -9,4 +9,4 @@ CH_HOST=172.188.12.194
CH_PORT=8123
CH_USER=default
CH_PASS=dipanshu_k
CH_DB=kelloggs_1
CH_DB=kelloggs_z
+5
View File
@@ -0,0 +1,5 @@
pipeline:
start_date: '2026-06-01'
end_date: '2026-06-20'
flag: N
Note: 'Set flag to Y (yes) to run the pipeline for the start_date..end_date range above, or N (no) to ignore these dates'
+3
View File
@@ -0,0 +1,3 @@
- pipeline_trigeered_on_date: '2026-06-23'
failed_run_date: none
attempt: none
File diff suppressed because it is too large Load Diff
+113 -26
View File
@@ -52,6 +52,45 @@ from src.dim import *
# Helpers
# ==========================================================
def get_dates_from_yaml(filename: str) -> tuple[date, date, str]:
    """Read the custom date window and its enable flag from a YAML file.

    The file must contain a top-level ``pipeline`` mapping with the keys
    ``start_date``, ``end_date`` (ISO ``YYYY-MM-DD``) and ``flag``.

    Args:
        filename: Path of the YAML file to read.

    Returns:
        ``(start_date, end_date, flag)``. The dates are ``date`` objects.
        ``flag`` is whitespace-stripped and upper-cased, so the documented
        values come back as exactly ``"Y"`` or ``"N"``.

    Raises:
        KeyError: If ``pipeline`` or one of its expected keys is missing.
        ValueError: If a date value is not a valid ISO date.
    """
    with open(filename, "r", encoding="utf-8") as file:
        data = yaml.safe_load(file)

    pipeline = data["pipeline"]

    # str() handles both quoted strings and values the YAML loader has
    # already converted to date objects.
    start_date = date.fromisoformat(str(pipeline["start_date"]))
    end_date = date.fromisoformat(str(pipeline["end_date"]))

    raw_flag = pipeline["flag"]
    if isinstance(raw_flag, bool):
        # PyYAML loads bare yes/no/true/false/on/off as booleans; map them
        # back to the Y/N convention so callers comparing against "Y" work.
        flag = "Y" if raw_flag else "N"
    else:
        # Tolerate lower-case input and stray whitespace ("y", " Y ").
        flag = str(raw_flag).strip().upper()

    return start_date, end_date, flag
def write_table_to_yaml(
    data: dict,
    run_date: date,
    filename: str | None = None
):
    """Dump ``data`` to a YAML file in block style, preserving key order.

    Args:
        data: Mapping to serialise.
        run_date: Used to build the default file name
            ``elt_pipeline_<run_date>.yml`` when ``filename`` is not given.
        filename: Explicit output path; overrides the default name.
    """
    target = f"elt_pipeline_{run_date}.yml" if filename is None else filename

    with open(target, "w") as out:
        yaml.dump(data, out, default_flow_style=False, sort_keys=False)

    print(f"Table written to {target}")
def table_exists(
client,
table_name: str,
@@ -68,7 +107,7 @@ def table_exists(
# Main
# ==========================================================
def main():
def elt(run_date : date):
log.info("=" * 80)
log.info("Hello from data-move Python data pipeline!")
@@ -77,13 +116,7 @@ def main():
# Run Date
# ------------------------------------------------------
if len(sys.argv) > 1:
run_date = datetime.strptime(
sys.argv[1],
"%Y-%m-%d",
).date()
else:
run_date = date.today() - timedelta(days=1)
log.info(
"Pipeline Run Date: %s",
@@ -126,7 +159,7 @@ def main():
# ------------------------------------------------------
with open(
"t.yml",
"y.yml",
"r",
) as file:
@@ -211,8 +244,6 @@ def main():
table_name,
)
elif operation =="ONLY_INSERT" :
continue
else:
delete_existing_data(
@@ -253,7 +284,7 @@ def main():
log.info("=" * 80)
if __name__ == "__main__":
def main() :
config_file = Path("Pipeline_config.yml")
@@ -274,34 +305,62 @@ if __name__ == "__main__":
p_start_date, p_end_date , flag= get_dates_from_yaml("elt_pipeline_custom_dates.yml")
if flag =="Y" :
start_date=p_start_date
end_date=p_end_date
elif len(sys.argv) > 1:
start_date = datetime.strptime(
sys.argv[1],
"%Y-%m-%d",
).date()
end_date=start_date + timedelta(days=1)
else:
start_date = date.today() - timedelta(days=1)
end_date=start_date
log.info(
"Pipeline Start Date: %s",
start_date,
)
failed_dates=[]
successful_dates=[]
filename_successful = "successful_Pipeline_dates_config.yml"
filename_failed = "failed_Pipeline_dates_config.yml"
while start_date <=end_date:
run_date = start_date
for attempt in range(3):
try:
main()
elt(run_date)
with open("Pipeline_config.yml", "r") as f:
config = yaml.safe_load(f)
config["pipeline"]["last_successful_run_date"] = str(date.today())
with open("Pipeline_config.yml", "w") as f:
yaml.safe_dump(config, f, sort_keys=False)
successful_dates.append({
'pipeline_trigeered_on_date': str(date.today()),
'last_successful_run_date': run_date,
})
log.info(
f"Pipeline completed successfully. "
f"last_successful_run_date={date.today()}"
f"pipeline_trigeered_on_date={date.today()}"
f"last_successful_run_date={run_date}"
)
break
except Exception as e:
with open("Pipeline_config.yml", "r") as f:
config = yaml.safe_load(f)
config["pipeline"]["run_date"] = str(date.today())
with open("Pipeline_config.yml", "w") as f:
yaml.safe_dump(config, f, sort_keys=False)
failed_dates.append({
'pipeline_trigeered_on_date': str(date.today()),
'failed_run_date': run_date,
"attempt" : attempt
})
if attempt == 2:
raise
@@ -311,3 +370,31 @@ if __name__ == "__main__":
)
sleep(5)
start_date=start_date + timedelta(days=1)
with open(filename_successful, "w") as f:
yaml.dump(
successful_dates,
f,
default_flow_style=False,
sort_keys=False,
)
if len(failed_dates) == 0 :
failed_dates.append({
'pipeline_trigeered_on_date': str(date.today()),
'failed_run_date': "none",
"attempt" : "none"
})
with open(filename_failed, "w") as f:
yaml.dump(failed_dates,
f, default_flow_style=False,
sort_keys=False)
if __name__ == "__main__":
main()
View File
+93 -65
View File
@@ -14,9 +14,6 @@ from db_con.connection import (
)
def fetch_mapping_storevisibility(
sql_engine: Engine,
table_name: str,
@@ -25,86 +22,117 @@ def fetch_mapping_storevisibility(
run_date: date
) -> pl.DataFrame:
run_date = run_date + timedelta(days=1)
client= get_clickhouse_client()
def table_exists(
client,
table_name: str,
) -> bool:
return bool(
client.command(
f"EXISTS TABLE {table_name}"
)
)
def get_reason_ids_mapping_storevisibility(
client,
run_date: date,
table_name: str = "mapping_storevisibility",
) -> list[int] :
if not table_exists(client, table_name):
log.warning(f"Table '{table_name}' does not exist. During collecting store_ids")
return [0]
query = f"""
SELECT DISTINCT StoreId
FROM mapping_storevisibility
WHERE toDate(Fromdate) <= toDate('{run_date + timedelta(days= 1)}')
AND toDate(Todate) >= toDate('{run_date + timedelta(days= 1)}')
AND project_Id = '40148'
"""
# ClickHouse -> PyArrow -> Polars
arrow_table = client.query_arrow(query)
df= pl.from_arrow(arrow_table)
list=df["reason_id"].to_list()
return list
def fetch_data(
engine: Engine,
table_name: str,
table_type: str,
run_date: date,
store_id: list[int]
) -> pl.DataFrame:
log.info(f"Fetching data from sql server for Master table......")
store_id_list = ",".join(str(sid) for sid in store_id)
log.info(f"Fetching data from sql server for {table_type} table......")
sql_file = Path("src") / "sql" / f"bridge" / f"{table_name}.sql"
with open(sql_file, "r", encoding="utf-8") as f:
sql_template = f.read()
sql = sql_template.format(
store_id_list=store_id_list,
run_date=run_date.strftime("%Y-%m-%d")
)
sql = sql_template.format( )
log.info(f"Fetching in progress .... ")
df = pl.read_database(
query=sql,
connection=engine
connection=sql_engine
)
log.info(f"Fetched {len(df):,} rows from SQL Server")
return df
store_id=get_reason_ids_mapping_storevisibility(client, run_date, "mapping_storevisibility")
df=fetch_data(engine=sql_engine,
table_name=table_name,
table_type=table_type,
run_date=run_date,
store_id=store_id,
)
log.info(f"Fetched {len(df):,} rows from SQL Server")
return df
# def fetch_mapping_storevisibility(
# sql_engine: Engine,
# table_name: str,
# table_type: str,
# mids: list[int],
# run_date: date
# ) -> pl.DataFrame:
# run_date = run_date + timedelta(days=1)
# client= get_clickhouse_client()
# def table_exists(
# client,
# table_name: str,
# ) -> bool:
# return bool(
# client.command(
# f"EXISTS TABLE {table_name}"
# )
# )
# def get_reason_ids_mapping_storevisibility(
# client,
# run_date: date,
# table_name: str = "mapping_storevisibility",
# ) -> list[int] :
# if not table_exists(client, table_name):
# log.warning(f"Table '{table_name}' does not exist. During collecting store_ids")
# return [0]
# query = f"""
# SELECT DISTINCT StoreId
# FROM mapping_storevisibility
# WHERE toDate(Fromdate) <= toDate('{run_date + timedelta(days= 1)}')
# AND toDate(Todate) >= toDate('{run_date + timedelta(days= 1)}')
# AND project_Id = '40148'
# """
# # ClickHouse -> PyArrow -> Polars
# arrow_table = client.query_arrow(query)
# df= pl.from_arrow(arrow_table)
# list=df["reason_id"].to_list()
# return list
# def fetch_data(
# engine: Engine,
# table_name: str,
# table_type: str,
# run_date: date,
# store_id: list[int]
# ) -> pl.DataFrame:
# log.info(f"Fetching data from sql server for Master table......")
# store_id_list = ",".join(str(sid) for sid in store_id)
# sql_file = Path("src") / "sql" / f"bridge" / f"{table_name}.sql"
# with open(sql_file, "r", encoding="utf-8") as f:
# sql_template = f.read()
# sql = sql_template.format(
# store_id_list=store_id_list,
# run_date=run_date.strftime("%Y-%m-%d")
# )
# log.info(f"Fetching in progress .... ")
# df = pl.read_database(
# query=sql,
# connection=engine
# )
# log.info(f"Fetched {len(df):,} rows from SQL Server")
# return df
# store_id=get_reason_ids_mapping_storevisibility(client, run_date, "mapping_storevisibility")
# df=fetch_data(engine=sql_engine,
# table_name=table_name,
# table_type=table_type,
# run_date=run_date,
# store_id=store_id,
# )
# log.info(f"Fetched {len(df):,} rows from SQL Server")
# return df
+261 -91
View File
@@ -134,6 +134,7 @@ def fetch_additional_visibility( engine: Engine,
return df
def fetch_OQaD(
sql_engine: Engine,
table_name: str,
@@ -142,83 +143,85 @@ def fetch_OQaD(
run_date: date
) -> pl.DataFrame:
# ─────────────────────────────────────────────
# INNER HELPERS (defined once, used below)
# ─────────────────────────────────────────────
client = get_clickhouse_client()
def table_exists(
client,
table_name: str,
) -> bool:
return bool(
client.command(
f"EXISTS TABLE {table_name}"
)
)
# ── Does a ClickHouse table exist? ────────────
def table_exists(client, table_name: str) -> bool:
return bool(client.command(f"EXISTS TABLE {table_name}"))
# ── STEP 1: Who submitted yesterday in SQL Server? ───
def fetch_quiz_empids(engine: Engine, run_date: date) -> pl.DataFrame:
sql_template = f"""
# Format date ONCE safely — avoids f-string injection bugs
run_date_str = run_date.strftime("%Y-%m-%d")
next_date_str = (run_date + timedelta(days=1)).strftime("%Y-%m-%d")
sql = f"""
WITH MID_TABLE_COV1 AS
(
SELECT EmpId, VisitDate
-- Records CREATED yesterday
SELECT EmpId, CAST(VisitDate AS DATE) AS VisitDate
FROM OneApp_KelloggsMT.dbo.T_OQAD
WHERE CreateDate >= {run_date}
AND CreateDate < DATEADD(DAY,1,'{run_date}')
WHERE CreateDate >= '{run_date_str}'
AND CreateDate < '{next_date_str}'
UNION ALL
SELECT EmpId, VisitDate
-- Records UPDATED yesterday (different rows, safe to UNION ALL)
SELECT EmpId, CAST(VisitDate AS DATE) AS VisitDate
FROM OneApp_KelloggsMT.dbo.T_OQAD
WHERE UpdateDate >= {run_date}
AND UpdateDate < DATEADD(DAY,1, '{run_date}')
WHERE UpdateDate >= '{run_date_str}'
AND UpdateDate < '{next_date_str}'
),
QUIZ AS
(
SELECT Distinct E.EmpId as empid
, CONVERT(date,DQ.VisitDate) AS visitdate
FROM OneApp_KelloggsMT.dbo.T_OQAD DQ INNER JOIN
OneApp_KelloggsMT.dbo.vw_Employee_Detail E ON DQ.EmpId = E.EmpId inner join
OneApp_KelloggsMT.dbo.Master_OQAD_Question QU on DQ.QuestionId= qu.QuestionId inner join
OneApp_KelloggsMT.dbo.Master_OQAD_Category qc on qu.QuestionCategoryId= qc.QuestionCategoryId
where e.EmpName not like 'test%' and e.RightId in (6)
and (E.ResignDate is null or E.ResignDate>=''+CONVERT(VARCHAR,'{run_date}')+'') AND E.EmpName NOT LIKE '%TEST%'
AND DQ.EmpId IN (SELECT EmpId FROM MID_TABLE_COV1 A WHERE
DQ.EmpId=A.EmpId AND CONVERT(date,VisitDate)=CONVERT(date,A.VisitDate) )
) select * from quiz
SELECT DISTINCT
E.EmpId AS empid,
CAST(DQ.VisitDate AS DATE) AS visitdate
FROM OneApp_KelloggsMT.dbo.T_OQAD DQ
INNER JOIN OneApp_KelloggsMT.dbo.vw_Employee_Detail E
ON DQ.EmpId = E.EmpId
INNER JOIN OneApp_KelloggsMT.dbo.Master_OQAD_Question QU
ON DQ.QuestionId = QU.QuestionId
INNER JOIN OneApp_KelloggsMT.dbo.Master_OQAD_Category QC
ON QU.QuestionCategoryId = QC.QuestionCategoryId
WHERE E.EmpName NOT LIKE '%TEST%' -- exclude test employees
AND E.RightId = 6 -- only field reps
AND (
E.ResignDate IS NULL
OR CAST(E.ResignDate AS DATE) >= '{run_date_str}'
)
AND EXISTS ( -- ✅ EXISTS beats IN for large sets
SELECT 1
FROM MID_TABLE_COV1 A
WHERE A.EmpId = DQ.EmpId
AND A.VisitDate = CAST(DQ.VisitDate AS DATE)
)
)
SELECT * FROM QUIZ
"""
sql = sql_template.format(
run_date=run_date.strftime("%Y-%m-%d")
)
log.info(f"Fetching quiz_empids data for EMPID and Visitid")
df = pl.read_database(
query=sql,
connection=engine
)
log.info(f"Fetched {len(df):,} total empid and visitdate fetched for OQAD from SQL Server")
log.info("Fetching quiz empids for run_date=%s", run_date_str)
df = pl.read_database(query=sql, connection=engine)
log.info("Fetched %s (EmpId, VisitDate) pairs from SQL Server", len(df))
return df
# ── STEP 2: Who do we ALREADY have in ClickHouse? ───
def get_empids_clickhouse_OQAD(
client,
table_name: str = "OQaD",
) -> pl.DataFrame:
if not table_exists(client, table_name):
log.warning(f"Table '{table_name}' does not exist.")
return pl.DataFrame(
schema={
"empid": pl.Int64,
"visitdate": pl.Date,
}
)
log.warning("Table '%s' does not exist in ClickHouse.", table_name)
return pl.DataFrame(schema={"empid": pl.Int64, "visitdate": pl.Date})
query = f"""
SELECT DISTINCT
@@ -227,82 +230,249 @@ def fetch_OQaD(
FROM {table_name}
"""
# ClickHouse -> PyArrow -> Polars
arrow_table = client.query_arrow(query)
df = pl.from_arrow(arrow_table)
log.info("Fetched %s existing (EmpId, VisitDate) pairs from ClickHouse", len(df))
return df
return pl.from_arrow(arrow_table)
# ── STEP 3: Who is NEW? (in SQL Server but NOT yet in ClickHouse) ───
def find_new_empids(
sql_df: pl.DataFrame,
ch_df: pl.DataFrame,
) -> list[int]:
qf=fetch_quiz_empids(sql_engine,run_date)
db_df = get_empids_clickhouse_OQAD(client)
matched = qf.join(
db_df,
new_df = sql_df.join(
ch_df,
on=["empid", "visitdate"],
how="inner",
how="anti", # ✅ anti = keep rows NOT found in ch_df
)
if matched.is_empty():
if new_df.is_empty():
log.warning("No new EmpIds found for table=%s — nothing to fetch.", table_name)
return [0] # sentinel value — the .sql WHERE will return 0 rows safely
empids=[0]
log.warning(
"%s Matched df in OQaD returned no rows",
table_name,
)
else:
empids=matched["empid"].to_list()
log.info(f"Fetched {len(empids):,} matched empids fetched for OQAD ")
empids = new_df["empid"].unique().to_list()
log.info("Found %s NEW empids to fetch for %s", len(empids), table_name)
return empids
# ── STEP 4: Fetch full quiz data for new empids ───
def fetch_data(
engine: Engine,
table_name: str,
table_type: str,
empids: list[int],
run_date: date
run_date: date,
) -> pl.DataFrame:
empid_list = ",".join(str(empid) for empid in empids)
run_date_str = run_date.strftime("%Y-%m-%d")
empid_list = ", ".join(str(e) for e in empids) # "101, 102, 103"
sql_file = Path("src") / "sql" / "fact" / f"{table_name}.sql"
log.info(f"Exists: {sql_file.exists()}")
log.info(f"Path: {sql_file.resolve()}")
log.info("Loading SQL from: %s (exists=%s)", sql_file.resolve(), sql_file.exists())
with open(sql_file, "r", encoding="utf-8") as f:
sql_template = f.read()
sql = sql_template.format(
empid_list=empid_list,
run_date=run_date.strftime("%Y-%m-%d")
run_date=run_date_str,
)
log.info(f"Fetching data for {len(empids):,} EMPIDs")
log.info("Fetching OQaD data for run_date=%s", run_date)
df = pl.read_database(
query=sql,
connection=engine,
)
log.info("fn name is fetch_OQad ------Fetched %s rows", len(df))
log.info("Fetching full OQaD data for %s empids, run_date=%s", len(empids), run_date_str)
df = pl.read_database(query=sql, connection=engine)
log.info("Fetched %s rows from SQL Server for table=%s", len(df), table_name)
return df
df=fetch_data( engine=sql_engine,
# ─────────────────────────────────────────────
# MAIN FLOW (the 4 steps, clearly sequenced)
# ─────────────────────────────────────────────
qf = fetch_quiz_empids(sql_engine, run_date) # Step 1
db_df = get_empids_clickhouse_OQAD(client, table_name) # Step 2
empids = find_new_empids(qf, db_df) # Step 3
df = fetch_data( # Step 4
engine=sql_engine,
table_name=table_name,
table_type=table_type,
empids=empids,
run_date=run_date
run_date=run_date,
)
log.info(f"Fetched {len(df):,} rows from SQL Server")
log.info("fetch_OQaD complete — returning %s rows", len(df))
return df
# def fetch_OQaD(
# sql_engine: Engine,
# table_name: str,
# table_type: str,
# mids: list[int],
# run_date: date
# ) -> pl.DataFrame:
# client= get_clickhouse_client()
# def table_exists(
# client,
# table_name: str,
# ) -> bool:
# return bool(
# client.command(
# f"EXISTS TABLE {table_name}"
# )
# )
# def fetch_quiz_empids(engine: Engine, run_date : date) -> pl.DataFrame:
# sql_template = f"""
# WITH MID_TABLE_COV1 AS
# (
# SELECT EmpId, VisitDate
# FROM OneApp_KelloggsMT.dbo.T_OQAD
# WHERE CreateDate >= {run_date}
# AND CreateDate < DATEADD(DAY,1,'{run_date}')
# UNION
# SELECT EmpId, VisitDate
# FROM OneApp_KelloggsMT.dbo.T_OQAD
# WHERE UpdateDate >= {run_date}
# AND UpdateDate < DATEADD(DAY,1, '{run_date}')
# ),
# QUIZ AS
# (
# SELECT Distinct E.EmpId as empid
# , CONVERT(date,DQ.VisitDate) AS visitdate
# FROM OneApp_KelloggsMT.dbo.T_OQAD DQ INNER JOIN
# OneApp_KelloggsMT.dbo.vw_Employee_Detail E ON DQ.EmpId = E.EmpId inner join
# OneApp_KelloggsMT.dbo.Master_OQAD_Question QU on DQ.QuestionId= qu.QuestionId inner join
# OneApp_KelloggsMT.dbo.Master_OQAD_Category qc on qu.QuestionCategoryId= qc.QuestionCategoryId
# where e.EmpName not like 'test%' and e.RightId in (6)
# and (E.ResignDate is null or E.ResignDate>=''+CONVERT(VARCHAR,'{run_date}')+'') AND E.EmpName NOT LIKE '%TEST%'
# AND DQ.EmpId IN (SELECT EmpId FROM MID_TABLE_COV1 A WHERE
# DQ.EmpId=A.EmpId AND CONVERT(date,VisitDate)=CONVERT(date,A.VisitDate) )
# ) select * from quiz
# """
# sql = sql_template.format(
# run_date=run_date.strftime("%Y-%m-%d")
# )
# log.info(f"Fetching quiz_empids data for EMPID and Visitid")
# df = pl.read_database(
# query=sql,
# connection=engine
# )
# log.info(f"Fetched {len(df):,} total empid and visitdate fetched for OQAD from SQL Server")
# return df
# def get_empids_clickhouse_OQAD(
# client,
# table_name: str = "OQaD",
# ) -> pl.DataFrame:
# if not table_exists(client, table_name):
# log.warning(f"Table '{table_name}' does not exist.")
# return pl.DataFrame(
# schema={
# "empid": pl.Int64,
# "visitdate": pl.Date,
# }
# )
# query = f"""
# SELECT DISTINCT
# employee_id AS empid,
# visit_date AS visitdate
# FROM {table_name}
# """
# # ClickHouse -> PyArrow -> Polars
# arrow_table = client.query_arrow(query)
# return pl.from_arrow(arrow_table)
# qf=fetch_quiz_empids(sql_engine,run_date)
# db_df = get_empids_clickhouse_OQAD(client)
# matched = qf.join(
# db_df,
# on=["empid", "visitdate"],
# how="inner",
# )
# if matched.is_empty():
# empids=[0]
# log.warning(
# "%s Matched df in OQaD returned no rows",
# table_name,
# )
# else:
# empids=matched["empid"].to_list()
# log.info(f"Fetched {len(empids):,} matched empids fetched for OQAD ")
# def fetch_data(
# engine: Engine,
# table_name: str,
# table_type: str,
# empids: list[int],
# run_date: date
# ) -> pl.DataFrame:
# empid_list = ",".join(str(empid) for empid in empids)
# sql_file = Path("src") / "sql" / "fact" / f"{table_name}.sql"
# log.info(f"Exists: {sql_file.exists()}")
# log.info(f"Path: {sql_file.resolve()}")
# with open(sql_file, "r", encoding="utf-8") as f:
# sql_template = f.read()
# sql = sql_template.format(
# empid_list=empid_list,
# run_date=run_date.strftime("%Y-%m-%d")
# )
# log.info(f"Fetching data for {len(empids):,} EMPIDs")
# log.info("Fetching OQaD data for run_date=%s", run_date)
# df = pl.read_database(
# query=sql,
# connection=engine,
# )
# log.info("fn name is fetch_OQad ------Fetched %s rows", len(df))
# return df
# df=fetch_data( engine=sql_engine,
# table_name=table_name,
# table_type=table_type,
# empids=empids,
# run_date=run_date
# )
# log.info(f"Fetched {len(df):,} rows from SQL Server")
# return df
# def fetch_OQaD(
# engine: Engine,
+4 -5
View File
@@ -1,11 +1,10 @@
with mapping_storevisibility
(Project_Id,StoreId,VisibilityDefinitionid,Fromdate,Todate,CreateDate,CreateBy)
AS (
select DISTINCT '40148' as Project_Id,StoreId,VisibilityDefinitionid,Fromdate,Todate,getdate(),'SP-Pius'
FROM OneApp_KelloggsMT.dbo.mapping_storevisibility z WHERE
convert(date,FROMDATE,101)<=convert(Date,getdate(),101) AND CONVERT(DATE,ToDate,101)>=convert(Date,getdate(),101)
AND z.VisibilityDefinitionid IN
(SELECT DISTINCT VisibilityDefinitionid FROM OneApp_KelloggsMT.dbo.MASTER_VISIBILITYDEFINITION WHERE MENUID=22 )
AND z.StoreId NOT IN ({store_id_list})
FROM OneApp_KelloggsMT.dbo.mapping_storevisibility
)
select * from mapping_storevisibility
+10 -15
View File
@@ -1,19 +1,19 @@
WITH MID_TABLE_COV1 AS
(
SELECT EmpId, VisitDate
SELECT EmpId, CAST(VisitDate AS DATE) AS VisitDate
FROM OneApp_KelloggsMT.dbo.T_OQAD
WHERE CreateDate >= {run_date}
WHERE CreateDate >= '{run_date}'
AND CreateDate < DATEADD(DAY, 1, '{run_date}')
UNION ALL
SELECT EmpId, VisitDate
SELECT EmpId, CAST(VisitDate AS DATE) AS VisitDate
FROM OneApp_KelloggsMT.dbo.T_OQAD
WHERE UpdateDate >= {run_date}
WHERE UpdateDate >= '{run_date}'
AND UpdateDate < DATEADD(DAY, 1, '{run_date}')
),
QUIZ AS
(
SELECT DISTINCT
@@ -39,17 +39,15 @@ QUIZ AS
ON QU.QuestionCategoryId = QC.QuestionCategoryId
WHERE E.EmpName NOT LIKE '%TEST%'
AND E.RightId = 6
AND (
E.ResignDate IS NULL
OR CAST(E.ResignDate AS DATE) >= '{run_date}'
)
AND EXISTS
(
AND (E.ResignDate IS NULL OR CAST(E.ResignDate AS DATE) >= '{run_date}')
AND EXISTS (
SELECT 1
FROM MID_TABLE_COV1 A
WHERE A.EmpId = DQ.EmpId
AND CAST(A.VisitDate AS DATE) = CAST(DQ.VisitDate AS DATE)
AND A.VisitDate = CAST(DQ.VisitDate AS DATE)
)
-- ✅ Exclude EmpIds already loaded into ClickHouse
AND E.EmpId NOT IN ({empid_list})
)
SELECT
@@ -76,6 +74,3 @@ INNER JOIN OneApp_KelloggsMT.dbo.Master_OQAD_Question QM
ON Q.QuestionId = QM.QuestionId
LEFT JOIN OneApp_KelloggsMT.dbo.Master_OQAD_Answer QA
ON Q.AnswerId = QA.AnswerId
where Q.EmpId not in ({empid_list})
+2
View File
@@ -0,0 +1,2 @@
- pipeline_trigeered_on_date: '2026-06-23'
last_successful_run_date: 2026-06-22
+7 -6
View File
@@ -4,11 +4,6 @@ tables:
operation: INSERT
fetch_by: mids
- name: OQaD
type: FACT
operation: INSERT
fetch_by: run_date
- name: Survey
type: FACT
operation: INSERT
@@ -87,7 +82,7 @@ tables:
- name: mapping_storevisibility
type: BRIDGE
operation: ONLY_INSERT
operation: DELETE+INSERT
fetch_by: run_date
- name: Master_VisibilityReason
@@ -105,3 +100,9 @@ tables:
type: DIMENSION
operation: DELETE+INSERT
fetch_by: none
- name: OQaD
type: FACT
operation: INSERT
fetch_by: run_date
+8 -8
View File
@@ -4,10 +4,10 @@ tables:
# operation: INSERT
# fetch_by: mids
# # - name: OQaD
# # type: FACT
# # operation: INSERT
# # fetch_by: run_date
- name: OQaD
type: FACT
operation: INSERT
fetch_by: run_date
# - name: additional_visibility
# type: FACT
@@ -102,8 +102,8 @@ tables:
# operation: DELETE+INSERT
# fetch_by: none
- name: Promotion
type: FACT
operation: INSERT
fetch_by: mids
# - name: Promotion
# type: FACT
# operation: INSERT
# fetch_by: mids