19-06-2026 1st commit

This commit is contained in:
Ankit Malik
2026-06-19 16:51:03 +05:30
parent d60740ae48
commit 97b22ea0f8
23 changed files with 2026 additions and 109 deletions
+142
View File
@@ -0,0 +1,142 @@
from pathlib import Path
import polars as pl
from sqlalchemy import Engine
from datetime import date , timedelta
from log import log
from db_con.connection import (
build_sql_server_engine,
build_clickhouse_engine,
get_clickhouse_client,
)
def fetch_mapping_storevisibility(
    sql_engine: Engine,
    table_name: str,
    table_type: str,
    mids: list[int],
    run_date: date
) -> pl.DataFrame:
    """Fetch the store-visibility mapping rows from SQL Server for ``run_date``.

    Steps:
      1. Open a ClickHouse client and collect the store ids whose mapping
         window covers ``run_date``.
      2. Render ``src/sql/dim/<table_name>.sql`` with those ids and run it
         against ``sql_engine``.

    Args:
        sql_engine: SQLAlchemy engine used to run the rendered SQL.
        table_name: Base name of the ``.sql`` template to load.
        table_type: Forwarded to ``fetch_data``; not used by the query itself.
        mids: Accepted for interface compatibility; currently unused.
        run_date: Date the mapping window must cover.

    Returns:
        The rows returned by the rendered SQL as a Polars DataFrame.
    """
    client = get_clickhouse_client()
    store_ids = get_reason_ids_mapping_storevisibility(client, run_date=run_date)
    # ``fetch_data`` names its id parameter ``reason_ids``; the ids passed
    # here are store ids.
    return fetch_data(
        engine=sql_engine,
        table_name=table_name,
        table_type=table_type,
        reason_ids=store_ids,
    )


def table_exists(
    client,
    table_name: str,
) -> bool:
    """Return whether ``EXISTS TABLE <table_name>`` reports a truthy result.

    ``table_name`` is interpolated directly into the statement, so only
    trusted identifiers should be passed.
    """
    return bool(client.command(f"EXISTS TABLE {table_name}"))


def build_store_id_query(
    run_date: date,
    table_name: str = "mapping_storevisibility",
) -> str:
    """Build the ClickHouse query that lists distinct store ids for ``run_date``.

    The date is emitted as a quoted literal wrapped in ``toDate`` so it is
    compared as a date rather than parsed as integer arithmetic.
    """
    return f"""
        SELECT DISTINCT StoreId
        FROM {table_name}
        WHERE toDate(Fromdate) <= toDate('{run_date}')
          AND toDate(Todate) >= toDate('{run_date}')
          AND Project_Id = '40148'
    """


def get_reason_ids_mapping_storevisibility(
    client,
    table_name: str = "mapping_storevisibility",
    *,
    run_date: date,
) -> list[int]:
    """Return the distinct ``StoreId`` values whose window covers ``run_date``.

    Args:
        client: ClickHouse client exposing ``command`` and ``query_arrow``.
        table_name: ClickHouse table that is both checked for existence and
            queried.
        run_date: Date the ``Fromdate``/``Todate`` window must cover.

    Returns:
        The store ids, or ``[0]`` when the table is missing or no row matches.
    """
    if not table_exists(client, table_name):
        log.warning(f"Table '{table_name}' does not exist. During collecting reason_ids")
        return [0]

    # ClickHouse -> PyArrow -> Polars
    arrow_table = client.query_arrow(build_store_id_query(run_date, table_name))
    store_ids = pl.from_arrow(arrow_table)["StoreId"].to_list()

    # An empty list would be formatted into the SQL template as an empty
    # string, so fall back to the same placeholder as the missing-table case.
    return store_ids or [0]


def fetch_data(
    engine: Engine,
    table_name: str,
    table_type: str,
    reason_ids: list[int]
) -> pl.DataFrame:
    """Render the SQL template for ``table_name`` and run it on ``engine``.

    Args:
        engine: SQLAlchemy engine the query is executed against.
        table_name: Base name of the template under ``src/sql/dim``
            (the path is relative to the current working directory).
        table_type: Accepted for interface compatibility; currently unused.
        reason_ids: Ids joined with commas and substituted into the template.

    Returns:
        The query result as a Polars DataFrame.
    """
    log.info("Fetching data from sql server for Master table......")

    id_csv = ",".join(str(rid) for rid in reason_ids)
    sql_file = Path("src") / "sql" / "dim" / f"{table_name}.sql"
    sql_template = sql_file.read_text(encoding="utf-8")

    # NOTE(review): the keyword keeps its original spelling because it is the
    # placeholder name handed to the template; the template files are not
    # visible here, so confirm before renaming it.
    sql = sql_template.format(resaon_id_list=id_csv)

    log.info("Fetching in progress .... ")
    df = pl.read_database(
        query=sql,
        connection=engine
    )
    log.info(f"Fetched {len(df):,} rows from SQL Server")
    return df


def fetch_mapping_storevisibility_direct(
    engine: Engine,
    run_date: date,
) -> pl.DataFrame:
    """Run the inline store-visibility mapping query against ``engine``.

    NOTE(review): nothing calls this helper. The query was previously placed
    after a ``return`` statement and relied on an undefined ``engine`` name;
    it is kept here, with ``engine`` as an explicit parameter, so the SQL is
    not lost. Wire it in or delete it once the intended data path is decided.

    Args:
        engine: SQLAlchemy engine the query is executed against.
        run_date: Date the ``FromDate``/``ToDate`` window must cover.

    Returns:
        The query result as a Polars DataFrame.
    """
    sql = f"""
        SELECT DISTINCT
        40148 AS project_id,
        Z.StoreId AS store_id,
        Z.VisibilityDefinitionId AS visibility_definition_id,
        Z.FromDate AS from_date,
        Z.ToDate AS to_date
        FROM OneApp_KelloggsMT.dbo.Mapping_StoreVisibility Z
        WHERE CAST(Z.FromDate AS DATE) <= '{run_date}'
        AND CAST(Z.ToDate AS DATE) >= '{run_date}'
        AND Z.VisibilityDefinitionId IN
        (
        SELECT DISTINCT VisibilityDefinitionId
        FROM OneApp_KelloggsMT.dbo.Master_VisibilityDefinition
        WHERE MenuId = 22
        )
    """
    log.info(
        f"Fetching Mapping Store Visibility for {run_date}"
    )
    df = pl.read_database(
        query=sql,
        connection=engine
    )
    log.info(
        f"Fetched {len(df):,} Mapping Store Visibility records"
    )
    return df