18-06-2026 1st commit

This commit is contained in:
Ankit Malik
2026-06-18 18:16:50 +05:30
parent 9cd4135d7b
commit d60740ae48
11 changed files with 3104 additions and 311 deletions
+21 -65
View File
@@ -65,51 +65,27 @@ def table_exists(
def get_dataframe(
sql_engine,
fn_name: str,
fetch_by: str,
sql_engine,
table_name: str,
table_type: str ,
mids,
run_date,
):
fn = globals()[fn_name]
if fetch_by == "mids":
return fn(sql_engine, mids)
if fetch_by == "mids" or "run_date":
return fn(sql_engine, table_name , table_type, mids, run_date)
if fetch_by == "run_date":
return fn(sql_engine, run_date)
return fn(sql_engine)
return fn(sql_engine ,table_name,table_type)
def get_empids_clickhouse_OQAD(
client,
table_name: str = "OQaD",
) -> pl.DataFrame:
if not table_exists(client, table_name):
log.warning(f"Table '{table_name}' does not exist.")
return pl.DataFrame(
schema={
"empid": pl.Int64,
"visitdate": pl.Date,
}
)
query = f"""
SELECT DISTINCT
employee_id AS empid,
visit_date AS visitdate
FROM {table_name}
"""
# ClickHouse -> PyArrow -> Polars
arrow_table = client.query_arrow(query)
return pl.from_arrow(arrow_table)
# ==========================================================
# Main
@@ -166,17 +142,7 @@ def main():
sql_engine,
run_date,
)
qf=fetch_quiz_empids(sql_engine,run_date)
db_df = get_empids_clickhouse_OQAD(client)
matched = qf.join(
db_df,
on=["empid", "visitdate"],
how="inner",
)
empids=matched["empid"].to_list()
log.info(f"Fetched {len(empids):,} matched empids fetched for OQAD ")
# ------------------------------------------------------
# Config
@@ -201,37 +167,27 @@ def main():
table_type=table["type"]
log.info("=" * 80)
log.info(
"Processing Table: %s",
table_name,
)
log.info(f"Processing Table: {table_name} | Table type is -: {table_type} | Based on -{fetch_by} and operation is used -{operation} " )
try:
# ------------------------------------------
# Fetch Data
# ------------------------------------------
if table_name =="OQaD":
df=fetch_OQaD( engine=sql_engine,
table_name=table_name,
table_type=table_type,
empids=empids,
run_date=run_date
)
else:
df = fetch_data(
engine=sql_engine,
table_name=table_name,
table_type=table_type,
mids=mids,
run_date=run_date,
)
log.info(f"Fetching Data from sql server for table-: {table_name} ..............")
fn_name = f"fetch_{table_name}"
df = get_dataframe(
sql_engine,
fn_name=fn_name,
fetch_by=fetch_by,
table_name=table_name,
table_type=table_type,
mids=mids,
run_date=run_date,
)
log.info(f"Fetched total row -: {len(df)} from sql server for table-:{table_name} ...........!!!")
if df.is_empty():
@@ -295,7 +251,7 @@ def main():
# ------------------------------------------
# Load Data
# ------------------------------------------
log.info("_ _ _ _Inserting data into clickhouse db from sql server_ _ _ _")
load_to_clickhouse(
client=client,
table_name=table_name,