16-06-2026 2nd commit

This commit is contained in:
Ankit Malik
2026-06-16 17:33:13 +05:30
parent 822b4d2fdf
commit 3337c62dd7
4 changed files with 122 additions and 12 deletions
+26 -10
View File
@@ -82,26 +82,35 @@ def get_dataframe(
return fn(sql_engine)
def get_empids_clickhouse_OQAD(
    client,
    table_name: str = "OQaD",
) -> pl.DataFrame:
    """Fetch the distinct (employee, visit date) pairs stored in ClickHouse.

    Args:
        client: ClickHouse client handle. It is passed to ``table_exists``
            and must expose ``query_arrow`` (presumably a clickhouse-connect
            client -- confirm against the caller).
        table_name: Table to read from. It is interpolated directly into
            the SQL text, so it must come from trusted code, never from
            user input.

    Returns:
        A DataFrame with the columns ``EmpId`` (``employee_id``) and
        ``VisitDate`` (``toDate(visit_date)``), one row per distinct pair.
        When the table does not exist, a warning is logged and an empty
        frame with the schema ``EmpId: Int64`` / ``VisitDate: Date`` is
        returned so callers can still join on those columns.
    """
    if not table_exists(client, table_name):
        log.warning(f"Table '{table_name}' does not exist.")
        # Return a typed empty frame (not a bare list) so the caller's join
        # on ["EmpId", "VisitDate"] still finds both key columns.
        return pl.DataFrame(
            schema={
                "EmpId": pl.Int64,
                "VisitDate": pl.Date,
            }
        )

    query = f"""
        SELECT DISTINCT
            employee_id AS EmpId,
            toDate(visit_date) AS VisitDate
        FROM {table_name}
    """

    # ClickHouse -> PyArrow -> Polars
    arrow_table = client.query_arrow(query)
    # NOTE(review): the dtypes here are whatever Arrow reports for the
    # ClickHouse columns; they are not forced to the Int64/Date schema used
    # for the empty result above. Confirm they match the other side of the
    # join, since join keys need compatible dtypes.
    return pl.from_arrow(arrow_table)
# ==========================================================
# Main
# ==========================================================
@@ -157,7 +166,14 @@ def main():
sql_engine,
run_date,
)
qf=fetch_quiz_empids()
db_df = get_empids_clickhouse_OQAD(client)
matched = qf.join(
db_df,
on=["EmpId", "VisitDate"],
how="inner",
)
# ------------------------------------------------------
# Config
# ------------------------------------------------------