3rd commit

This commit is contained in:
Ankit Malik
2026-06-12 15:39:39 +05:30
parent 80bb585cdb
commit 8aaae1e27d
12 changed files with 1280 additions and 120 deletions
+49 -16
View File
@@ -2,34 +2,46 @@
# requires-python = ">=3.11"
# dependencies = [
# "polars>=0.20.0",
# "pyarrow>=18.0.0",
# "sqlalchemy>=2.0.0",
# "pyodbc>=5.0.0",
# "pyyaml>=6.0.3",
# "clickhouse-connect>=0.7.0",
# "clickhouse-sqlalchemy>=0.3.2",
# "pyyaml>=6.0.3",
# "python-dotenv>=1.0.0",
# "pyarrow>=18.0.0"
# ]
# ///
from __future__ import annotations
import os
import yaml
import pyarrow
# import pyarrow
import sys
import logging
from datetime import date, timedelta
import polars as pl
from sqlalchemy import create_engine, text
from sqlalchemy.engine import Engine, URL
import clickhouse_connect
import yaml
from dotenv import load_dotenv
from log import log
from clickhouse_task.create_table import create_clickhouse_table , check
from sqlalchemy import create_engine, text
from sqlalchemy.engine import Engine, URL
import clickhouse_connect
from log import log
from clickhouse_task.create_table import *
from clickhouse_task.delete_task import *
from clickhouse_task.load_table import *
from db_con.connection import *
from mids import *
from masters.dimensions import *
from masters.bridge import *
from kpi.facts import *
@@ -39,16 +51,15 @@ from mids import *
def main():
log.info("=" * 80)
log.info("Hello from data-move Python data pipeline !")
check()
if len(sys.argv) > 1:
run_date = datetime.strptime(sys.argv[1], "%Y-%m-%d").date()
else:
run_date = date.today() - timedelta(days=1)
print(run_date)
print(type(run_date))
log.info(f"Data-pipeline running Date is -:{run_date}")
# connecting with both db servers sql-server
@@ -74,11 +85,15 @@ def main():
for table in config["tables"]:
table_name=table["name"],
table_type=table["type"],
table_name=table["name"]
table_type=table["type"]
operation=table["operation"]
log.info(table_name, operation)
log.info("=" * 80)
log.info("TABLE=%s | TYPE=%s | OPERATION=%s",
table_name,
table_type,
operation)
fn=f"fetch_{table_name}"
list=["Attendance", "Journey_Plan", "Web_Logins"]
@@ -101,6 +116,24 @@ def main():
# Step 2
if operation == "DELETE+INSERT" :
truncate_table(client , table_name )
log.info(f"Truncate a ClickHouse table - {table_name}")
load_to_clickhouse(
client=client,
table_name=table_name,
df=df,
)
else:
print("table is fact ")