final commit

This commit is contained in:
Ankit Malik
2026-06-23 18:23:58 +05:30
parent e218aafc26
commit 6b2d754981
15 changed files with 2803 additions and 323 deletions
+131 -44
View File
@@ -52,6 +52,45 @@ from src.dim import *
# Helpers
# ==========================================================
def get_dates_from_yaml(filename: str) -> tuple[date, date, str]:
    """Read the pipeline date window and its flag from a YAML file.

    The file must contain a top-level ``pipeline`` mapping with the
    entries ``start_date``, ``end_date`` and ``flag``.

    Args:
        filename: Path of the YAML file to read.

    Returns:
        A ``(start_date, end_date, flag)`` tuple. Both dates are
        ``datetime.date`` objects; ``flag`` is the stored value passed
        through ``str()`` without any further normalisation.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If ``pipeline`` or one of its three entries is missing.
        ValueError: If a date value is not an ISO ``YYYY-MM-DD`` string.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(filename, "r", encoding="utf-8") as file:
        data = yaml.safe_load(file)

    pipeline = data["pipeline"]

    # The values are round-tripped through str() before parsing, so both a
    # quoted ISO string and an already-parsed date value are accepted.
    start_date = date.fromisoformat(str(pipeline["start_date"]))
    end_date = date.fromisoformat(str(pipeline["end_date"]))
    flag = str(pipeline["flag"])

    return start_date, end_date, flag
def write_table_to_yaml(
    data: dict,
    run_date: date,
    filename: str | None = None
):
    """Dump ``data`` to a YAML file and print where it was written.

    Args:
        data: Mapping to serialise. Keys are written in their existing
            order (no sorting) using block style.
        run_date: Used only to build the default file name.
        filename: Destination path. When ``None``, the file is named
            ``elt_pipeline_<run_date>.yml``.
    """
    target = f"elt_pipeline_{run_date}.yml" if filename is None else filename

    dump_options = {"default_flow_style": False, "sort_keys": False}
    with open(target, "w") as out:
        yaml.dump(data, out, **dump_options)

    print(f"Table written to {target}")
def table_exists(
client,
table_name: str,
@@ -68,7 +107,7 @@ def table_exists(
# Main
# ==========================================================
def main():
def elt(run_date : date):
log.info("=" * 80)
log.info("Hello from data-move Python data pipeline!")
@@ -77,13 +116,7 @@ def main():
# Run Date
# ------------------------------------------------------
if len(sys.argv) > 1:
run_date = datetime.strptime(
sys.argv[1],
"%Y-%m-%d",
).date()
else:
run_date = date.today() - timedelta(days=1)
log.info(
"Pipeline Run Date: %s",
@@ -126,7 +159,7 @@ def main():
# ------------------------------------------------------
with open(
"t.yml",
"y.yml",
"r",
) as file:
@@ -211,8 +244,6 @@ def main():
table_name,
)
elif operation =="ONLY_INSERT" :
continue
else:
delete_existing_data(
@@ -253,8 +284,8 @@ def main():
log.info("=" * 80)
if __name__ == "__main__":
def main() :
config_file = Path("Pipeline_config.yml")
if not config_file.exists():
@@ -273,41 +304,97 @@ if __name__ == "__main__":
config = yaml.safe_load(f)
for attempt in range(3):
try:
main()
with open("Pipeline_config.yml", "r") as f:
config = yaml.safe_load(f)
p_start_date, p_end_date , flag= get_dates_from_yaml("elt_pipeline_custom_dates.yml")
if flag =="Y" :
start_date=p_start_date
end_date=p_end_date
config["pipeline"]["last_successful_run_date"] = str(date.today())
elif len(sys.argv) > 1:
start_date = datetime.strptime(
sys.argv[1],
"%Y-%m-%d",
).date()
end_date=start_date + timedelta(days=1)
else:
start_date = date.today() - timedelta(days=1)
end_date=start_date
with open("Pipeline_config.yml", "w") as f:
yaml.safe_dump(config, f, sort_keys=False)
log.info(
"Pipeline Start Date: %s",
start_date,
)
failed_dates=[]
successful_dates=[]
filename_successful = "successful_Pipeline_dates_config.yml"
filename_failed = "failed_Pipeline_dates_config.yml"
while start_date <=end_date:
run_date = start_date
log.info(
f"Pipeline completed successfully. "
f"last_successful_run_date={date.today()}"
for attempt in range(3):
try:
elt(run_date)
successful_dates.append({
'pipeline_trigeered_on_date': str(date.today()),
'last_successful_run_date': run_date,
})
log.info(
f"Pipeline completed successfully. "
f"pipeline_trigeered_on_date={date.today()}"
f"last_successful_run_date={run_date}"
)
break
except Exception as e:
failed_dates.append({
'pipeline_trigeered_on_date': str(date.today()),
'failed_run_date': run_date,
"attempt" : attempt
})
if attempt == 2:
raise
log.warning(
f"Pipeline failed. Retry {attempt + 1}/3. Error: {e}"
)
sleep(5)
start_date=start_date + timedelta(days=1)
with open(filename_successful, "w") as f:
yaml.dump(
successful_dates,
f,
default_flow_style=False,
sort_keys=False,
)
if len(failed_dates) == 0 :
failed_dates.append({
'pipeline_trigeered_on_date': str(date.today()),
'failed_run_date': "none",
"attempt" : "none"
})
with open(filename_failed, "w") as f:
yaml.dump(failed_dates,
f, default_flow_style=False,
sort_keys=False)
break
if __name__ == "__main__":
except Exception as e:
with open("Pipeline_config.yml", "r") as f:
config = yaml.safe_load(f)
config["pipeline"]["run_date"] = str(date.today())
with open("Pipeline_config.yml", "w") as f:
yaml.safe_dump(config, f, sort_keys=False)
if attempt == 2:
raise
log.warning(
f"Pipeline failed. Retry {attempt + 1}/3. Error: {e}"
)
sleep(5)
main()