final commit
This commit is contained in:
@@ -52,6 +52,45 @@ from src.dim import *
|
||||
# Helpers
|
||||
# ==========================================================
|
||||
|
||||
def get_dates_from_yaml(filename: str):
    """Load the pipeline date window and flag from a YAML file.

    The file is parsed with ``yaml.safe_load`` and must contain a
    top-level ``pipeline`` mapping with ``start_date``, ``end_date`` and
    ``flag`` entries.

    Args:
        filename: Path of the YAML file to read.

    Returns:
        A ``(start_date, end_date, flag)`` tuple. Both dates are
        ``date`` objects built with ``date.fromisoformat`` from the
        stringified YAML values; ``flag`` is the stringified ``flag``
        entry.
    """
    with open(filename, "r") as stream:
        document = yaml.safe_load(stream)

    pipeline_cfg = document["pipeline"]

    # Values are passed through str() first so that both plain strings
    # and already-parsed YAML scalars go through the same ISO parser.
    # The generator keeps the original order: start is looked up and
    # parsed before end is touched.
    window_start, window_end = (
        date.fromisoformat(str(pipeline_cfg[key]))
        for key in ("start_date", "end_date")
    )
    run_flag = str(pipeline_cfg["flag"])

    return window_start, window_end, run_flag
|
||||
|
||||
|
||||
def write_table_to_yaml(
    data: dict,
    run_date: date,
    filename: str | None = None
):
    """Dump ``data`` to a YAML file and report the path on stdout.

    Args:
        data: Mapping to serialise with ``yaml.dump`` (block style,
            keys kept in insertion order).
        run_date: Only used to build the default file name.
        filename: Destination path. When ``None``, the file is named
            ``elt_pipeline_<run_date>.yml``.
    """
    # Fall back to a run-date-stamped name when the caller gave none.
    target = f"elt_pipeline_{run_date}.yml" if filename is None else filename

    with open(target, "w") as out:
        yaml.dump(data, out, default_flow_style=False, sort_keys=False)

    print(f"Table written to {target}")
|
||||
|
||||
|
||||
|
||||
|
||||
def table_exists(
|
||||
client,
|
||||
table_name: str,
|
||||
@@ -68,7 +107,7 @@ def table_exists(
|
||||
# Main
|
||||
# ==========================================================
|
||||
|
||||
def main():
|
||||
def elt(run_date : date):
|
||||
|
||||
log.info("=" * 80)
|
||||
log.info("Hello from data-move Python data pipeline!")
|
||||
@@ -77,13 +116,7 @@ def main():
|
||||
# Run Date
|
||||
# ------------------------------------------------------
|
||||
|
||||
if len(sys.argv) > 1:
|
||||
run_date = datetime.strptime(
|
||||
sys.argv[1],
|
||||
"%Y-%m-%d",
|
||||
).date()
|
||||
else:
|
||||
run_date = date.today() - timedelta(days=1)
|
||||
|
||||
|
||||
log.info(
|
||||
"Pipeline Run Date: %s",
|
||||
@@ -126,7 +159,7 @@ def main():
|
||||
# ------------------------------------------------------
|
||||
|
||||
with open(
|
||||
"t.yml",
|
||||
"y.yml",
|
||||
"r",
|
||||
) as file:
|
||||
|
||||
@@ -211,8 +244,6 @@ def main():
|
||||
table_name,
|
||||
)
|
||||
|
||||
elif operation =="ONLY_INSERT" :
|
||||
continue
|
||||
else:
|
||||
|
||||
delete_existing_data(
|
||||
@@ -253,8 +284,8 @@ def main():
|
||||
log.info("=" * 80)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
def main() :
|
||||
|
||||
config_file = Path("Pipeline_config.yml")
|
||||
|
||||
if not config_file.exists():
|
||||
@@ -273,41 +304,97 @@ if __name__ == "__main__":
|
||||
config = yaml.safe_load(f)
|
||||
|
||||
|
||||
|
||||
for attempt in range(3):
|
||||
try:
|
||||
main()
|
||||
|
||||
with open("Pipeline_config.yml", "r") as f:
|
||||
config = yaml.safe_load(f)
|
||||
p_start_date, p_end_date , flag= get_dates_from_yaml("elt_pipeline_custom_dates.yml")
|
||||
if flag =="Y" :
|
||||
start_date=p_start_date
|
||||
end_date=p_end_date
|
||||
|
||||
config["pipeline"]["last_successful_run_date"] = str(date.today())
|
||||
elif len(sys.argv) > 1:
|
||||
start_date = datetime.strptime(
|
||||
sys.argv[1],
|
||||
"%Y-%m-%d",
|
||||
).date()
|
||||
end_date=start_date + timedelta(days=1)
|
||||
else:
|
||||
start_date = date.today() - timedelta(days=1)
|
||||
end_date=start_date
|
||||
|
||||
with open("Pipeline_config.yml", "w") as f:
|
||||
yaml.safe_dump(config, f, sort_keys=False)
|
||||
log.info(
|
||||
"Pipeline Start Date: %s",
|
||||
start_date,
|
||||
)
|
||||
|
||||
|
||||
failed_dates=[]
|
||||
successful_dates=[]
|
||||
filename_successful = "successful_Pipeline_dates_config.yml"
|
||||
filename_failed = "failed_Pipeline_dates_config.yml"
|
||||
|
||||
while start_date <=end_date:
|
||||
run_date = start_date
|
||||
|
||||
log.info(
|
||||
f"Pipeline completed successfully. "
|
||||
f"last_successful_run_date={date.today()}"
|
||||
for attempt in range(3):
|
||||
try:
|
||||
elt(run_date)
|
||||
|
||||
successful_dates.append({
|
||||
'pipeline_trigeered_on_date': str(date.today()),
|
||||
'last_successful_run_date': run_date,
|
||||
})
|
||||
|
||||
log.info(
|
||||
f"Pipeline completed successfully. "
|
||||
f"pipeline_trigeered_on_date={date.today()}"
|
||||
f"last_successful_run_date={run_date}"
|
||||
)
|
||||
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
|
||||
|
||||
|
||||
failed_dates.append({
|
||||
'pipeline_trigeered_on_date': str(date.today()),
|
||||
'failed_run_date': run_date,
|
||||
"attempt" : attempt
|
||||
})
|
||||
|
||||
|
||||
if attempt == 2:
|
||||
raise
|
||||
|
||||
log.warning(
|
||||
f"Pipeline failed. Retry {attempt + 1}/3. Error: {e}"
|
||||
)
|
||||
|
||||
sleep(5)
|
||||
|
||||
|
||||
start_date=start_date + timedelta(days=1)
|
||||
|
||||
|
||||
|
||||
with open(filename_successful, "w") as f:
|
||||
yaml.dump(
|
||||
successful_dates,
|
||||
f,
|
||||
default_flow_style=False,
|
||||
sort_keys=False,
|
||||
)
|
||||
if len(failed_dates) == 0 :
|
||||
failed_dates.append({
|
||||
'pipeline_trigeered_on_date': str(date.today()),
|
||||
'failed_run_date': "none",
|
||||
"attempt" : "none"
|
||||
})
|
||||
with open(filename_failed, "w") as f:
|
||||
yaml.dump(failed_dates,
|
||||
f, default_flow_style=False,
|
||||
sort_keys=False)
|
||||
|
||||
|
||||
break
|
||||
if __name__ == "__main__":
|
||||
|
||||
except Exception as e:
|
||||
|
||||
with open("Pipeline_config.yml", "r") as f:
|
||||
config = yaml.safe_load(f)
|
||||
|
||||
config["pipeline"]["run_date"] = str(date.today())
|
||||
|
||||
with open("Pipeline_config.yml", "w") as f:
|
||||
yaml.safe_dump(config, f, sort_keys=False)
|
||||
|
||||
if attempt == 2:
|
||||
raise
|
||||
|
||||
log.warning(
|
||||
f"Pipeline failed. Retry {attempt + 1}/3. Error: {e}"
|
||||
)
|
||||
|
||||
sleep(5)
|
||||
main()
|
||||
Reference in New Issue
Block a user