From 02e44c0feb9b01ef63a165b9325b4d40e6af5860 Mon Sep 17 00:00:00 2001
From: SandhraSokhal
Date: Thu, 1 Feb 2024 09:41:18 -0800
Subject: [PATCH] log all the errors of dataframe

---
 src/scripts/glue_jobs/glue_job.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/scripts/glue_jobs/glue_job.py b/src/scripts/glue_jobs/glue_job.py
index 19863e3..d86cd13 100644
--- a/src/scripts/glue_jobs/glue_job.py
+++ b/src/scripts/glue_jobs/glue_job.py
@@ -94,9 +94,14 @@ def resolve_choice_and_write_output_frame(self, transformed_frame, partition_key
         self.logger.info("Glue job finished.")
 
     def log_errors(self, dynamic_frame):
-        self.logger.error("The total number of error count in dynamic_frame is {} ".format(dynamic_frame.stageErrorsCount()))
-        error_record = dynamic_frame.errorsAsDynamicFrame().toDF().head()
-        error_fields = error_record["error"]
-        for key in error_fields.asDict().keys():
-            self.logger.error("{} : {}".format(key, error_fields[key]))
-        raise Exception("Job failed with error : {}".format(error_fields["msg"]))
+        self.logger.error(
+            "The total number of error count in dynamic frame is {} ".format(dynamic_frame.stageErrorsCount()))
+        df = dynamic_frame.errorsAsDynamicFrame().toDF()
+        last_error_msg = "UNKNOWN"
+        for row in df.rdd.collect():
+            error_fields = row["error"]
+            for key in error_fields.asDict().keys():
+                self.logger.error("{} : {}".format(key, error_fields[key]))
+            if error_fields["msg"] is not None:
+                last_error_msg = error_fields["msg"]
+        raise Exception("Job failed with error : {}".format(last_error_msg))