redpanda-data · nvartolomei · Jan 29, 2025 · Jan 28, 2025
diff --git a/src/v/datalake/record_multiplexer.cc b/src/v/datalake/record_multiplexer.cc
@@ -326,10 +326,22 @@ record_multiplexer::handle_invalid_record(
             }
         }
 
+        auto record_type = key_value_translator{}.build_type(std::nullopt);
+        if (!load_res.value().fill_registered_ids(record_type.type)) {
+            // This shouldn't happen because we ensured the schema with the
+            // call to table_creator. Probably someone managed to change the
+            // table between two calls.
+            vlog(
+              _log.warn,
+              "expected to successfully fill field IDs for record {}",
+              offset);
+            co_return writer_error::parquet_conversion_error;
+        }
+
         _invalid_record_writer = std::make_unique<partitioning_writer>(
           *_writer_factory,
           load_res.value().schema.schema_id,
-          key_value_translator{}.build_type(std::nullopt).type,
+          std::move(record_type.type),
           std::move(load_res.value().partition_spec));
     }
 

diff --git a/tests/rptest/tests/datalake/datalake_dlq_test.py b/tests/rptest/tests/datalake/datalake_dlq_test.py
@@ -234,9 +234,11 @@ def test_no_dlq_table_for_key_value_mode(self, cloud_storage_type,
 
     @cluster(num_nodes=4)
     @matrix(cloud_storage_type=supported_storage_types(),
-            query_engine=[QueryEngineType.SPARK, QueryEngineType.TRINO])
+            query_engine=[QueryEngineType.SPARK, QueryEngineType.TRINO],
+            filesystem_catalog_mode=[True, False])
     def test_dlq_table_for_invalid_records(self, cloud_storage_type,
-                                           query_engine):
+                                           query_engine,
+                                           filesystem_catalog_mode):
         """
         Produce records with no schema to `value_schema_id_prefix` mode topic.
         These records will fail translate and should be written to DLQ table.
@@ -249,7 +251,7 @@ def test_dlq_table_for_invalid_records(self, cloud_storage_type,
 
         with DatalakeServices(self.test_ctx,
                               redpanda=self.redpanda,
-                              filesystem_catalog_mode=True,
+                              filesystem_catalog_mode=filesystem_catalog_mode,
                               include_query_engines=[query_engine]) as dl:
             dl.create_iceberg_enabled_topic(
                 self.topic_name, iceberg_mode="value_schema_id_prefix")
@@ -297,17 +299,23 @@ def test_dlq_table_for_invalid_records(self, cloud_storage_type,
 
     @cluster(num_nodes=4)
     @matrix(cloud_storage_type=supported_storage_types(),
-            query_engine=[QueryEngineType.SPARK, QueryEngineType.TRINO])
+            query_engine=[QueryEngineType.SPARK, QueryEngineType.TRINO],
+            filesystem_catalog_mode=[True, False])
     def test_dlq_table_for_mixed_records(self, cloud_storage_type,
-                                         query_engine):
+                                         query_engine,
+                                         filesystem_catalog_mode):
         """
         Produce a mix of valid and invalid records to a `value_schema_id_prefix`
         mode topic. Valid records should be written to the main table and
         invalid records should be written to the DLQ table.
+
+        It is important to test both with and without filesystem catalog
+        mode because the two modes assign field ids differently, which was
+        found to cause translation issues.
         """
         with DatalakeServices(self.test_ctx,
                               redpanda=self.redpanda,
-                              filesystem_catalog_mode=True,
+                              filesystem_catalog_mode=filesystem_catalog_mode,
                               include_query_engines=[query_engine]) as dl:
             dl.create_iceberg_enabled_topic(
                 self.topic_name, iceberg_mode="value_schema_id_prefix")