From 097d30f507463c3114bd154271260e58d1114110 Mon Sep 17 00:00:00 2001 From: yan ma Date: Thu, 15 Aug 2024 00:14:34 +0800 Subject: [PATCH] fix parsing array struct when repetition_type is 'REPEATED' --- velox/dwio/parquet/reader/ParquetReader.cpp | 17 +++++++++++++++++ .../examples/proto-struct-with-array.parquet | Bin 0 -> 1576 bytes 2 files changed, 17 insertions(+) create mode 100644 velox/dwio/parquet/tests/examples/proto-struct-with-array.parquet diff --git a/velox/dwio/parquet/reader/ParquetReader.cpp b/velox/dwio/parquet/reader/ParquetReader.cpp index 635366f19f136..d5b9c6995b9d4 100644 --- a/velox/dwio/parquet/reader/ParquetReader.cpp +++ b/velox/dwio/parquet/reader/ParquetReader.cpp @@ -464,6 +464,23 @@ std::unique_ptr ReaderBase::getParquetColumnInfo( maxDefine, isOptional, isRepeated); + } else { + // Row type + auto type = + createRowType(children, isFileColumnNamesReadAsLowerCase()); + return std::make_unique( + std::move(type), + std::move(children), + curSchemaIdx, + maxSchemaElementIdx, + ParquetTypeWithId::kNonLeaf, // columnIdx, + std::move(name), + std::nullopt, + std::nullopt, + maxRepeat, + maxDefine, + isOptional, + isRepeated); } } else { // Row type diff --git a/velox/dwio/parquet/tests/examples/proto-struct-with-array.parquet b/velox/dwio/parquet/tests/examples/proto-struct-with-array.parquet new file mode 100644 index 0000000000000000000000000000000000000000..325a8370ad20ec31010fde0c816895da9f5dbd27 GIT binary patch literal 1576 zcmcIlO>5gg5S?rsql%Icnq5|qgD{N<#x?oCY2!n{ZAEKv64iDOJsH{F!~)4uB-v0( z@BJM;_owufA5^-l4@Z{ekVAVh>zOxi-n`LDMyq>_0q^0x8b74l?^SjJrp%q&06h8-y4iMdSJ@MDH4c~HjX3j($=&sN1 zW|q&!OYxG3d&~^8@dlzhDa$1b0`rz(6tkBD*J153G=T1;gzF$B0g1WSKnPOy6M$~KQ|W5 z5A#DO!$$Zca>TK`;67WBib&?m76{4rqTmNgwH)UCc)*uPhjcg;fc!=Tfl{N?GyS_6 z3+tZPeSOS=k#BjS>(f75Q`2EhwX*hMsK_@Kv&ZT;SydBky3mDR6_J}cL*_TtV}7>H zA+wumr}b9v46coS`}(TY;qmaR$9wg^82X@n)jvIvzps*~J`|Fl