17
17
18
18
#include " vec/exec/format/table/iceberg/arrow_schema_util.h"
19
19
20
+ #include < arrow/api.h>
21
+ #include < arrow/io/api.h>
22
+ #include < arrow/status.h>
20
23
#include < arrow/type.h>
21
24
#include < arrow/util/key_value_metadata.h>
22
25
#include < gtest/gtest.h>
26
+ #include < parquet/api/reader.h>
27
+ #include < parquet/arrow/writer.h>
28
+ #include < parquet/schema.h>
23
29
30
+ #include " io/fs/local_file_system.h"
24
31
#include " vec/exec/format/table/iceberg/schema.h"
25
32
#include " vec/exec/format/table/iceberg/schema_parser.h"
26
33
@@ -217,5 +224,81 @@ TEST(ArrowSchemaUtilTest, test_list_field) {
217
224
EXPECT_EQ (" 32" , arrow_list->value_field ()->metadata ()->Get (pfid).ValueUnsafe ());
218
225
}
219
226
227
+ TEST (ArrowSchemaUtilTest, test_parquet_filed_id) {
228
+ std::string test_dir = " ut_dir/test_parquet_filed_id" ;
229
+ Status st;
230
+ st = io::global_local_filesystem ()->delete_directory (test_dir);
231
+ ASSERT_TRUE (st.ok ()) << st;
232
+ st = io::global_local_filesystem ()->create_directory (test_dir);
233
+ ASSERT_TRUE (st.ok ()) << st;
234
+
235
+ std::shared_ptr<arrow::Array> id_array;
236
+ std::shared_ptr<arrow::Array> name_array;
237
+
238
+ arrow::Int32Builder id_builder;
239
+ ASSERT_TRUE (id_builder.Append (1 ).ok ());
240
+ ASSERT_TRUE (id_builder.Append (2 ).ok ());
241
+ ASSERT_TRUE (id_builder.Append (3 ).ok ());
242
+ auto && result_id = id_builder.Finish ();
243
+ ASSERT_TRUE (result_id.ok ());
244
+ id_array = std::move (result_id).ValueUnsafe ();
245
+
246
+ arrow::StringBuilder name_builder;
247
+ ASSERT_TRUE (name_builder.Append (" Alice" ).ok ());
248
+ ASSERT_TRUE (name_builder.Append (" Bob" ).ok ());
249
+ ASSERT_TRUE (name_builder.Append (" Charlie" ).ok ());
250
+ auto && result_name = name_builder.Finish ();
251
+ ASSERT_TRUE (result_name.ok ());
252
+ name_array = std::move (result_name).ValueUnsafe ();
253
+
254
+ // 定义表的 Schema
255
+ std::vector<NestedField> nested_fields;
256
+ nested_fields.reserve (2 );
257
+ NestedField field1 (false , 17 , " field_1" , std::make_unique<IntegerType>(), std::nullopt);
258
+ NestedField field2 (false , 36 , " field_2" , std::make_unique<StringType>(), std::nullopt);
259
+ nested_fields.emplace_back (std::move (field1));
260
+ nested_fields.emplace_back (std::move (field2));
261
+
262
+ Schema schema (1 , std::move (nested_fields));
263
+
264
+ std::vector<std::shared_ptr<arrow::Field>> fields;
265
+ st = ArrowSchemaUtil::convert (&schema, " utc" , fields);
266
+ auto arrow_schema = arrow::schema (fields);
267
+
268
+ // create arrow table
269
+ auto table = arrow::Table::Make (arrow_schema, {id_array, name_array});
270
+
271
+ std::string file_path = test_dir + " /f1.parquet" ;
272
+ std::shared_ptr<arrow::io::FileOutputStream> outfile;
273
+ auto && result_file = arrow::io::FileOutputStream::Open (file_path);
274
+ ASSERT_TRUE (result_file.ok ());
275
+ outfile = std::move (result_file).ValueUnsafe ();
276
+
277
+ // arrow table to parquet file
278
+ PARQUET_THROW_NOT_OK (
279
+ parquet::arrow::WriteTable (*table, arrow::default_memory_pool (), outfile, 1024 ));
280
+
281
+ // open parquet with parquet's API
282
+ std::unique_ptr<parquet::ParquetFileReader> parquet_reader =
283
+ parquet::ParquetFileReader::OpenFile (file_path, false );
284
+
285
+ // get MessageType
286
+ std::shared_ptr<parquet::FileMetaData> file_metadata = parquet_reader->metadata ();
287
+ auto schema_descriptor = file_metadata->schema ();
288
+ const parquet::schema::Node& root = *schema_descriptor->group_node ();
289
+ const auto & group_node = static_cast <const parquet::schema::GroupNode&>(root);
290
+
291
+ EXPECT_EQ (2 , group_node.field_count ());
292
+ auto filed1 = group_node.field (0 );
293
+ auto filed2 = group_node.field (1 );
294
+ EXPECT_EQ (" field_1" , filed1->name ());
295
+ EXPECT_EQ (17 , filed1->field_id ());
296
+ EXPECT_EQ (" field_2" , filed2->name ());
297
+ EXPECT_EQ (36 , filed2->field_id ());
298
+
299
+ st = io::global_local_filesystem ()->delete_directory (test_dir);
300
+ EXPECT_TRUE (st.ok ()) << st;
301
+ }
302
+
220
303
} // namespace iceberg
221
304
} // namespace doris
0 commit comments