Skip to content

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit ab32e2f

Browse filesBrowse files
committed
add test
1 parent 5d0a5a9 commit ab32e2f
Copy full SHA for ab32e2f

File tree

1 file changed

+83
-0
lines changed
Filter options

1 file changed

+83
-0
lines changed

‎be/test/vec/exec/format/table/iceberg/arrow_schema_util_test.cpp

Copy file name to clipboardExpand all lines: be/test/vec/exec/format/table/iceberg/arrow_schema_util_test.cpp
+83
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,17 @@
1717

1818
#include "vec/exec/format/table/iceberg/arrow_schema_util.h"
1919

20+
#include <arrow/api.h>
21+
#include <arrow/io/api.h>
22+
#include <arrow/status.h>
2023
#include <arrow/type.h>
2124
#include <arrow/util/key_value_metadata.h>
2225
#include <gtest/gtest.h>
26+
#include <parquet/api/reader.h>
27+
#include <parquet/arrow/writer.h>
28+
#include <parquet/schema.h>
2329

30+
#include "io/fs/local_file_system.h"
2431
#include "vec/exec/format/table/iceberg/schema.h"
2532
#include "vec/exec/format/table/iceberg/schema_parser.h"
2633

@@ -217,5 +224,81 @@ TEST(ArrowSchemaUtilTest, test_list_field) {
217224
EXPECT_EQ("32", arrow_list->value_field()->metadata()->Get(pfid).ValueUnsafe());
218225
}
219226

227+
TEST(ArrowSchemaUtilTest, test_parquet_filed_id) {
228+
std::string test_dir = "ut_dir/test_parquet_filed_id";
229+
Status st;
230+
st = io::global_local_filesystem()->delete_directory(test_dir);
231+
ASSERT_TRUE(st.ok()) << st;
232+
st = io::global_local_filesystem()->create_directory(test_dir);
233+
ASSERT_TRUE(st.ok()) << st;
234+
235+
std::shared_ptr<arrow::Array> id_array;
236+
std::shared_ptr<arrow::Array> name_array;
237+
238+
arrow::Int32Builder id_builder;
239+
ASSERT_TRUE(id_builder.Append(1).ok());
240+
ASSERT_TRUE(id_builder.Append(2).ok());
241+
ASSERT_TRUE(id_builder.Append(3).ok());
242+
auto&& result_id = id_builder.Finish();
243+
ASSERT_TRUE(result_id.ok());
244+
id_array = std::move(result_id).ValueUnsafe();
245+
246+
arrow::StringBuilder name_builder;
247+
ASSERT_TRUE(name_builder.Append("Alice").ok());
248+
ASSERT_TRUE(name_builder.Append("Bob").ok());
249+
ASSERT_TRUE(name_builder.Append("Charlie").ok());
250+
auto&& result_name = name_builder.Finish();
251+
ASSERT_TRUE(result_name.ok());
252+
name_array = std::move(result_name).ValueUnsafe();
253+
254+
// 定义表的 Schema
255+
std::vector<NestedField> nested_fields;
256+
nested_fields.reserve(2);
257+
NestedField field1(false, 17, "field_1", std::make_unique<IntegerType>(), std::nullopt);
258+
NestedField field2(false, 36, "field_2", std::make_unique<StringType>(), std::nullopt);
259+
nested_fields.emplace_back(std::move(field1));
260+
nested_fields.emplace_back(std::move(field2));
261+
262+
Schema schema(1, std::move(nested_fields));
263+
264+
std::vector<std::shared_ptr<arrow::Field>> fields;
265+
st = ArrowSchemaUtil::convert(&schema, "utc", fields);
266+
auto arrow_schema = arrow::schema(fields);
267+
268+
// create arrow table
269+
auto table = arrow::Table::Make(arrow_schema, {id_array, name_array});
270+
271+
std::string file_path = test_dir + "/f1.parquet";
272+
std::shared_ptr<arrow::io::FileOutputStream> outfile;
273+
auto&& result_file = arrow::io::FileOutputStream::Open(file_path);
274+
ASSERT_TRUE(result_file.ok());
275+
outfile = std::move(result_file).ValueUnsafe();
276+
277+
// arrow table to parquet file
278+
PARQUET_THROW_NOT_OK(
279+
parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), outfile, 1024));
280+
281+
// open parquet with parquet's API
282+
std::unique_ptr<parquet::ParquetFileReader> parquet_reader =
283+
parquet::ParquetFileReader::OpenFile(file_path, false);
284+
285+
// get MessageType
286+
std::shared_ptr<parquet::FileMetaData> file_metadata = parquet_reader->metadata();
287+
auto schema_descriptor = file_metadata->schema();
288+
const parquet::schema::Node& root = *schema_descriptor->group_node();
289+
const auto& group_node = static_cast<const parquet::schema::GroupNode&>(root);
290+
291+
EXPECT_EQ(2, group_node.field_count());
292+
auto filed1 = group_node.field(0);
293+
auto filed2 = group_node.field(1);
294+
EXPECT_EQ("field_1", filed1->name());
295+
EXPECT_EQ(17, filed1->field_id());
296+
EXPECT_EQ("field_2", filed2->name());
297+
EXPECT_EQ(36, filed2->field_id());
298+
299+
st = io::global_local_filesystem()->delete_directory(test_dir);
300+
EXPECT_TRUE(st.ok()) << st;
301+
}
302+
220303
} // namespace iceberg
221304
} // namespace doris

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.