@@ -323,9 +323,6 @@ Status ScalarColumnReader::_read_nested_column(ColumnPtr& doris_column, DataType
323
323
size_t * read_rows, bool * eof, bool is_dict_filter,
324
324
bool align_rows) {
325
325
std::unique_ptr<FilterMap> nested_filter_map;
326
- std::unique_ptr<std::vector<uint8_t >> nested_filter_map_data;
327
-
328
- size_t current_row;
329
326
330
327
FilterMap* current_filter_map = &filter_map;
331
328
size_t origin_size = 0 ;
@@ -337,17 +334,22 @@ Status ScalarColumnReader::_read_nested_column(ColumnPtr& doris_column, DataType
337
334
} else {
338
335
_rep_levels.resize (0 );
339
336
_def_levels.resize (0 );
337
+ if (_nested_filter_map_data) {
338
+ _nested_filter_map_data->resize (0 );
339
+ }
340
340
}
341
341
size_t parsed_rows = 0 ;
342
342
size_t remaining_values = _chunk_reader->remaining_num_values ();
343
343
bool has_rep_level = _chunk_reader->max_rep_level () > 0 ;
344
344
bool has_def_level = _chunk_reader->max_def_level () > 0 ;
345
345
346
+ // Handle repetition levels (indicates nesting structure)
346
347
if (has_rep_level) {
347
348
LevelDecoder& rep_decoder = _chunk_reader->rep_level_decoder ();
349
+ // Read repetition levels until batch is full or no more values
348
350
while (parsed_rows <= batch_size && remaining_values > 0 ) {
349
351
level_t rep_level = rep_decoder.get_next ();
350
- if (rep_level == 0 ) {
352
+ if (rep_level == 0 ) { // rep_level 0 indicates start of new row
351
353
if (parsed_rows == batch_size) {
352
354
rep_decoder.rewind_one ();
353
355
break ;
@@ -358,13 +360,15 @@ Status ScalarColumnReader::_read_nested_column(ColumnPtr& doris_column, DataType
358
360
remaining_values--;
359
361
}
360
362
361
- if (filter_map.has_filter ()) {
362
- nested_filter_map_data = std::make_unique<std::vector<uint8_t >>();
363
- nested_filter_map_data->resize (_rep_levels.size ());
364
- current_row = _orig_filter_map_index;
363
+ // Generate nested filter map
364
+ if (filter_map.has_filter () && (!filter_map.filter_all ())) {
365
+ if (_nested_filter_map_data == nullptr ) {
366
+ _nested_filter_map_data.reset (new std::vector<uint8_t >());
367
+ }
365
368
RETURN_IF_ERROR (filter_map.generate_nested_filter_map (
366
- _rep_levels, *nested_filter_map_data, &nested_filter_map, &current_row, false ,
367
- 0 ));
369
+ _rep_levels, *_nested_filter_map_data, &nested_filter_map,
370
+ &_orig_filter_map_index, origin_size));
371
+ // Update current_filter_map to nested_filter_map
368
372
current_filter_map = nested_filter_map.get ();
369
373
}
370
374
} else if (!align_rows) {
@@ -374,15 +378,16 @@ Status ScalarColumnReader::_read_nested_column(ColumnPtr& doris_column, DataType
374
378
_rep_levels.resize (parsed_rows, 0 );
375
379
}
376
380
381
+ // Process definition levels (indicates null values)
377
382
size_t parsed_values = _chunk_reader->remaining_num_values () - remaining_values;
378
-
379
383
_def_levels.resize (origin_size + parsed_values);
380
384
if (has_def_level) {
381
385
_chunk_reader->def_level_decoder ().get_levels (&_def_levels[origin_size], parsed_values);
382
386
} else {
383
387
std::fill (_def_levels.begin () + origin_size, _def_levels.end (), 0 );
384
388
}
385
389
390
+ // Handle nullable columns
386
391
MutableColumnPtr data_column;
387
392
std::vector<uint16_t > null_map;
388
393
NullMap* map_data_column = nullptr ;
@@ -399,6 +404,7 @@ Status ScalarColumnReader::_read_nested_column(ColumnPtr& doris_column, DataType
399
404
data_column = doris_column->assume_mutable ();
400
405
}
401
406
407
+ // Process definition levels to build null map
402
408
size_t has_read = origin_size;
403
409
size_t ancestor_nulls = 0 ;
404
410
size_t null_size = 0 ;
@@ -445,7 +451,9 @@ Status ScalarColumnReader::_read_nested_column(ColumnPtr& doris_column, DataType
445
451
446
452
size_t num_values = parsed_values - ancestor_nulls;
447
453
454
+ // Handle filtered values
448
455
if (current_filter_map->filter_all ()) {
456
+ // Skip all values if everything is filtered
449
457
if (null_size > 0 ) {
450
458
RETURN_IF_ERROR (_chunk_reader->skip_values (null_size, false ));
451
459
}
@@ -461,7 +469,7 @@ Status ScalarColumnReader::_read_nested_column(ColumnPtr& doris_column, DataType
461
469
SCOPED_RAW_TIMER (&_decode_null_map_time);
462
470
RETURN_IF_ERROR (
463
471
select_vector.init (null_map, num_values, map_data_column, current_filter_map,
464
- nested_filter_map_data ? origin_size : _filter_map_index));
472
+ _nested_filter_map_data ? origin_size : _filter_map_index));
465
473
}
466
474
467
475
RETURN_IF_ERROR (
@@ -470,11 +478,10 @@ Status ScalarColumnReader::_read_nested_column(ColumnPtr& doris_column, DataType
470
478
RETURN_IF_ERROR (_chunk_reader->skip_values (ancestor_nulls, false ));
471
479
}
472
480
}
473
-
474
- if (!align_rows) {
475
- *read_rows = parsed_rows;
476
- }
481
+ *read_rows += parsed_rows;
477
482
_filter_map_index += parsed_values;
483
+
484
+ // Handle cross-page reading
478
485
if (_chunk_reader->remaining_num_values () == 0 ) {
479
486
if (_chunk_reader->has_next_page ()) {
480
487
RETURN_IF_ERROR (_chunk_reader->next_page ());
@@ -486,6 +493,7 @@ Status ScalarColumnReader::_read_nested_column(ColumnPtr& doris_column, DataType
486
493
}
487
494
}
488
495
496
+ // Apply filtering to repetition and definition levels
489
497
if (current_filter_map->has_filter ()) {
490
498
if (current_filter_map->filter_all ()) {
491
499
_rep_levels.resize (0 );
@@ -510,7 +518,8 @@ Status ScalarColumnReader::_read_nested_column(ColumnPtr& doris_column, DataType
510
518
}
511
519
}
512
520
513
- _orig_filter_map_index = current_row + 1 ;
521
+ // Prepare for next row
522
+ ++_orig_filter_map_index;
514
523
515
524
if (_rep_levels.size () > 0 ) {
516
525
// make sure the rows of complex type are aligned correctly,
0 commit comments