@@ -8,7 +8,7 @@ use super::{PhysicalPredicate, ScanMetadata};
88use crate :: actions:: deletion_vector:: DeletionVectorDescriptor ;
99use crate :: actions:: get_log_add_schema;
1010use crate :: engine_data:: { GetData , RowVisitor , TypedGetData as _} ;
11- use crate :: expressions:: { column_name, ColumnName , Expression , ExpressionRef , PredicateRef } ;
11+ use crate :: expressions:: { column_name, ColumnName , Expression , ExpressionRef , PredicateRef , UnaryExpressionOp } ;
1212use crate :: kernel_predicates:: { DefaultKernelPredicateEvaluator , KernelPredicateEvaluator as _} ;
1313use crate :: log_replay:: { ActionsBatch , FileActionDeduplicator , FileActionKey , LogReplayProcessor } ;
1414use crate :: scan:: Scalar ;
@@ -44,6 +44,7 @@ pub(crate) struct ScanLogReplayProcessor {
4444 partition_filter : Option < PredicateRef > ,
4545 data_skipping_filter : Option < DataSkippingFilter > ,
4646 add_transform : Arc < dyn ExpressionEvaluator > ,
47+ add_checkpoint_transform : Arc < dyn ExpressionEvaluator > ,
4748 state_info : Arc < StateInfo > ,
4849 /// A set of (data file path, dv_unique_id) pairs that have been seen thus
4950 /// far in the log. This is used to filter out files with Remove actions as
@@ -84,6 +85,11 @@ impl ScanLogReplayProcessor {
8485 partition_filter : physical_predicate. as_ref ( ) . map ( |( e, _) | e. clone ( ) ) ,
8586 data_skipping_filter,
8687 add_transform : engine. evaluation_handler ( ) . new_expression_evaluator (
88+ get_log_add_schema ( ) . clone ( ) ,
89+ get_add_transform_expr ( false ) ,
90+ SCAN_ROW_DATATYPE . clone ( ) ,
91+ ) ?,
92+ add_checkpoint_transform : engine. evaluation_handler ( ) . new_expression_evaluator (
8793 get_log_add_schema ( ) . clone ( ) ,
8894 get_add_transform_expr ( skip_stats) ,
8995 SCAN_ROW_DATATYPE . clone ( ) ,
@@ -317,11 +323,21 @@ pub(crate) static SCAN_ROW_SCHEMA: LazyLock<Arc<StructType>> = LazyLock::new(||
/// Lazily initialized `DataType` produced by cloning `SCAN_ROW_SCHEMA` and converting it with
/// `.into()`. A clone of this value is passed as the third argument when the expression
/// evaluators are created in `ScanLogReplayProcessor`.
/// NOTE(review): presumably this is the evaluators' output type — confirm against
/// `new_expression_evaluator`.
317323pub ( crate ) static SCAN_ROW_DATATYPE : LazyLock < DataType > =
318324 LazyLock :: new ( || SCAN_ROW_SCHEMA . clone ( ) . into ( ) ) ;
319325
/// Lazily built, shared expression that applies `UnaryExpressionOp::ToJson` to the
/// `add.stats_parsed` column.
///
/// `get_add_transform_expr` clones this handle as its `stats_expr` when `skip_stats` is true.
/// NOTE(review): presumably this re-serializes the parsed stats struct into a JSON string —
/// confirm against the `ToJson` evaluator implementation.
326+ static STATS_JSON_EXPR : LazyLock < ExpressionRef > = LazyLock :: new ( || {
327+ Arc :: new (
328+ Expression :: unary (
329+ UnaryExpressionOp :: ToJson ,
330+ Expression :: column ( [ "add" , "stats_parsed" ] ) ,
331+ )
332+ )
333+ } ) ;
334+
320335fn get_add_transform_expr ( skip_stats : bool ) -> ExpressionRef {
321336 use crate :: expressions:: column_expr_ref;
322337
323338 let stats_expr = if skip_stats {
324- Arc :: new ( Expression :: Literal ( Scalar :: Null ( DataType :: STRING ) ) )
339+ // Arc::new(Expression::Literal(Scalar::Null(DataType::STRING)))
340+ STATS_JSON_EXPR . clone ( )
325341 } else {
326342 column_expr_ref ! ( "add.stats" )
327343 } ;
@@ -387,7 +403,11 @@ impl LogReplayProcessor for ScanLogReplayProcessor {
387403 visitor. visit_rows_of ( actions. as_ref ( ) ) ?;
388404
389405 // TODO: Teach expression eval to respect the selection vector we just computed so carefully!
390- let result = self . add_transform . evaluate ( actions. as_ref ( ) ) ?;
406+ let result = if is_log_batch {
407+ self . add_transform . evaluate ( actions. as_ref ( ) ) ?
408+ } else {
409+ self . add_checkpoint_transform . evaluate ( actions. as_ref ( ) ) ?
410+ } ;
391411 ScanMetadata :: try_new (
392412 result,
393413 visitor. selection_vector ,
0 commit comments