@@ -22,7 +22,11 @@ use std::sync::Arc;
2222use substrait:: proto:: expression_reference:: ExprType ;
2323
2424use datafusion:: arrow:: datatypes:: { Field , IntervalUnit } ;
25- use datafusion:: logical_expr:: { Aggregate , Distinct , EmptyRelation , Extension , Filter , Join , Like , Limit , Partitioning , Projection , Repartition , Sort , SortExpr , SubqueryAlias , TableScan , TableSource , TryCast , Union , Values , Window , WindowFrameUnits } ;
25+ use datafusion:: logical_expr:: {
26+ Aggregate , Distinct , EmptyRelation , Extension , Filter , Join , Like , Limit ,
27+ Partitioning , Projection , Repartition , Sort , SortExpr , SubqueryAlias , TableScan ,
28+ TableSource , TryCast , Union , Values , Window , WindowFrameUnits ,
29+ } ;
2630use datafusion:: {
2731 arrow:: datatypes:: { DataType , TimeUnit } ,
2832 error:: { DataFusionError , Result } ,
@@ -50,9 +54,10 @@ use datafusion::execution::SessionState;
5054use datafusion:: logical_expr:: expr:: {
5155 Alias , BinaryExpr , Case , Cast , GroupingSet , InList , InSubquery , WindowFunction ,
5256} ;
57+ use datafusion:: logical_expr:: registry:: NamedBytes ;
5358use datafusion:: logical_expr:: { expr, Between , JoinConstraint , LogicalPlan , Operator } ;
5459use datafusion:: prelude:: Expr ;
55- use pbjson_types:: { Any as ProtoAny , Any } ;
60+ use pbjson_types:: Any as ProtoAny ;
5661use substrait:: proto:: exchange_rel:: { ExchangeKind , RoundRobin , ScatterFields } ;
5762use substrait:: proto:: expression:: cast:: FailureBehavior ;
5863use substrait:: proto:: expression:: field_reference:: { RootReference , RootType } ;
@@ -66,8 +71,8 @@ use substrait::proto::expression::subquery::InPredicate;
6671use substrait:: proto:: expression:: window_function:: BoundsType ;
6772use substrait:: proto:: expression:: ScalarFunction ;
6873use substrait:: proto:: read_rel:: { ExtensionTable , VirtualTable } ;
69- use substrait:: proto:: rel_common:: EmitKind ;
7074use substrait:: proto:: rel_common:: EmitKind :: Emit ;
75+ use substrait:: proto:: rel_common:: { EmitKind , Hint } ;
7176use substrait:: proto:: {
7277 fetch_rel, rel_common, ExchangeRel , ExpressionReference , ExtendedExpression ,
7378 RelCommon ,
@@ -363,10 +368,10 @@ pub trait SubstraitProducer: Send + Sync + Sized {
363368 from_in_subquery ( self , in_subquery, schema)
364369 }
365370
366- fn handle_extension_table (
371+ fn handle_custom_table (
367372 & mut self ,
368373 _table : & dyn TableSource ,
369- ) -> Result < ExtensionTable > {
374+ ) -> Result < Option < ExtensionTable > > {
370375 not_impl_err ! ( "Not implemented" )
371376 }
372377}
@@ -395,12 +400,12 @@ impl SubstraitProducer for DefaultSubstraitProducer<'_> {
395400 }
396401
397402 fn handle_extension ( & mut self , plan : & Extension ) -> Result < Box < Rel > > {
398- let extension_bytes = self
403+ let NamedBytes ( qualifier , bytes ) = self
399404 . serializer_registry
400405 . serialize_logical_plan ( plan. node . as_ref ( ) ) ?;
401406 let detail = ProtoAny {
402- type_url : plan . node . name ( ) . to_string ( ) ,
403- value : extension_bytes . into ( ) ,
407+ type_url : qualifier . to_string ( ) ,
408+ value : bytes . to_owned ( ) . into ( ) ,
404409 } ;
405410 let mut inputs_rel = plan
406411 . node
@@ -429,14 +434,22 @@ impl SubstraitProducer for DefaultSubstraitProducer<'_> {
429434 } ) )
430435 }
431436
432- fn handle_extension_table ( & mut self , table : & dyn TableSource ) -> Result < ExtensionTable > {
433- let bytes = self . serializer_registry . serialize_custom_table ( table) ?;
434- Ok ( ExtensionTable {
435- detail : Some ( Any {
436- type_url : "/substrait.ExtensionTable" . into ( ) ,
437- value : bytes. into ( ) ,
438- } )
439- } )
437+ fn handle_custom_table (
438+ & mut self ,
439+ table : & dyn TableSource ,
440+ ) -> Result < Option < ExtensionTable > > {
441+ if let Some ( NamedBytes ( qualifier, bytes) ) =
442+ self . serializer_registry . serialize_custom_table ( table) ?
443+ {
444+ Ok ( Some ( ExtensionTable {
445+ detail : Some ( ProtoAny {
446+ type_url : qualifier. to_string ( ) ,
447+ value : bytes. to_owned ( ) . into ( ) ,
448+ } ) ,
449+ } ) )
450+ } else {
451+ Ok ( None )
452+ }
440453 }
441454}
442455
@@ -572,21 +585,31 @@ pub fn from_table_scan(
572585 let table_schema = scan. source . schema ( ) . to_dfschema_ref ( ) ?;
573586 let base_schema = to_substrait_named_struct ( & table_schema) ?;
574587
575- let table = if let Ok ( ext_table) = producer
576- . handle_extension_table ( scan. source . as_ref ( ) )
577- {
578- ReadType :: ExtensionTable ( ext_table)
579- } else {
580- ReadType :: NamedTable ( NamedTable {
581- names : scan. table_name . to_vec ( ) ,
582- advanced_extension : None ,
583- } )
584- } ;
585-
588+ let ( table, common) =
589+ if let Ok ( Some ( ext_table) ) = producer. handle_custom_table ( scan. source . as_ref ( ) ) {
590+ (
591+ ReadType :: ExtensionTable ( ext_table) ,
592+ Some ( RelCommon {
593+ hint : Some ( Hint {
594+ alias : scan. table_name . to_string ( ) ,
595+ ..Default :: default ( )
596+ } ) ,
597+ ..Default :: default ( )
598+ } ) ,
599+ )
600+ } else {
601+ (
602+ ReadType :: NamedTable ( NamedTable {
603+ names : scan. table_name . to_vec ( ) ,
604+ advanced_extension : None ,
605+ } ) ,
606+ None ,
607+ )
608+ } ;
586609
587610 Ok ( Box :: new ( Rel {
588611 rel_type : Some ( RelType :: Read ( Box :: new ( ReadRel {
589- common : None ,
612+ common,
590613 base_schema : Some ( base_schema) ,
591614 filter : None ,
592615 best_effort_filter : None ,
@@ -1715,7 +1738,7 @@ pub fn from_in_subquery(
17151738 subquery_type : Some (
17161739 substrait:: proto:: expression:: subquery:: SubqueryType :: InPredicate (
17171740 Box :: new ( InPredicate {
1718- needles : ( vec ! [ substrait_expr] ) ,
1741+ needles : vec ! [ substrait_expr] ,
17191742 haystack : Some ( subquery_plan) ,
17201743 } ) ,
17211744 ) ,
@@ -2909,6 +2932,7 @@ mod test {
29092932 #[ tokio:: test]
29102933 async fn round_trip_extension_table ( ) {
29112934 const TABLE_NAME : & str = "custom_table" ;
2935+ const TYPE_URL : & str = "/substrait.test.CustomTable" ;
29122936 const SERIALIZED : & [ u8 ] = "table definition" . as_bytes ( ) ;
29132937
29142938 fn custom_table ( ) -> Arc < dyn TableProvider > {
@@ -2921,9 +2945,12 @@ mod test {
29212945 #[ derive( Debug ) ]
29222946 struct Registry ;
29232947 impl SerializerRegistry for Registry {
2924- fn serialize_custom_table ( & self , table : & dyn TableSource ) -> Result < Vec < u8 > > {
2948+ fn serialize_custom_table (
2949+ & self ,
2950+ table : & dyn TableSource ,
2951+ ) -> Result < Option < NamedBytes > > {
29252952 if table. schema ( ) == custom_table ( ) . schema ( ) {
2926- Ok ( SERIALIZED . to_vec ( ) )
2953+ Ok ( Some ( NamedBytes ( TYPE_URL . to_string ( ) , SERIALIZED . to_vec ( ) ) ) )
29272954 } else {
29282955 Err ( DataFusionError :: Internal ( "Not our table" . into ( ) ) )
29292956 }
@@ -2933,7 +2960,7 @@ mod test {
29332960 name : & str ,
29342961 bytes : & [ u8 ] ,
29352962 ) -> Result < Arc < dyn TableSource > > {
2936- if name == TABLE_NAME && bytes == SERIALIZED {
2963+ if name == TYPE_URL && bytes == SERIALIZED {
29372964 Ok ( Arc :: new ( DefaultTableSource :: new ( custom_table ( ) ) ) )
29382965 } else {
29392966 panic ! ( "Unexpected extension table: {name}" ) ;
@@ -2965,7 +2992,7 @@ mod test {
29652992 assert_contains ! (
29662993 // confirm that the Substrait plan contains our custom_table as an ExtensionTable
29672994 serde_json:: to_string( substrait. as_ref( ) ) . unwrap( ) ,
2968- format!( r#""extensionTable":{{"detail":{{"typeUrl":"{TABLE_NAME }","# )
2995+ format!( r#""extensionTable":{{"detail":{{"typeUrl":"{TYPE_URL }","# )
29692996 ) ;
29702997 remote // make sure the restored plan is fully working in the remote context
29712998 . execute_logical_plan ( restored. clone ( ) )
0 commit comments