2222from collections .abc import Callable
2323
2424from pandas import DataFrame as PandasDataFrame
25+ from pandas .core .generic import NDFrame as PandasNDFrame
2526from pyarrow import feather
27+ from rpy2 .rinterface_lib .sexp import NULLType
2628from rpy2 .robjects import StrVector
2729from rpy2 .robjects import globalenv as rpy2_globalenv
2830from rpy2 .robjects import r as rpy2_r_interface
3234from pysits .backend .pkgs import r_pkg_arrow , r_pkg_base , r_pkg_sits
3335
3436
37+ #
38+ # Helper functions
39+ #
3540def _load_arrow_table_reader_function () -> Callable [[str , list [str ]], RDataFrame ]:
3641 """Load and return an R function for reading Arrow tables with nested columns.
3742
@@ -53,15 +58,40 @@ def _load_arrow_table_reader_function() -> Callable[[str, list[str]], RDataFrame
5358
5459 for (col in nested_cols) {
5560 row_nested <- row_data[[col]]
56- row_nested <- list(tidyr::unnest(
57- row_nested,
58- cols = dplyr::everything()
59- ))
60- row_data[[col]] <- NULL
61- row_data <- tibble::tibble(
62- row_data,
63- !!col := row_nested
64- )
61+
62+ # Handle arrow_list class
63+ if (inherits(row_nested, "arrow_list")) {
64+ row_nested <- lapply(row_nested, function(v) {
65+ if (is.null(v)) return(NULL)
66+ # Try to parse as JSON first
67+ tryCatch({
68+ parsed <- jsonlite::fromJSON(v)
69+ setNames(as.character(parsed), names(parsed))
70+ }, error = function(e) {
71+ # If JSON parsing fails, return NULL
72+ NULL
73+ })
74+ })
75+ # If any values in row_nested are NULL, set the whole
76+ # thing to NULL
77+ if (any(sapply(row_nested, is.null))) {
78+ row_nested <- NULL
79+ }
80+ } else {
81+ row_nested <- list(tidyr::unnest(
82+ row_nested,
83+ cols = dplyr::everything()
84+ ))
85+ }
86+
87+ # Only create tibble if row_nested is not NULL
88+ if (!is.null(row_nested)) {
89+ row_data[[col]] <- NULL
90+ row_data <- tibble::tibble(
91+ row_data,
92+ !!col := row_nested
93+ )
94+ }
6595 }
6696 row_data
6797 })
@@ -71,8 +101,41 @@ def _load_arrow_table_reader_function() -> Callable[[str, list[str]], RDataFrame
71101 return rpy2_globalenv ["load_arrow_table" ]
72102
73103
def _named_vector_to_json(x: RDataFrame, colname: str) -> RDataFrame:
    """Serialize each named vector in a column to a JSON string.

    Every element of column ``colname`` that carries names is replaced by a
    JSON object string mapping each name to its value rendered as a
    character string. Elements without names are replaced by ``NULL``.

    Args:
        x (RDataFrame): R DataFrame containing a column with named vectors.

        colname (str): Name of the column containing named vectors.

    Returns:
        RDataFrame: DataFrame whose ``colname`` column holds the JSON strings.
    """
    # The column name is handed to R as data and resolved with ``[[``
    # instead of being spliced into the R source text. This keeps the R code
    # constant and makes column names that are not syntactic R identifiers
    # (spaces, dashes, quotes) safe to use.
    rpy2_r_interface("""
        named_vector_to_json <- function(x, colname) {
            vec_list <- lapply(x[[colname]], function(v) {
                if (is.null(names(v))) return(NULL)
                class(v) <- NULL
                json <- jsonlite::toJSON(
                    as.list(setNames(as.character(v), names(v))),
                    auto_unbox = TRUE
                )
                class(json) <- NULL
                json
            })
            x[[colname]] <- vec_list
            x
        }
    """)

    # Call the R function and return result
    return rpy2_globalenv["named_vector_to_json"](x, StrVector([colname]))
133+
134+
74135def _tibble_to_pandas_arrow (
75- instance : RDataFrame , nested_columns : list [str ] | None = None
136+ instance : RDataFrame ,
137+ nested_columns : list [str ] | None = None ,
138+ table_processor : Callable [[RDataFrame ], RDataFrame ] | None = None ,
76139) -> PandasDataFrame :
77140 """Convert an R DataFrame (tibble) to a Pandas DataFrame using Arrow format.
78141
@@ -113,6 +176,10 @@ def _tibble_to_pandas_arrow(
113176 # Select regular columns (using ``[]``) and convert to Pandas
114177 rdf_data = instance .rx (StrVector (data_columns_valid ))
115178
179+ # Process table
180+ if table_processor :
181+ rdf_data = table_processor (rdf_data )
182+
116183 # Write to Feather format
117184 r_pkg_arrow .write_feather (rdf_data , tmp_path )
118185
@@ -171,7 +238,11 @@ def _pandas_to_tibble_arrow(
171238 # Convert nested columns to R DataFrame
172239 for nested_column in nested_columns :
173240 instance [nested_column ] = instance [nested_column ].apply (
174- lambda arr : arr .to_dict (orient = "list" )
241+ lambda arr : (
242+ arr .to_dict (orient = "list" )
243+ if isinstance (arr , PandasNDFrame )
244+ else arr
245+ )
175246 )
176247
177248 # Write to Feather
@@ -188,7 +259,9 @@ def _pandas_to_tibble_arrow(
188259# General conversions
189260#
190261def tibble_nested_to_pandas_arrow (
191- data : RDataFrame , nested_columns : list [str ]
262+ data : RDataFrame ,
263+ nested_columns : list [str ],
264+ table_processor : Callable [[RDataFrame ], RDataFrame ] | None = None ,
192265) -> PandasDataFrame :
193266 """Convert any tibble to Pandas DataFrame.
194267
@@ -198,7 +271,7 @@ def tibble_nested_to_pandas_arrow(
198271 Returns:
199272 pandas.DataFrame: R Data Frame as Pandas.
200273 """
201- return _tibble_to_pandas_arrow (data , nested_columns )
274+ return _tibble_to_pandas_arrow (data , nested_columns , table_processor )
202275
203276
204277def pandas_to_tibble_arrow (
@@ -233,6 +306,7 @@ def tibble_sits_to_pandas_arrow(data: RDataFrame) -> PandasDataFrame:
233306 "label" ,
234307 "cube" ,
235308 "time_series" ,
309+ "predicted" ,
236310 "cluster" ,
237311 ]
238312
@@ -258,11 +332,17 @@ def pandas_sits_to_tibble_arrow(data: PandasDataFrame) -> RDataFrame:
258332 # Define nested columns
259333 nested_columns = ["time_series" , "predicted" ]
260334
335+ # Define data classes
336+ data_classes = ["sits" , "tbl_df" , "tbl" , "data.frame" ]
337+
338+ if "predicted" in data .columns :
339+ data_classes .append ("predicted" )
340+
261341 # Convert to R DataFrame
262342 data = pandas_to_tibble_arrow (data , nested_columns )
263343
264344 # Set class
265- data .rclass = StrVector ([ "sits" , "tbl_df" , "tbl" , "data.frame" ] )
345+ data .rclass = StrVector (data_classes )
266346
267347 # Convert to R DataFrame
268348 return data
@@ -297,8 +377,30 @@ def tibble_cube_to_pandas_arrow(data: RDataFrame) -> PandasDataFrame:
297377 # Define nested columns
298378 nested_columns = ["file_info" , "vector_info" ]
299379
380+ # Define table processor
381+ def table_processor (x : RDataFrame ) -> RDataFrame :
382+ """Process table."""
383+
384+ # Process ``labels`` column
385+ if "labels" in x .colnames :
386+ # Get labels column
387+ labels = x .rx2 ("labels" )
388+
389+ # Check if labels have names
390+ labels_has_names = all (
391+ not isinstance (label .names , NULLType ) for label in labels
392+ )
393+
394+ # If labels have names, convert to JSON
395+ if labels_has_names :
396+ x = _named_vector_to_json (x , "labels" )
397+
398+ return x
399+
300400 # Convert to Pandas DataFrame
301- data_converted = tibble_nested_to_pandas_arrow (data , nested_columns )
401+ data_converted = tibble_nested_to_pandas_arrow (
402+ data , nested_columns , table_processor
403+ )
302404
303405 # Select columns
304406 columns_available = [v for v in column_order if v in data_converted .columns ]
@@ -314,7 +416,7 @@ def pandas_cube_to_tibble_arrow(data: PandasDataFrame) -> RDataFrame:
314416 data (pandas.DataFrame): The pandas DataFrame to convert to R.
315417 """
316418 # Define nested columns
317- nested_columns = ["file_info" , "vector_info" ]
419+ nested_columns = ["labels" , " file_info" , "vector_info" ]
318420
319421 # Convert to R DataFrame
320422 data = pandas_to_tibble_arrow (data , nested_columns )
0 commit comments