@@ -193,21 +193,40 @@ encode_semantic_tokens <- function(tokens) {
193193 return (list (data = integer(0 )))
194194 }
195195
196- # Convert tokens list to vectors for efficient processing
197- # Defensive: coerce all to integer in case of mixed types
198- lines <- as.integer(vapply(tokens , function (t ) t $ line , 0.0 ))
199- cols <- as.integer(vapply(tokens , function (t ) t $ col , 0.0 ))
200- lengths <- as.integer(vapply(tokens , function (t ) t $ length , 0.0 ))
201- types <- as.integer(vapply(tokens , function (t ) t $ tokenType , 0.0 ))
202- mods <- as.integer(vapply(tokens , function (t ) t $ tokenModifiers , 0.0 ))
203-
204- # Sort by position (stable sort by line, then col)
205- order_idx <- order(lines , cols )
206- lines <- lines [order_idx ]
207- cols <- cols [order_idx ]
208- lengths <- lengths [order_idx ]
209- types <- types [order_idx ]
210- mods <- mods [order_idx ]
196+ # Pre-allocate vectors for better performance
197+ n <- length(tokens )
198+ lines <- integer(n )
199+ cols <- integer(n )
200+ lengths <- integer(n )
201+ types <- integer(n )
202+ mods <- integer(n )
203+
204+ # Single loop extraction instead of 5 vapply calls
205+ # Explicitly coerce to maintain integer type
206+ for (i in seq_along(tokens )) {
207+ t <- tokens [[i ]]
208+ lines [i ] <- as.integer(t $ line )
209+ cols [i ] <- as.integer(t $ col )
210+ lengths [i ] <- as.integer(t $ length )
211+ types [i ] <- as.integer(t $ tokenType )
212+ mods [i ] <- as.integer(t $ tokenModifiers )
213+ }
214+
215+ # Only sort if necessary (XML traversal usually produces document order)
216+ # Create ordering key: line * large_number + col for single-pass sort check
217+ if (n > 1 ) {
218+ # Double multiplier on purpose: the integer form (lines * 1000000L) overflows to NA once line >= 2148, making is.unsorted() return NA and the if() below error
219+ order_key <- lines * 1e6 + cols
220+ if (is.unsorted(order_key , strictly = FALSE )) {
221+ logger $ info(" encode_semantic_tokens: explicit ordering required for " , n , " tokens" )
222+ order_idx <- order(lines , cols )
223+ lines <- lines [order_idx ]
224+ cols <- cols [order_idx ]
225+ lengths <- lengths [order_idx ]
226+ types <- types [order_idx ]
227+ mods <- mods [order_idx ]
228+ }
229+ }
211230
212231 # Performance: Use C implementation for encoding
213232 data <- .Call(" encode_semantic_tokens_c" ,
0 commit comments