Skip to content

Commit 72a0c4c

Browse files
committed
Update semantic
1 parent d460a79 commit 72a0c4c

File tree

1 file changed

+34
-15
lines changed

1 file changed

+34
-15
lines changed

R/semantic.R

Lines changed: 34 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -193,21 +193,40 @@ encode_semantic_tokens <- function(tokens) {
193193
return(list(data = integer(0)))
194194
}
195195

196-
# Convert tokens list to vectors for efficient processing
197-
# Defensive: coerce all to integer in case of mixed types
198-
lines <- as.integer(vapply(tokens, function(t) t$line, 0.0))
199-
cols <- as.integer(vapply(tokens, function(t) t$col, 0.0))
200-
lengths <- as.integer(vapply(tokens, function(t) t$length, 0.0))
201-
types <- as.integer(vapply(tokens, function(t) t$tokenType, 0.0))
202-
mods <- as.integer(vapply(tokens, function(t) t$tokenModifiers, 0.0))
203-
204-
# Sort by position (stable sort by line, then col)
205-
order_idx <- order(lines, cols)
206-
lines <- lines[order_idx]
207-
cols <- cols[order_idx]
208-
lengths <- lengths[order_idx]
209-
types <- types[order_idx]
210-
mods <- mods[order_idx]
196+
# Pre-allocate vectors for better performance
197+
n <- length(tokens)
198+
lines <- integer(n)
199+
cols <- integer(n)
200+
lengths <- integer(n)
201+
types <- integer(n)
202+
mods <- integer(n)
203+
204+
# Single loop extraction instead of 5 vapply calls
205+
# Explicitly coerce to maintain integer type
206+
for (i in seq_along(tokens)) {
207+
t <- tokens[[i]]
208+
lines[i] <- as.integer(t$line)
209+
cols[i] <- as.integer(t$col)
210+
lengths[i] <- as.integer(t$length)
211+
types[i] <- as.integer(t$tokenType)
212+
mods[i] <- as.integer(t$tokenModifiers)
213+
}
214+
215+
# Only sort if necessary (XML traversal usually produces document order)
216+
# Create ordering key: line * large_number + col for single-pass sort check
217+
if (n > 1) {
218+
# Use large multiplier to ensure line precedence over col
219+
order_key <- lines * 1000000L + cols
220+
if (is.unsorted(order_key, strictly = FALSE)) {
221+
logger$info("encode_semantic_tokens: explicit ordering required for ", n, " tokens")
222+
order_idx <- order(lines, cols)
223+
lines <- lines[order_idx]
224+
cols <- cols[order_idx]
225+
lengths <- lengths[order_idx]
226+
types <- types[order_idx]
227+
mods <- mods[order_idx]
228+
}
229+
}
211230

212231
# Performance: Use C implementation for encoding
213232
data <- .Call("encode_semantic_tokens_c",

0 commit comments

Comments
 (0)