diff --git a/tree-sitter/src/main/java/org/treesitter/TSQuery.java b/tree-sitter/src/main/java/org/treesitter/TSQuery.java index 9309e80..e7c13f5 100644 --- a/tree-sitter/src/main/java/org/treesitter/TSQuery.java +++ b/tree-sitter/src/main/java/org/treesitter/TSQuery.java @@ -2,11 +2,15 @@ import java.lang.ref.Cleaner.Cleanable; +import java.util.ArrayList; +import java.util.List; + import static org.treesitter.TSParser.*; public class TSQuery implements AutoCloseable { private final long ptr; private TSLanguage lang; + private List> predicates; private final Cleanable cleanable; private boolean closed = false; @@ -25,7 +29,9 @@ public TSQueryCleanRunner(long ptr) { @Override public void run() { - ts_query_delete(ptr); + if (ptr != 0) { + ts_query_delete(ptr); + } } } @@ -55,7 +61,11 @@ public void close() { */ public TSQuery(TSLanguage language, String query){ this(ts_query_new(language.getPtr(), query)); + if (ptr == 0) { + throw new TSQueryException("Syntax error in query: " + query); + } this.lang = language; + this.predicates = parsePredicates(); } protected long getPtr() { @@ -209,6 +219,122 @@ public String getCaptureNameForId(int captureId) { return ts_query_capture_name_for_id(ptr, captureId); } + /** + * Get the predicates for the given pattern. + * + * @param patternIndex The index of the pattern. + * @return The list of predicates for the pattern. + * @throws IndexOutOfBoundsException if the pattern index is out of bounds. + */ + public List getPredicatesForPattern(int patternIndex) { + if (patternIndex < 0 || patternIndex >= predicates.size()) { + throw new IndexOutOfBoundsException("Pattern index " + patternIndex + " is out of bounds"); + } + return predicates.get(patternIndex); + } + + private List> parsePredicates() { + int patternCount = getPatternCount(); + List> result = new ArrayList<>(patternCount); + for (int i = 0; i < patternCount; i++) { + TSQueryPredicateStep[] steps = getPredicateForPattern(i); + List patternPredicates = new ArrayList<>(); + if (steps == null) { + result.add(patternPredicates); + continue; + } + int stepIndex = 0; + while (stepIndex < steps.length) { + // Find the number of arguments until Done sentinel + int nargs = 0; + while (stepIndex + nargs < steps.length && + steps[stepIndex + nargs].getType() != TSQueryPredicateStepType.TSQueryPredicateStepTypeDone) { + nargs++; + } + + if (nargs > 0) { + TSQueryPredicateStep firstStep = steps[stepIndex]; + if (firstStep.getType() != TSQueryPredicateStepType.TSQueryPredicateStepTypeString) { + throw new TSQueryException("Predicate must begin with a string"); + } + String name = getStringValueForId(firstStep.getValueId()); + + if (TSQueryPredicate.TSQueryPredicateEq.NAMES.contains(name)) { + patternPredicates.add(handleEq(name, steps, stepIndex, nargs)); + } else if (TSQueryPredicate.TSQueryPredicateMatch.NAMES.contains(name)) { + patternPredicates.add(handleMatch(name, steps, stepIndex, nargs)); + } else if (TSQueryPredicate.TSQueryPredicateAnyOf.NAMES.contains(name)) { + patternPredicates.add(handleAnyOf(name, steps, stepIndex, nargs)); + } else { + patternPredicates.add(new TSQueryPredicate.TSQueryPredicateGeneric(name)); + } + } + stepIndex += nargs + 1; // Move past arguments and the Done sentinel + } + result.add(patternPredicates); + } + return result; + } + + private TSQueryPredicate handleEq(String name, TSQueryPredicateStep[] steps, int start, int nargs) { + if (nargs != 3) { + throw new TSQueryException(String.format("Predicate #%s expects 2 arguments, got %d", name, nargs - 1)); + } + TSQueryPredicateStep arg1 = steps[start + 1]; + if (arg1.getType() != TSQueryPredicateStepType.TSQueryPredicateStepTypeCapture) { + throw new TSQueryException(String.format("First argument to #%s must be a capture", name)); + } + int captureId = arg1.getValueId(); + + TSQueryPredicateStep arg2 = steps[start + 2]; + int arg2ValueId = arg2.getValueId(); + boolean isCapture = arg2.getType() == TSQueryPredicateStepType.TSQueryPredicateStepTypeCapture; + String literalValue = isCapture ? null : getStringValueForId(arg2ValueId); + + return new TSQueryPredicate.TSQueryPredicateEq(name, captureId, literalValue, arg2ValueId, isCapture); + } + + private TSQueryPredicate handleMatch(String name, TSQueryPredicateStep[] steps, int start, int nargs) { + if (nargs != 3) { + throw new TSQueryException(String.format("Predicate #%s expects 2 arguments, got %d", name, nargs - 1)); + } + TSQueryPredicateStep arg1 = steps[start + 1]; + if (arg1.getType() != TSQueryPredicateStepType.TSQueryPredicateStepTypeCapture) { + throw new TSQueryException(String.format("First argument to #%s must be a capture", name)); + } + int captureId = arg1.getValueId(); + + TSQueryPredicateStep arg2 = steps[start + 2]; + if (arg2.getType() != TSQueryPredicateStepType.TSQueryPredicateStepTypeString) { + throw new TSQueryException(String.format("Second argument to #%s must be a string literal", name)); + } + String patternStr = getStringValueForId(arg2.getValueId()); + + return new TSQueryPredicate.TSQueryPredicateMatch(name, captureId, patternStr); + } + + private TSQueryPredicate handleAnyOf(String name, TSQueryPredicateStep[] steps, int start, int nargs) { + if (nargs < 3) { + throw new TSQueryException(String.format("Predicate #%s expects at least 2 arguments, got %d", name, nargs - 1)); + } + TSQueryPredicateStep arg1 = steps[start + 1]; + if (arg1.getType() != TSQueryPredicateStepType.TSQueryPredicateStepTypeCapture) { + throw new TSQueryException(String.format("First argument to #%s must be a capture", name)); + } + int captureId = arg1.getValueId(); + + List values = new ArrayList<>(nargs - 2); + for (int i = 2; i < nargs; i++) { + TSQueryPredicateStep arg = steps[start + i]; + if (arg.getType() != TSQueryPredicateStepType.TSQueryPredicateStepTypeString) { + throw new TSQueryException(String.format("Arguments to #%s must be string literals", name)); + } + values.add(getStringValueForId(arg.getValueId())); + } + + return new TSQueryPredicate.TSQueryPredicateAnyOf(name, captureId, values); + } + /** * Get the quantifier of the query's captures. Each capture is * associated * with a numeric id based on the order that it appeared in the query's source. @@ -235,7 +361,12 @@ public TSQuantifier getCaptureQuantifierForId(int patternId, int captureId) { * Get TSQueryPredicateStepTypeString by id. See {@link #getPredicateForPattern(int)} * @param id the valueId got from {@link #getPredicateForPattern(int)}. * @return the literal string value. - * @throws TSQueryException if the id is invalid. + * @throws TSException if the id is invalid. + */ + /** + * Get the string value for the given id. + * @param id the string id. + * @return the string value. */ public String getStringValueForId(int id) { ensureOpen(); diff --git a/tree-sitter/src/main/java/org/treesitter/TSQueryCursor.java b/tree-sitter/src/main/java/org/treesitter/TSQueryCursor.java index 7ef55fe..c683a25 100644 --- a/tree-sitter/src/main/java/org/treesitter/TSQueryCursor.java +++ b/tree-sitter/src/main/java/org/treesitter/TSQueryCursor.java @@ -1,9 +1,11 @@ package org.treesitter; import java.lang.ref.Cleaner.Cleanable; -import java.util.Iterator; -import java.util.NoSuchElementException; -import java.util.function.BiFunction; +import java.nio.charset.StandardCharsets; +import java.util.*; +import java.util.List; +import java.util.Objects; +import java.util.function.Function; import static org.treesitter.TSParser.*; import static org.treesitter.TSParser.ts_query_cursor_next_match; @@ -24,6 +26,7 @@ private void ensureOpen() { private TSNode node; private TSQuery query; + private byte[] sourceBytes; private static class TSQueryCursorCleanAction implements Runnable { private final long ptr; @@ -87,10 +90,27 @@ public TSQueryCursor() { * @param node The node to run the query on. */ public void exec(TSQuery query, TSNode node){ + exec(query, node, null); + } + + /** + * Start running a given query on a given node with source text for predicate filtering. + *

+ * Note: The {@code sourceText} is encoded as UTF-8 to align with Tree-sitter's + * default byte offsets. If the tree was parsed with a different encoding (e.g. UTF-16), + * predicate results may be incorrect. + * + * @param query The query to run. + * @param node The node to run the query on. + * @param sourceText The source text used to resolve predicates like {@code #eq?}. + */ + public void exec(TSQuery query, TSNode node, CharSequence sourceText){ ensureOpen(); executed = true; this.node = node; this.query = query; + this.sourceBytes = sourceText == null ? null : + sourceText.toString().getBytes(StandardCharsets.UTF_8); ts_query_cursor_exec(ptr, query.getPtr(), node); } @@ -103,10 +123,29 @@ public void exec(TSQuery query, TSNode node){ * @param progress The progress callback. */ public void execWithOptions(TSQuery query, TSNode node, TSQueryProgress progress){ + execWithOptions(query, node, null, progress); + } + + /** + * Start running a given query on a given node, with some options and source text. + *

+ * Note: The {@code sourceText} is encoded as UTF-8 to align with Tree-sitter's + * default byte offsets. If the tree was parsed with a different encoding (e.g. UTF-16), + * predicate results may be incorrect. + * + * @see #exec(TSQuery, TSNode, CharSequence) + * @param query The query to run. + * @param node The node to run the query on. + * @param sourceText The source text for predicates. + * @param progress The progress callback. + */ + public void execWithOptions(TSQuery query, TSNode node, CharSequence sourceText, TSQueryProgress progress){ ensureOpen(); executed = true; this.node = node; this.query = query; + this.sourceBytes = sourceText == null ? null : + sourceText.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8); ts_query_cursor_exec_with_options(ptr, query.getPtr(), node, progress, progressPayloadPtr); } @@ -238,9 +277,13 @@ public boolean setContainingPointRange(TSPoint startPoint, TSPoint endPoint){ public boolean nextMatch(TSQueryMatch match){ ensureOpen(); assertExecuted(); - boolean ret = ts_query_cursor_next_match(ptr, match); - addTsTreeRef(match); - return ret; + while (ts_query_cursor_next_match(ptr, match)) { + addTsTreeRef(match); + if (satisfiesPredicates(match)) { + return true; + } + } + return false; } @@ -267,9 +310,13 @@ public void removeMatch(int matchId){ public boolean nextCapture(TSQueryMatch match){ ensureOpen(); assertExecuted(); - boolean ret = ts_query_cursor_next_capture(ptr, match); - addTsTreeRef(match); - return ret; + while (ts_query_cursor_next_capture(ptr, match)) { + addTsTreeRef(match); + if (satisfiesPredicates(match)) { + return true; + } + } + return false; } private void addTsTreeRef(TSQueryMatch match){ @@ -282,6 +329,16 @@ private void addTsTreeRef(TSQueryMatch match){ } } + private boolean satisfiesPredicates(TSQueryMatch match) { + if (query == null) return true; + List patternPredicates = query.getPredicatesForPattern(match.getPatternIndex()); + if (patternPredicates == null || patternPredicates.isEmpty()) { + return true; + } + + return patternPredicates.stream().allMatch(predicate -> predicate.test(match, sourceBytes)); + } + private void assertExecuted(){ if(!executed){ throw new TSException("Query not executed, call exec() first."); diff --git a/tree-sitter/src/main/java/org/treesitter/TSQueryPredicate.java b/tree-sitter/src/main/java/org/treesitter/TSQueryPredicate.java new file mode 100644 index 0000000..563b458 --- /dev/null +++ b/tree-sitter/src/main/java/org/treesitter/TSQueryPredicate.java @@ -0,0 +1,257 @@ +package org.treesitter; + +import java.util.*; +import java.util.function.Function; +import java.util.Objects; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +/** + * A query predicate that associates conditions with a pattern. + */ +public abstract class TSQueryPredicate { + private final String name; + + protected TSQueryPredicate(String name) { + this.name = name; + } + + /** + * Get the name of the predicate. + */ + public String getName() { + return name; + } + + /** + * Test the predicate against a match. + * + * @param match The query match. + * @param textProvider A function that provides the text for a given node. + * @return true if the predicate is satisfied. + */ + public abstract boolean test(TSQueryMatch match, Function textProvider); + + /** + * Test the predicate against a match using raw source bytes to avoid allocations. + * + * @param match The query match. + * @param sourceBytes The source bytes (UTF-8). + * @return true if the predicate is satisfied. + */ + public boolean test(TSQueryMatch match, byte[] sourceBytes) { + return test(match, n -> { + if (n == null || n.isNull() || sourceBytes == null) return ""; + int start = n.getStartByte(); + int end = n.getEndByte(); + if (start < 0 || start > end || start >= sourceBytes.length) { + return ""; + } + int length = Math.min(end, sourceBytes.length) - start; + return new String(sourceBytes, start, length, java.nio.charset.StandardCharsets.UTF_8); + }); + } + + protected static boolean arrayEquals(byte[] a, int aOffset, int aLength, byte[] b, int bOffset, int bLength) { + if (aLength != bLength) return false; + for (int i = 0; i < aLength; i++) { + if (a[aOffset + i] != b[bOffset + i]) return false; + } + return true; + } + + protected List findNodes(TSQueryMatch match, int captureId) { + TSQueryCapture[] captures = match.getCaptures(); + if (captures == null) return Collections.emptyList(); + List nodes = new ArrayList<>(); + for (TSQueryCapture capture : captures) { + // In tree-sitter, the capture index is the ID within the query. + if (capture.getIndex() == captureId) { + nodes.add(capture.getNode()); + } + } + nodes.removeIf(Objects::isNull); + return nodes; + } + + /** + * Handles {@code #eq?}, {@code #not-eq?}, {@code #any-eq?}, {@code #any-not-eq?} + */ + public static final class TSQueryPredicateEq extends TSQueryPredicate { + private final int captureId; + private final String literalValue; + private final byte[] literalBytes; + private final int valueId; + private final boolean isPositive; + private final boolean isAny; + private final boolean isCapture; + + public static final Set NAMES = Set.of("eq?", "not-eq?", "any-eq?", "any-not-eq?"); + + public TSQueryPredicateEq(String name, int captureId, String literalValue, int valueId, boolean isCapture) { + super(name); + this.captureId = captureId; + this.literalValue = literalValue; + this.literalBytes = literalValue == null ? null : literalValue.getBytes(java.nio.charset.StandardCharsets.UTF_8); + this.valueId = valueId; + this.isPositive = !name.contains("not-"); + this.isAny = name.startsWith("any-"); + this.isCapture = isCapture; + } + + @Override + public boolean test(TSQueryMatch match, Function textProvider) { + return isCapture ? testCapture(match, textProvider) : testLiteral(match, textProvider); + } + + @Override + public boolean test(TSQueryMatch match, byte[] sourceBytes) { + if (sourceBytes == null) return super.test(match, sourceBytes); + return isCapture ? testCapture(match, sourceBytes) : testLiteral(match, sourceBytes); + } + + private boolean testCapture(TSQueryMatch match, Function textProvider) { + List nodes1 = findNodes(match, captureId); + List nodes2 = findNodes(match, valueId); + if (nodes1.isEmpty() || nodes2.isEmpty()) return !isPositive; + + java.util.function.Predicate predicate = n1 -> { + String text1 = textProvider.apply(n1); + return nodes2.stream().anyMatch(n2 -> + Objects.equals(text1, textProvider.apply(n2))) == isPositive; + }; + return isAny ? nodes1.stream().anyMatch(predicate) : nodes1.stream().allMatch(predicate); + } + + private boolean testCapture(TSQueryMatch match, byte[] sourceBytes) { + List nodes1 = findNodes(match, captureId); + List nodes2 = findNodes(match, valueId); + if (nodes1.isEmpty() || nodes2.isEmpty()) return !isPositive; + + java.util.function.Predicate predicate = n1 -> { + int s1 = n1.getStartByte(); + int l1 = n1.getEndByte() - s1; + return nodes2.stream().anyMatch(n2 -> { + int s2 = n2.getStartByte(); + int l2 = n2.getEndByte() - s2; + return arrayEquals(sourceBytes, s1, l1, sourceBytes, s2, l2); + }) == isPositive; + }; + return isAny ? nodes1.stream().anyMatch(predicate) : nodes1.stream().allMatch(predicate); + } + + private boolean testLiteral(TSQueryMatch match, Function textProvider) { + List nodes = findNodes(match, captureId); + if (nodes.isEmpty()) return !isPositive; + java.util.function.Predicate predicate = node -> { + String text = textProvider.apply(node); + return Objects.equals(text, literalValue) == isPositive; + }; + return isAny ? nodes.stream().anyMatch(predicate) : nodes.stream().allMatch(predicate); + } + + private boolean testLiteral(TSQueryMatch match, byte[] sourceBytes) { + List nodes = findNodes(match, captureId); + if (nodes.isEmpty()) return !isPositive; + java.util.function.Predicate predicate = node -> { + int start = node.getStartByte(); + int length = node.getEndByte() - start; + return arrayEquals(sourceBytes, start, length, literalBytes, 0, literalBytes.length) == isPositive; + }; + return isAny ? nodes.stream().anyMatch(predicate) : nodes.stream().allMatch(predicate); + } + } + + /** + * Handles {@code #match?}, {@code #not-match?}, {@code #any-match?}, {@code #any-not-match?} + */ + public static final class TSQueryPredicateMatch extends TSQueryPredicate { + private final int captureId; + private final Pattern pattern; + private final boolean isPositive; + private final boolean isAny; + + public static final Set NAMES = Set.of("match?", "not-match?", "any-match?", "any-not-match?"); + + public TSQueryPredicateMatch(String name, int captureId, String patternStr) { + super(name); + this.captureId = captureId; + this.pattern = Pattern.compile(patternStr); + this.isPositive = !name.contains("not-"); + this.isAny = name.startsWith("any-"); + } + + @Override + public boolean test(TSQueryMatch match, Function textProvider) { + List nodes = findNodes(match, captureId); + if (nodes.isEmpty()) return !isPositive; + java.util.function.Predicate predicate = n -> { + String text = textProvider.apply(n); + return text != null && pattern.matcher(text).find() == isPositive; + }; + return isAny ? nodes.stream().anyMatch(predicate) : nodes.stream().allMatch(predicate); + } + } + + /** + * Handles {@code #any-of?}, {@code #not-any-of?} + */ + public static final class TSQueryPredicateAnyOf extends TSQueryPredicate { + private final int captureId; + private final Set values; + private final List valueBytes; + private final boolean isPositive; + + public static final Set NAMES = Set.of("any-of?", "not-any-of?"); + + public TSQueryPredicateAnyOf(String name, int captureId, List values) { + super(name); + this.captureId = captureId; + this.values = new HashSet<>(values); + this.valueBytes = values.stream() + .map(s -> s.getBytes(java.nio.charset.StandardCharsets.UTF_8)) + .collect(Collectors.toList()); + this.isPositive = name.equals("any-of?"); + } + + @Override + public boolean test(TSQueryMatch match, Function textProvider) { + List nodes = findNodes(match, captureId); + if (nodes.isEmpty()) return !isPositive; + java.util.function.Predicate predicate = node -> { + String text = textProvider.apply(node); + return (text != null && values.contains(text)) == isPositive; + }; + // #any-of? is typically treated as a filter where all captured nodes must satisfy it + return nodes.stream().allMatch(predicate); + } + + @Override + public boolean test(TSQueryMatch match, byte[] sourceBytes) { + if (sourceBytes == null) return super.test(match, sourceBytes); + List nodes = findNodes(match, captureId); + if (nodes.isEmpty()) return !isPositive; + java.util.function.Predicate predicate = node -> { + int start = node.getStartByte(); + int length = node.getEndByte() - start; + return valueBytes.stream().anyMatch(val -> + arrayEquals(sourceBytes, start, length, val, 0, val.length)) == isPositive; + }; + return nodes.stream().allMatch(predicate); + } + } + + /** + * Handles unknown predicates or directives. + */ + public static final class TSQueryPredicateGeneric extends TSQueryPredicate { + public TSQueryPredicateGeneric(String name) { + super(name); + } + + @Override + public boolean test(TSQueryMatch match, Function textProvider) { + return true; + } + } +} diff --git a/tree-sitter/src/test/java/org/treesitter/TSQueryCursorTest.java b/tree-sitter/src/test/java/org/treesitter/TSQueryCursorTest.java index 7f03cb9..6aff881 100644 --- a/tree-sitter/src/test/java/org/treesitter/TSQueryCursorTest.java +++ b/tree-sitter/src/test/java/org/treesitter/TSQueryCursorTest.java @@ -20,7 +20,7 @@ void beforeEach() { json = new TreeSitterJson(); parser.setLanguage(json); tree = parser.parseString(null, JSON_SRC); - query = new TSQuery(json, "((document) @root (#eq? @root \"foo\"))"); + query = new TSQuery(json, "((document) @root)"); cursor = new TSQueryCursor(); rootNode = tree.getRootNode(); diff --git a/tree-sitter/src/test/java/org/treesitter/TSQueryErrorTest.java b/tree-sitter/src/test/java/org/treesitter/TSQueryErrorTest.java new file mode 100644 index 0000000..4e9456a --- /dev/null +++ b/tree-sitter/src/test/java/org/treesitter/TSQueryErrorTest.java @@ -0,0 +1,30 @@ +package org.treesitter; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertThrows; + +class TSQueryErrorTest { + private TSLanguage json; + private TSParser parser; + + @BeforeEach + void beforeEach() { + parser = new TSParser(); + json = new TreeSitterJson(); + parser.setLanguage(json); + } + + @Test + void testInvalidQueryThrowsException() { + // This query is syntactically invalid (missing parentheses or bad structure) + // ts_query_new should return NULL, which TSQuery constructor should check. + assertThrows(TSQueryException.class, () -> new TSQuery(json, "invalid query")); + } + + @Test + void testQueryWithSyntaxError() { + assertThrows(TSQueryException.class, () -> new TSQuery(json, "((document) @d (#eq? @d))")); + } +} diff --git a/tree-sitter/src/test/java/org/treesitter/TSQueryPredicateTest.java b/tree-sitter/src/test/java/org/treesitter/TSQueryPredicateTest.java new file mode 100644 index 0000000..b538450 --- /dev/null +++ b/tree-sitter/src/test/java/org/treesitter/TSQueryPredicateTest.java @@ -0,0 +1,132 @@ +package org.treesitter; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.treesitter.utils.NativeUtils; + +import java.io.File; + +import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +class TSQueryPredicateTest { + public static final String JSON_SRC = "[1, null]"; + private TSTree tree; + private TSLanguage json; + private TSParser parser; + private TSQuery query; + private TSQueryCursor cursor; + private TSNode rootNode; + + @BeforeEach + void beforeEach() { + parser = new TSParser(); + json = new TreeSitterJson(); + parser.setLanguage(json); + tree = parser.parseString(null, JSON_SRC); + rootNode = tree.getRootNode(); + cursor = new TSQueryCursor(); + } + + @Test + void predicateFilteringEq() { + // [1, null] + // #eq? @val "1" matches the first element + query = new TSQuery(json, "((number) @val (#eq? @val \"1\"))"); + cursor.exec(query, rootNode, JSON_SRC); + TSQueryMatch match = new TSQueryMatch(); + assertTrue(cursor.nextMatch(match)); + assertEquals(1, match.getCaptures().length); + assertEquals("1", JSON_SRC.substring(match.getCaptures()[0].getNode().getStartByte(), match.getCaptures()[0].getNode().getEndByte())); + + // #eq? @val "2" should not match anything in [1, null] + query = new TSQuery(json, "((number) @val (#eq? @val \"2\"))"); + cursor.exec(query, rootNode, JSON_SRC); + assertFalse(cursor.nextMatch(match)); + } + + @Test + void predicateFilteringNotMatch() { + // [1, null] + // #not-match? @val "^n" matches 1 but excludes null + query = new TSQuery(json, "((_) @val (#not-match? @val \"^n\"))"); + cursor.exec(query, rootNode, JSON_SRC); + TSQueryMatch match = new TSQueryMatch(); + + boolean foundOne = false; + boolean foundNull = false; + while(cursor.nextMatch(match)) { + String text = JSON_SRC.substring(match.getCaptures()[0].getNode().getStartByte(), match.getCaptures()[0].getNode().getEndByte()); + if (text.equals("1")) foundOne = true; + if (text.equals("null")) foundNull = true; + } + assertTrue(foundOne, "Should have matched '1'"); + assertFalse(foundNull, "Should not have matched 'null' due to #not-match?"); + } + + @Test + void predicateEqWithSourceText() { + // Test #eq? @foo "bar" + String src = "[\"bar\", \"baz\"]"; + tree = parser.parseString(null, src); + query = new TSQuery(json, "((string) @foo (#eq? @foo \"\\\"bar\\\"\"))"); + cursor.exec(query, tree.getRootNode(), src); + TSQueryMatch match = new TSQueryMatch(); + assertTrue(cursor.nextMatch(match)); + assertEquals("\"bar\"", src.substring(match.getCaptures()[0].getNode().getStartByte(), match.getCaptures()[0].getNode().getEndByte())); + assertFalse(cursor.nextMatch(match)); + } + + @Test + void predicateNotMatchWithSourceText() { + // Test #not-match? @foo "^[A-Z]" + String src = "[\"Alpha\", \"beta\"]"; + tree = parser.parseString(null, src); + query = new TSQuery(json, "((string) @foo (#not-match? @foo \"^\\\"[A-Z]\"))"); + cursor.exec(query, tree.getRootNode(), src); + TSQueryMatch match = new TSQueryMatch(); + assertTrue(cursor.nextMatch(match)); + assertEquals("\"beta\"", src.substring(match.getCaptures()[0].getNode().getStartByte(), match.getCaptures()[0].getNode().getEndByte())); + assertFalse(cursor.nextMatch(match)); + } + + @Test + void predicateWithMultiByteChars() { + // Test #eq? and #not-eq? with multi-byte characters (Emoji and CJK) + // [ "😊", "δΈ–η•Œ" ] + String src = "[ \"\uD83D\uDE0A\", \"\u4E16\u754C\" ]"; + tree = parser.parseString(null, src); + TSNode root = tree.getRootNode(); + TSQueryMatch match = new TSQueryMatch(); + + // 1. Positive test for Emoji + query = new TSQuery(json, "((string) @s (#eq? @s \"\\\"\uD83D\uDE0A\\\"\"))"); + cursor.exec(query, root, src); + assertTrue(cursor.nextMatch(match), "Should match the emoji string"); + assertFalse(cursor.nextMatch(match), "Should only match once"); + + // 2. Positive test for CJK + query = new TSQuery(json, "((string) @s (#eq? @s \"\\\"\u4E16\u754C\\\"\"))"); + cursor.exec(query, root, src); + assertTrue(cursor.nextMatch(match), "Should match the CJK string"); + assertFalse(cursor.nextMatch(match), "Should only match once"); + + // 3. Negative test using #not-eq? + query = new TSQuery(json, "((string) @s (#not-eq? @s \"\\\"\uD83D\uDE0A\\\"\"))"); + cursor.exec(query, root, src); + assertTrue(cursor.nextMatch(match), "Should match 'δΈ–η•Œ' because it is not '😊'"); + // Verify it matched the second string, not the first + byte[] srcBytes = src.getBytes(java.nio.charset.StandardCharsets.UTF_8); + int start = match.getCaptures()[0].getNode().getStartByte(); + int end = match.getCaptures()[0].getNode().getEndByte(); + String matchedText = new String(srcBytes, start, end - start, java.nio.charset.StandardCharsets.UTF_8); + assertEquals("\"\u4E16\u754C\"", matchedText); + assertFalse(cursor.nextMatch(match), "Should not match the emoji string"); + + // 4. Regex test using #match? + query = new TSQuery(json, "((string) @s (#match? @s \"\u4E16\"))"); + cursor.exec(query, root, src); + assertTrue(cursor.nextMatch(match), "Should match 'δΈ–η•Œ' using partial regex"); + assertFalse(cursor.nextMatch(match)); + } +}