From 62674f903bd2fc352601fa39a3d9dea348e59f4b Mon Sep 17 00:00:00 2001 From: Jonathan Hedley Date: Wed, 15 Jan 2025 10:52:09 +1100 Subject: [PATCH] Fix parse of :nth-child(-n+2) The `-` sign of the nth-child step was ignored if there was no digit for the step. Also cleaned up the function a bit. Fixes #1147 --- CHANGES.md | 2 + .../java/org/jsoup/select/QueryParser.java | 69 +++++++++---------- .../java/org/jsoup/select/SelectorTest.java | 23 +++++++ 3 files changed, 57 insertions(+), 37 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index e95fcc9322..f7dae1d97e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -54,6 +54,8 @@ * A `template` tag containing an `li` within an open `li` would be parsed incorrectly, as it was not recognized as a "special" tag (which have additional processing rules). Also, added the SVG and MathML namespace tags to the list of special tags. [2258](https://github.com/jhy/jsoup/issues/2258) +* An `:nth-child` selector with a negative digit-less step, such as `:nth-child(-n+2)`, would be parsed incorrectly as a + positive step, and so would not match as expected. [1147](https://github.com/jhy/jsoup/issues/1147) ## 1.18.3 (2024-Dec-02) diff --git a/src/main/java/org/jsoup/select/QueryParser.java b/src/main/java/org/jsoup/select/QueryParser.java index bb7a614936..55ff99b33f 100644 --- a/src/main/java/org/jsoup/select/QueryParser.java +++ b/src/main/java/org/jsoup/select/QueryParser.java @@ -318,44 +318,39 @@ else if (cq.matchChomp("~=")) } //pseudo selectors :first-child, :last-child, :nth-child, ... 
- private static final Pattern NTH_AB = Pattern.compile("(([+-])?(\\d+)?)n(\\s*([+-])?\\s*\\d+)?", Pattern.CASE_INSENSITIVE); - private static final Pattern NTH_B = Pattern.compile("([+-])?(\\d+)"); - - private Evaluator cssNthChild(boolean backwards, boolean ofType) { - String arg = normalize(consumeParens()); - Matcher mAB = NTH_AB.matcher(arg); - Matcher mB = NTH_B.matcher(arg); - final int a, b; - if ("odd".equals(arg)) { - a = 2; - b = 1; - } else if ("even".equals(arg)) { - a = 2; - b = 0; - } else if (mAB.matches()) { - a = mAB.group(3) != null ? Integer.parseInt(mAB.group(1).replaceFirst("^\\+", "")) : 1; - b = mAB.group(4) != null ? Integer.parseInt(mAB.group(4).replaceFirst("^\\+", "")) : 0; - } else if (mB.matches()) { - a = 0; - b = Integer.parseInt(mB.group().replaceFirst("^\\+", "")); - } else { - throw new Selector.SelectorParseException("Could not parse nth-index '%s': unexpected format", arg); - } + private static final Pattern NthStepOffset = Pattern.compile("(([+-])?(\\d+)?)n(\\s*([+-])?\\s*\\d+)?", Pattern.CASE_INSENSITIVE); + private static final Pattern NthOffset = Pattern.compile("([+-])?(\\d+)"); + + private Evaluator cssNthChild(boolean last, boolean ofType) { + String arg = normalize(consumeParens()); // arg is like "odd", or "-n+2", within nth-child(odd) + final int step, offset; + if ("odd".equals(arg)) { + step = 2; + offset = 1; + } else if ("even".equals(arg)) { + step = 2; + offset = 0; + } else { + Matcher stepOffsetM, stepM; + if ((stepOffsetM = NthStepOffset.matcher(arg)).matches()) { + if (stepOffsetM.group(3) != null) // has digits, like 3n+2 or -3n+2 + step = Integer.parseInt(stepOffsetM.group(1).replaceFirst("^\\+", "")); + else // no digits, might be like n+2, or -n+2. if group(2) == "-", it’s -1; + step = "-".equals(stepOffsetM.group(2)) ? -1 : 1; + offset = + stepOffsetM.group(4) != null ? 
Integer.parseInt(stepOffsetM.group(4).replaceFirst("^\\+", "")) : 0; + } else if ((stepM = NthOffset.matcher(arg)).matches()) { + step = 0; + offset = Integer.parseInt(stepM.group().replaceFirst("^\\+", "")); + } else { + throw new Selector.SelectorParseException("Could not parse nth-index '%s': unexpected format", arg); + } + } - final Evaluator eval; - if (ofType) - if (backwards) - eval = new Evaluator.IsNthLastOfType(a, b); - else - eval = new Evaluator.IsNthOfType(a, b); - else { - if (backwards) - eval = (new Evaluator.IsNthLastChild(a, b)); - else - eval = new Evaluator.IsNthChild(a, b); - } - return eval; - } + return ofType + ? (last ? new Evaluator.IsNthLastOfType(step, offset) : new Evaluator.IsNthOfType(step, offset)) + : (last ? new Evaluator.IsNthLastChild(step, offset) : new Evaluator.IsNthChild(step, offset)); + } private String consumeParens() { return tq.chompBalanced('(', ')'); diff --git a/src/test/java/org/jsoup/select/SelectorTest.java b/src/test/java/org/jsoup/select/SelectorTest.java index 5229b7e6e7..5f74ee404c 100644 --- a/src/test/java/org/jsoup/select/SelectorTest.java +++ b/src/test/java/org/jsoup/select/SelectorTest.java @@ -1358,4 +1358,27 @@ public void emptyPseudo() { assertEquals(1, els.size()); assertEquals("o", els.get(0).id()); } + + @Test void negativeNthChild() { + // https://github.com/jhy/jsoup/issues/1147 + String html = "<p>1</p><p>2</p><p>3</p><p>4</p>"; + Document doc = Jsoup.parse(html); + + // Digitless + Elements pos = doc.select("p:nth-child(n+2)"); + assertSelectedOwnText(pos, "2", "3", "4"); + + Elements neg = doc.select("p:nth-child(-n+2)"); + assertSelectedOwnText(neg, "1", "2"); + + Elements combo = doc.select("p:nth-child(n+2):nth-child(-n+2)"); + assertSelectedOwnText(combo, "2"); + + // Digitful, 2n+2 or -1n+2 + Elements pos2 = doc.select("p:nth-child(2n+2)"); + assertSelectedOwnText(pos2, "2", "4"); + + Elements neg2 = doc.select("p:nth-child(-1n+2)"); + assertSelectedOwnText(neg2, "1", "2"); + } }