From b5f38ac368b20d5788323f604382da3f167cd86a Mon Sep 17 00:00:00 2001 From: aizu-m Date: Thu, 18 Jun 2026 19:07:19 +0530 Subject: [PATCH 1/2] fix out-of-bounds read in RANGE op of RegularExpression.match --- .../xmlbeans/impl/regex/RegularExpression.java | 2 +- .../java/misc/checkin/RegularExpressionTest.java | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/apache/xmlbeans/impl/regex/RegularExpression.java b/src/main/java/org/apache/xmlbeans/impl/regex/RegularExpression.java index 226b34970..47af14759 100644 --- a/src/main/java/org/apache/xmlbeans/impl/regex/RegularExpression.java +++ b/src/main/java/org/apache/xmlbeans/impl/regex/RegularExpression.java @@ -1091,7 +1091,7 @@ private int match(Context con, Op op, int offset, int dx, int opts) { returned = true; break; } - int ch = target.charAt(offset); + int ch = target.charAt(o1); if (REUtil.isHighSurrogate(ch) && o1+dx < con.limit && o1+dx >=0) { o1 += dx; ch = REUtil.composeFromSurrogates(ch, target.charAt(o1)); diff --git a/src/test/java/misc/checkin/RegularExpressionTest.java b/src/test/java/misc/checkin/RegularExpressionTest.java index 6b0f6836b..c6f54b9de 100644 --- a/src/test/java/misc/checkin/RegularExpressionTest.java +++ b/src/test/java/misc/checkin/RegularExpressionTest.java @@ -20,6 +20,7 @@ import java.util.Random; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; public class RegularExpressionTest { @@ -31,6 +32,18 @@ void testLongString() { assertTrue(regex.matches(rnd)); } + @Test + void testLookbehindRangeAtInputEnd() { + // a lookbehind containing a character class, evaluated at the end of the + // input, used to read one character past the string in the RANGE op and + // throw StringIndexOutOfBoundsException + assertTrue(new RegularExpression("(?<=[a-c])$").matches("abc")); + assertTrue(new RegularExpression(".*(?<=[0-9])").matches("ab9")); + // the same off-by-one read also 
returned the wrong match result: the char + // before the lookbehind is 'x', not in [a-c], so this must not match + assertFalse(new RegularExpression("x(?<=[a-c])").matches("xc")); + } + private static final String AB = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; private static final Random rnd = new Random(); From 41f457e0fd563e7e42dca9f4c69db04eac334b96 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Thu, 18 Jun 2026 15:07:55 +0100 Subject: [PATCH 2/2] refactor --- .../xmlbeans/impl/regex/RegularExpression.java | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/apache/xmlbeans/impl/regex/RegularExpression.java b/src/main/java/org/apache/xmlbeans/impl/regex/RegularExpression.java index 47af14759..407e571c2 100644 --- a/src/main/java/org/apache/xmlbeans/impl/regex/RegularExpression.java +++ b/src/main/java/org/apache/xmlbeans/impl/regex/RegularExpression.java @@ -647,7 +647,7 @@ private Op compile(Token tok, Op next, boolean reverse) { * @return true if the target is matched to this regular expression. */ public boolean matches(char[] target) { - return this.matches(target, 0, target .length , (Match)null); + return this.matches(target, 0, target .length , null); } /** @@ -659,7 +659,7 @@ public boolean matches(char[] target) { * @return true if the target is matched to this regular expression. */ public boolean matches(char[] target, int start, int end) { - return this.matches(target, start, end, (Match)null); + return this.matches(target, start, end, null); } /** @@ -833,7 +833,7 @@ else if (this.firstChar != null) { * @return true if the target is matched to this regular expression. */ public boolean matches(String target) { - return this.matches(target, 0, target .length() , (Match)null); + return this.matches(target, 0, target .length() , null); } /** @@ -845,7 +845,7 @@ public boolean matches(String target) { * @return true if the target is matched to this regular expression. 
*/ public boolean matches(String target, int start, int end) { - return this.matches(target, start, end, (Match)null); + return this.matches(target, start, end, null); } /** @@ -1023,7 +1023,7 @@ else if (this.firstChar != null) { */ private int match(Context con, Op op, int offset, int dx, int opts) { final ExpressionTarget target = con.target; - final Stack opStack = new Stack(); + final Stack<Op> opStack = new Stack<>(); final IntStack dataStack = new IntStack(); final boolean isSetIgnoreCase = isSet(opts, IGNORE_CASE); int retValue = -1; @@ -1304,7 +1304,7 @@ else if (cop.no != null) { return retValue; } - op = (Op) opStack.pop(); + op = opStack.pop(); offset = dataStack.pop(); switch (op.type) { @@ -1534,7 +1534,7 @@ private static final int getWordType(ExpressionTarget target, int begin, int end * @return true if the target is matched to this regular expression. */ public boolean matches(CharacterIterator target) { - return this.matches(target, (Match)null); + return this.matches(target, null); } @@ -2401,8 +2401,7 @@ private static boolean isWordChar(int ch) { // Legacy word characters if (ch <= '9') return true; if (ch < 'A') return false; if (ch <= 'Z') return true; - if (ch < 'a') return false; - return true; + return ch >= 'a'; } private static boolean matchIgnoreCase(int chardata, int ch) {