From 9e1d0b40fcf274c01de7bd6e67ed7834a117e6c0 Mon Sep 17 00:00:00 2001
From: Vineeth Sai <vineethsai4444@gmail.com>
Date: Sat, 20 Jun 2026 17:58:12 -0700
Subject: [PATCH] Fix from_format crash on multi-character literal blocks

Formatter.parse() runs re.escape() on the format string first, so a
literal block like [de] arrives as \[de\]. The token regex only
suppressed tokens immediately adjacent to the escaped brackets, so token
letters in the middle of a multi-character literal (for example the e in
[del]) were still tokenized, raising AttributeError or a "redefinition
of group name" re.error.

Match the whole escaped literal block as a single token in _FROM_FORMAT_RE
and unwrap it in _replace_tokens, keeping the inner text as a literal.

Fixes #971
---
 src/pendulum/formatting/formatter.py |  7 ++++++-
 tests/datetime/test_from_format.py   | 16 ++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/pendulum/formatting/formatter.py b/src/pendulum/formatting/formatter.py
index b09977d0..062b293b 100644
--- a/src/pendulum/formatting/formatter.py
+++ b/src/pendulum/formatting/formatter.py
@@ -66,7 +66,7 @@ class Formatter:
 
     _FORMAT_RE: re.Pattern[str] = re.compile(_TOKENS)
 
-    _FROM_FORMAT_RE: re.Pattern[str] = re.compile(r"(?<!\\\[)" + _TOKENS + r"(?!\\\])")
+    _FROM_FORMAT_RE: re.Pattern[str] = re.compile(r"\\\[.*?\\\]|" + _TOKENS)
 
     _LOCALIZABLE_TOKENS: ClassVar[
         dict[str, str | Callable[[Locale], Sequence[str]] | None]
@@ -664,6 +664,11 @@ def _get_parsed_locale_value(
             raise ValueError("Invalid date")
 
     def _replace_tokens(self, token: str, locale: Locale) -> str:
+        if token.startswith("\\[") and token.endswith("\\]"):
+            # parse() runs re.escape() on the format first, so a literal block such
+            # as "[de]" arrives here as "\\[de\\]". Drop the (escaped) brackets and
+            # keep the already-escaped inner text as a literal.
+            return token[2:-2]
         if token.startswith("[") and token.endswith("]"):
             return token[1:-1]
         elif token.startswith("\\"):
diff --git a/tests/datetime/test_from_format.py b/tests/datetime/test_from_format.py
index ecf59632..19a920eb 100644
--- a/tests/datetime/test_from_format.py
+++ b/tests/datetime/test_from_format.py
@@ -57,6 +57,22 @@ def test_from_format_with_escaped_elements_valid_tokens():
     assert d.timezone_name == "UTC"
 
 
+def test_from_format_with_multi_character_escaped_elements():
+    # Regression test for GH #971: a bracketed literal longer than one character
+    # whose inner letters are also format tokens (the "h"/"e" in "the", the "e" in
+    # "del") must be matched as literal text instead of raising during parsing.
+    d = pendulum.from_format("the year 2023", "[the year] YYYY")
+    assert_datetime(d, 2023, 1, 1, 0, 0, 0)
+    d = pendulum.from_format(
+        "21 de noviembre del 2023", "DD [de] MMMM [del] YYYY", locale="es"
+    )
+    assert_datetime(d, 2023, 11, 21, 0, 0, 0)
+    d = pendulum.from_format(
+        "21 de noviembre de 2023", "DD [de] MMMM [de] YYYY", locale="es"
+    )
+    assert_datetime(d, 2023, 11, 21, 0, 0, 0)
+
+
 def test_from_format_with_millis():
     d = pendulum.from_format("1975-05-21 22:32:11.123456", "YYYY-MM-DD HH:mm:ss.SSSSSS")
     assert_datetime(d, 1975, 5, 21, 22, 32, 11, 123456)