Skip to content

Commit 17c018e

Browse files
committed
fix: normalize data URI parameter case
1 parent e144e0a commit 17c018e

2 files changed

Lines changed: 10 additions & 3 deletions

File tree

packages/markitdown/src/markitdown/_uri_utils.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ def parse_data_uri(uri: str) -> Tuple[str | None, Dict[str, str], bytes]:
2929

3030
is_base64 = False
3131
# Ends with base64?
32-
if parts[-1] == "base64":
32+
if parts[-1].lower() == "base64":
3333
parts.pop()
3434
is_base64 = True
3535

@@ -43,9 +43,9 @@ def parse_data_uri(uri: str) -> Tuple[str | None, Dict[str, str], bytes]:
4343
# Handle key=value pairs in the middle
4444
if "=" in part:
4545
key, value = part.split("=", 1)
46-
attributes[key] = value
46+
attributes[key.lower()] = value
4747
elif len(part) > 0:
48-
attributes[part] = ""
48+
attributes[part.lower()] = ""
4949

5050
content = base64.b64decode(data) if is_base64 else unquote_to_bytes(data)
5151

packages/markitdown/tests/test_module_misc.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -200,6 +200,13 @@ def test_data_uris() -> None:
200200
assert attributes["charset"] == "utf-8"
201201
assert data == b"Hello, World!"
202202

203+
data_uri = "data:text/plain;CHARSET=utf-8;BASE64,SGVsbG8sIFdvcmxkIQ=="
204+
mime_type, attributes, data = parse_data_uri(data_uri)
205+
assert mime_type == "text/plain"
206+
assert len(attributes) == 1
207+
assert attributes["charset"] == "utf-8"
208+
assert data == b"Hello, World!"
209+
203210
data_uri = "data:,Hello%2C%20World%21"
204211
mime_type, attributes, data = parse_data_uri(data_uri)
205212
assert mime_type is None

0 commit comments

Comments
 (0)