diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index f03baa524d..2754d4285a 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -1970,13 +1970,14 @@ def _cast_if_needed(self, field: NestedField, values: pa.Array) -> pa.Array: return values.cast(target_type) raise ValueError(f"Unsupported schema projection from {values.type} to {target_type}") elif isinstance(field.field_type, (IntegerType, LongType)): - # Cast smaller integer types to target type for cross-platform compatibility - # Only allow widening conversions (smaller bit width to larger) - # Narrowing conversions fall through to promote() handling below + # Cast integer types for cross-platform compatibility (e.g. Spark reads): + # widening (smaller bit width to larger) and unsigned-to-signed at same width if pa.types.is_integer(values.type): source_width = values.type.bit_width target_width = target_type.bit_width - if source_width < target_width: + if source_width < target_width or ( + pa.types.is_unsigned_integer(values.type) and source_width <= target_width + ): return values.cast(target_type) if field.field_type != file_field.field_type: diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py index bcbf873c2a..e33b72366b 100644 --- a/tests/io/test_pyarrow.py +++ b/tests/io/test_pyarrow.py @@ -3084,6 +3084,7 @@ def test__to_requested_schema_timestamps_without_downcast_raises_exception( (pa.int8(), IntegerType(), pa.int32()), (pa.int16(), IntegerType(), pa.int32()), (pa.uint16(), IntegerType(), pa.int32()), + (pa.uint32(), IntegerType(), pa.int32()), (pa.uint32(), LongType(), pa.int64()), (pa.int32(), LongType(), pa.int64()), ],