fix UB in swap_endianness

fixes #3
2025-08-02 07:26:04 +00:00 · 2024-03-09 18:04:21 -06:00 · 2024-03-09 18:04:21 -06:00 · 73ab006f7c
commit 73ab006f7c
parent 4594562ef0
7 changed files with 115 additions and 60 deletions
--- a/simdnbt/src/borrow/mod.rs
+++ b/simdnbt/src/borrow/mod.rs
@ -273,11 +273,9 @@ impl<'a> NbtTag<'a> {
 mod tests {
    use std::io::Read;

-    use byteorder::{WriteBytesExt, BE};
+    use byteorder::WriteBytesExt;
    use flate2::read::GzDecoder;

-    use crate::common::{INT_ID, LIST_ID, LONG_ID};
-
    use super::*;

    #[test]
--- a/simdnbt/src/common.rs
+++ b/simdnbt/src/common.rs
@ -1,4 +1,4 @@
-use std::{io::Cursor, slice};
+use std::{io::Cursor, mem, slice};

 use crate::{
    raw_list::RawList,
@ -171,7 +171,7 @@ pub unsafe fn unchecked_push(data: &mut Vec<u8>, value: u8) {
 /// endian! Use [`slice_into_u8_big_endian`] to get big endian (the endianness that's used in NBT).
 #[inline]
 pub fn slice_into_u8_native_endian<T>(s: &[T]) -> &[u8] {
-    unsafe { slice::from_raw_parts(s.as_ptr() as *const u8, std::mem::size_of_val(s)) }
+    unsafe { slice::from_raw_parts(s.as_ptr() as *const u8, mem::size_of_val(s)) }
 }

 /// Convert a slice of any type into a Vec<u8>. This will return the data as big endian (the
@ -180,3 +180,28 @@ pub fn slice_into_u8_native_endian<T>(s: &[T]) -> &[u8] {
 pub fn slice_into_u8_big_endian<T: SwappableNumber>(s: &[T]) -> Vec<u8> {
    swap_endianness_as_u8::<T>(slice_into_u8_native_endian(s))
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // this test specifically checks with little-endian
+    #[cfg(target_endian = "little")]
+    #[test]
+    fn test_slice_into_u8_native_endian() {
+        assert_eq!(slice_into_u8_native_endian(&[1u16, 2u16]), [1, 0, 2, 0]);
+        assert_eq!(
+            slice_into_u8_native_endian(&[1u32, 2u32]),
+            [1, 0, 0, 0, 2, 0, 0, 0]
+        );
+    }
+
+    #[test]
+    fn test_slice_into_u8_big_endian() {
+        assert_eq!(slice_into_u8_big_endian(&[1u16, 2u16]), [0, 1, 0, 2]);
+        assert_eq!(
+            slice_into_u8_big_endian(&[1u32, 2u32]),
+            [0, 0, 0, 1, 0, 0, 0, 2]
+        );
+    }
+}
--- a/simdnbt/src/mutf8.rs
+++ b/simdnbt/src/mutf8.rs
@ -29,8 +29,8 @@ fn is_plain_ascii(slice: &[u8]) -> bool {
        let mask = u8x16::splat(0b10000000);
        let zero = u8x16::splat(0);
        let simd = u8x16::from_array(*chunk);
-        let xor = simd & mask;
-        if xor != zero {
+        let and = simd & mask;
+        if and != zero {
            is_plain_ascii = false;
        }
    }
@ -40,8 +40,8 @@ fn is_plain_ascii(slice: &[u8]) -> bool {
        let mask = u8x8::splat(0b10000000);
        let zero = u8x8::splat(0);
        let simd = u8x8::from_array(*chunk);
-        let xor = simd & mask;
-        if xor != zero {
+        let and = simd & mask;
+        if and != zero {
            is_plain_ascii = false;
        }
    }
@ -51,8 +51,8 @@ fn is_plain_ascii(slice: &[u8]) -> bool {
        let mask = u8x4::splat(0b10000000);
        let zero = u8x4::splat(0);
        let simd = u8x4::from_array(*chunk);
-        let xor = simd & mask;
-        if xor != zero {
+        let and = simd & mask;
+        if and != zero {
            is_plain_ascii = false;
        }
    }
@ -66,8 +66,8 @@ fn is_plain_ascii(slice: &[u8]) -> bool {
        let mask = u8x32::splat(0b10000000);
        let zero = u8x32::splat(0);
        let simd = u8x32::from_array(chunk);
-        let xor = simd & mask;
-        if xor != zero {
+        let and = simd & mask;
+        if and != zero {
            is_plain_ascii = false;
        }
    }
--- a/simdnbt/src/owned/list.rs
+++ b/simdnbt/src/owned/list.rs
@ -147,7 +147,8 @@ impl NbtList {
                write_with_u32_length(data, 4, &slice_into_u8_big_endian(floats));
            }
            NbtList::Double(doubles) => {
-                write_with_u32_length(data, 8, &slice_into_u8_big_endian(doubles));
+                let bytes = slice_into_u8_big_endian(doubles);
+                write_with_u32_length(data, 8, &bytes);
            }
            NbtList::ByteArray(byte_arrays) => {
                write_u32(data, byte_arrays.len() as u32);
--- a/simdnbt/src/owned/mod.rs
+++ b/simdnbt/src/owned/mod.rs
@ -588,11 +588,9 @@ impl From<Nbt> for NbtTag {
 mod tests {
    use std::io::Read;

-    use byteorder::{WriteBytesExt, BE};
+    use byteorder::WriteBytesExt;
    use flate2::read::GzDecoder;

-    use crate::common::{INT_ID, LIST_ID, LONG_ID};
-
    use super::*;

    #[test]
--- a/simdnbt/src/raw_list.rs
+++ b/simdnbt/src/raw_list.rs
@ -1,4 +1,4 @@
-use std::marker::PhantomData;
+use std::{marker::PhantomData, mem};

 use crate::swap_endianness::{swap_endianness, swap_endianness_as_u8, SwappableNumber};

@ -18,7 +18,7 @@ impl<'a, T> RawList<'a, T> {
    }

    pub fn len(&self) -> usize {
-        self.data.len() / std::mem::size_of::<T>()
+        self.data.len() / mem::size_of::<T>()
    }

    pub fn is_empty(&self) -> bool {
--- a/simdnbt/src/swap_endianness.rs
+++ b/simdnbt/src/swap_endianness.rs
@ -1,4 +1,4 @@
-use std::simd::prelude::*;
+use std::{mem, simd::prelude::*};

 pub trait SwappableNumber {}
 impl SwappableNumber for u16 {}
@ -251,35 +251,57 @@ fn swap_endianness_64bit(bytes: &mut [u8], num: usize) {
    }
 }

-#[inline]
-pub fn swap_endianness_as_u8<T: SwappableNumber>(data: &[u8]) -> Vec<u8> {
-    let length = data.len() / std::mem::size_of::<T>();
-
-    let mut items = data.to_vec();
+/// Swap the endianness of the given array (unless we're on a big-endian system) in-place depending
+/// on the width of the given type.
+fn swap_endianness_from_type<T: SwappableNumber>(items: &mut [u8]) {
+    let item_width = mem::size_of::<T>();
+    let length = items.len() / item_width;

    if cfg!(target_endian = "little") {
-        match std::mem::size_of::<T>() {
-            2 => swap_endianness_16bit(&mut items, length),
-            4 => swap_endianness_32bit(&mut items, length),
-            8 => swap_endianness_64bit(&mut items, length),
+        match item_width {
+            2 => swap_endianness_16bit(items, length),
+            4 => swap_endianness_32bit(items, length),
+            8 => swap_endianness_64bit(items, length),
            _ => panic!("unsupported size of type"),
        }
    }
+}
+
+/// Swaps the endianness of the given data and returns it as a `Vec<u8>`.
+#[inline]
+pub fn swap_endianness_as_u8<T: SwappableNumber>(data: &[u8]) -> Vec<u8> {
+    let mut items = data.to_vec();
+    swap_endianness_from_type::<T>(&mut items);

    items
 }

 #[inline]
 pub fn swap_endianness<T: SwappableNumber>(data: &[u8]) -> Vec<T> {
-    let length = data.len() / std::mem::size_of::<T>();
-    let items = swap_endianness_as_u8::<T>(data);
+    let width_of_t = mem::size_of::<T>();
+    let length_of_vec_t = data.len() / width_of_t;

-    {
-        let ptr = items.as_ptr() as *const T;
-        std::mem::forget(items);
+    // the length of the data must be a multiple of the item width, otherwise it's UB
+    assert_eq!(data.len() % width_of_t, 0);
+
+    // have the vec be of T initially so it's aligned
+    let mut vec_t = Vec::<T>::with_capacity(length_of_vec_t);
+    let mut vec_u8: Vec<u8> = {
+        let ptr = vec_t.as_mut_ptr() as *mut u8;
+        mem::forget(vec_t);
+        // SAFETY: the new capacity is correct since we checked that data.len() is a multiple of width_of_t
+        unsafe { Vec::from_raw_parts(ptr, 0, data.len()) }
+    };
+    vec_u8.extend_from_slice(data);
+
+    swap_endianness_from_type::<T>(&mut vec_u8);
+
+    // now convert our Vec<u8> back to Vec<T>
+
+    let ptr = vec_u8.as_mut_ptr() as *mut T;
+    mem::forget(vec_u8);
    // SAFETY: The length won't be greater than the length of the original data
-        unsafe { Vec::from_raw_parts(ptr as *mut T, length, length) }
-    }
+    unsafe { Vec::from_raw_parts(ptr, length_of_vec_t, length_of_vec_t) }
 }

 #[cfg(test)]
@ -307,4 +329,15 @@ mod tests {
            [8, 7, 6, 5, 4, 3, 2, 1]
        );
    }
+
+    #[test]
+    fn test_swap_endianness_u64_vec() {
+        assert_eq!(
+            swap_endianness::<u64>(&[1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1]),
+            vec![
+                u64::from_le_bytes([8, 7, 6, 5, 4, 3, 2, 1]),
+                u64::from_le_bytes([1, 2, 3, 4, 5, 6, 7, 8])
+            ]
+        );
+    }
 }