1
0
Fork 0
mirror of https://github.com/azalea-rs/simdnbt.git synced 2025-08-02 15:36:03 +00:00

fix UB in swap_endianness

fixes #3
This commit is contained in:
mat 2024-03-09 18:04:21 -06:00
parent 4594562ef0
commit 73ab006f7c
7 changed files with 115 additions and 60 deletions

View file

@ -273,11 +273,9 @@ impl<'a> NbtTag<'a> {
mod tests { mod tests {
use std::io::Read; use std::io::Read;
use byteorder::{WriteBytesExt, BE}; use byteorder::WriteBytesExt;
use flate2::read::GzDecoder; use flate2::read::GzDecoder;
use crate::common::{INT_ID, LIST_ID, LONG_ID};
use super::*; use super::*;
#[test] #[test]

View file

@ -1,4 +1,4 @@
use std::{io::Cursor, slice}; use std::{io::Cursor, mem, slice};
use crate::{ use crate::{
raw_list::RawList, raw_list::RawList,
@ -171,7 +171,7 @@ pub unsafe fn unchecked_push(data: &mut Vec<u8>, value: u8) {
/// endian! Use [`slice_into_u8_big_endian`] to get big endian (the endianness that's used in NBT). /// endian! Use [`slice_into_u8_big_endian`] to get big endian (the endianness that's used in NBT).
#[inline] #[inline]
pub fn slice_into_u8_native_endian<T>(s: &[T]) -> &[u8] { pub fn slice_into_u8_native_endian<T>(s: &[T]) -> &[u8] {
unsafe { slice::from_raw_parts(s.as_ptr() as *const u8, std::mem::size_of_val(s)) } unsafe { slice::from_raw_parts(s.as_ptr() as *const u8, mem::size_of_val(s)) }
} }
/// Convert a slice of any type into a Vec<u8>. This will return the data as big endian (the /// Convert a slice of any type into a Vec<u8>. This will return the data as big endian (the
@ -180,3 +180,28 @@ pub fn slice_into_u8_native_endian<T>(s: &[T]) -> &[u8] {
pub fn slice_into_u8_big_endian<T: SwappableNumber>(s: &[T]) -> Vec<u8> { pub fn slice_into_u8_big_endian<T: SwappableNumber>(s: &[T]) -> Vec<u8> {
swap_endianness_as_u8::<T>(slice_into_u8_native_endian(s)) swap_endianness_as_u8::<T>(slice_into_u8_native_endian(s))
} }
#[cfg(test)]
mod tests {
use super::*;
// this test only runs on little-endian targets because the expected bytes are written in little-endian order
#[cfg(target_endian = "little")]
#[test]
fn test_slice_into_u8_native_endian() {
assert_eq!(slice_into_u8_native_endian(&[1u16, 2u16]), [1, 0, 2, 0]);
assert_eq!(
slice_into_u8_native_endian(&[1u32, 2u32]),
[1, 0, 0, 0, 2, 0, 0, 0]
);
}
#[test]
fn test_slice_into_u8_big_endian() {
assert_eq!(slice_into_u8_big_endian(&[1u16, 2u16]), [0, 1, 0, 2]);
assert_eq!(
slice_into_u8_big_endian(&[1u32, 2u32]),
[0, 0, 0, 1, 0, 0, 0, 2]
);
}
}

View file

@ -29,8 +29,8 @@ fn is_plain_ascii(slice: &[u8]) -> bool {
let mask = u8x16::splat(0b10000000); let mask = u8x16::splat(0b10000000);
let zero = u8x16::splat(0); let zero = u8x16::splat(0);
let simd = u8x16::from_array(*chunk); let simd = u8x16::from_array(*chunk);
let xor = simd & mask; let and = simd & mask;
if xor != zero { if and != zero {
is_plain_ascii = false; is_plain_ascii = false;
} }
} }
@ -40,8 +40,8 @@ fn is_plain_ascii(slice: &[u8]) -> bool {
let mask = u8x8::splat(0b10000000); let mask = u8x8::splat(0b10000000);
let zero = u8x8::splat(0); let zero = u8x8::splat(0);
let simd = u8x8::from_array(*chunk); let simd = u8x8::from_array(*chunk);
let xor = simd & mask; let and = simd & mask;
if xor != zero { if and != zero {
is_plain_ascii = false; is_plain_ascii = false;
} }
} }
@ -51,8 +51,8 @@ fn is_plain_ascii(slice: &[u8]) -> bool {
let mask = u8x4::splat(0b10000000); let mask = u8x4::splat(0b10000000);
let zero = u8x4::splat(0); let zero = u8x4::splat(0);
let simd = u8x4::from_array(*chunk); let simd = u8x4::from_array(*chunk);
let xor = simd & mask; let and = simd & mask;
if xor != zero { if and != zero {
is_plain_ascii = false; is_plain_ascii = false;
} }
} }
@ -66,8 +66,8 @@ fn is_plain_ascii(slice: &[u8]) -> bool {
let mask = u8x32::splat(0b10000000); let mask = u8x32::splat(0b10000000);
let zero = u8x32::splat(0); let zero = u8x32::splat(0);
let simd = u8x32::from_array(chunk); let simd = u8x32::from_array(chunk);
let xor = simd & mask; let and = simd & mask;
if xor != zero { if and != zero {
is_plain_ascii = false; is_plain_ascii = false;
} }
} }

View file

@ -147,7 +147,8 @@ impl NbtList {
write_with_u32_length(data, 4, &slice_into_u8_big_endian(floats)); write_with_u32_length(data, 4, &slice_into_u8_big_endian(floats));
} }
NbtList::Double(doubles) => { NbtList::Double(doubles) => {
write_with_u32_length(data, 8, &slice_into_u8_big_endian(doubles)); let bytes = slice_into_u8_big_endian(doubles);
write_with_u32_length(data, 8, &bytes);
} }
NbtList::ByteArray(byte_arrays) => { NbtList::ByteArray(byte_arrays) => {
write_u32(data, byte_arrays.len() as u32); write_u32(data, byte_arrays.len() as u32);

View file

@ -588,11 +588,9 @@ impl From<Nbt> for NbtTag {
mod tests { mod tests {
use std::io::Read; use std::io::Read;
use byteorder::{WriteBytesExt, BE}; use byteorder::WriteBytesExt;
use flate2::read::GzDecoder; use flate2::read::GzDecoder;
use crate::common::{INT_ID, LIST_ID, LONG_ID};
use super::*; use super::*;
#[test] #[test]

View file

@ -1,4 +1,4 @@
use std::marker::PhantomData; use std::{marker::PhantomData, mem};
use crate::swap_endianness::{swap_endianness, swap_endianness_as_u8, SwappableNumber}; use crate::swap_endianness::{swap_endianness, swap_endianness_as_u8, SwappableNumber};
@ -18,7 +18,7 @@ impl<'a, T> RawList<'a, T> {
} }
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
self.data.len() / std::mem::size_of::<T>() self.data.len() / mem::size_of::<T>()
} }
pub fn is_empty(&self) -> bool { pub fn is_empty(&self) -> bool {

View file

@ -1,4 +1,4 @@
use std::simd::prelude::*; use std::{mem, simd::prelude::*};
pub trait SwappableNumber {} pub trait SwappableNumber {}
impl SwappableNumber for u16 {} impl SwappableNumber for u16 {}
@ -205,16 +205,16 @@ fn swap_endianness_64bit(bytes: &mut [u8], num: usize) {
for i in 0..num / 8 { for i in 0..num / 8 {
let simd: u8x64 = Simd::from_slice(bytes[i * 64..i * 64 + 64].as_ref()); let simd: u8x64 = Simd::from_slice(bytes[i * 64..i * 64 + 64].as_ref());
#[rustfmt::skip] #[rustfmt::skip]
let simd = simd_swizzle!(simd, [ let simd = simd_swizzle!(simd, [
7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0,
15, 14, 13, 12, 11, 10, 9, 8, 15, 14, 13, 12, 11, 10, 9, 8,
23, 22, 21, 20, 19, 18, 17, 16, 23, 22, 21, 20, 19, 18, 17, 16,
31, 30, 29, 28, 27, 26, 25, 24, 31, 30, 29, 28, 27, 26, 25, 24,
39, 38, 37, 36, 35, 34, 33, 32, 39, 38, 37, 36, 35, 34, 33, 32,
47, 46, 45, 44, 43, 42, 41, 40, 47, 46, 45, 44, 43, 42, 41, 40,
55, 54, 53, 52, 51, 50, 49, 48, 55, 54, 53, 52, 51, 50, 49, 48,
63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
]); ]);
bytes[i * 64..i * 64 + 64].copy_from_slice(simd.as_array()); bytes[i * 64..i * 64 + 64].copy_from_slice(simd.as_array());
} }
@ -222,64 +222,86 @@ fn swap_endianness_64bit(bytes: &mut [u8], num: usize) {
if i + 4 <= num { if i + 4 <= num {
let simd: u8x32 = Simd::from_slice(bytes[i * 8..i * 8 + 32].as_ref()); let simd: u8x32 = Simd::from_slice(bytes[i * 8..i * 8 + 32].as_ref());
#[rustfmt::skip] #[rustfmt::skip]
let simd = simd_swizzle!(simd, [ let simd = simd_swizzle!(simd, [
7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0,
15, 14, 13, 12, 11, 10, 9, 8, 15, 14, 13, 12, 11, 10, 9, 8,
23, 22, 21, 20, 19, 18, 17, 16, 23, 22, 21, 20, 19, 18, 17, 16,
31, 30, 29, 28, 27, 26, 25, 24, 31, 30, 29, 28, 27, 26, 25, 24,
]); ]);
bytes[i * 8..i * 8 + 32].copy_from_slice(simd.as_array()); bytes[i * 8..i * 8 + 32].copy_from_slice(simd.as_array());
i += 4; i += 4;
} }
if i + 2 <= num { if i + 2 <= num {
let simd: u8x16 = Simd::from_slice(bytes[i * 8..i * 8 + 16].as_ref()); let simd: u8x16 = Simd::from_slice(bytes[i * 8..i * 8 + 16].as_ref());
#[rustfmt::skip] #[rustfmt::skip]
let simd = simd_swizzle!(simd, [ let simd = simd_swizzle!(simd, [
7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0,
15, 14, 13, 12, 11, 10, 9, 8, 15, 14, 13, 12, 11, 10, 9, 8,
]); ]);
bytes[i * 8..i * 8 + 16].copy_from_slice(simd.as_array()); bytes[i * 8..i * 8 + 16].copy_from_slice(simd.as_array());
i += 2; i += 2;
} }
if i < num { if i < num {
let simd: u8x8 = Simd::from_slice(bytes[i * 8..i * 8 + 8].as_ref()); let simd: u8x8 = Simd::from_slice(bytes[i * 8..i * 8 + 8].as_ref());
#[rustfmt::skip] #[rustfmt::skip]
let simd = simd_swizzle!(simd, [ let simd = simd_swizzle!(simd, [
7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0,
]); ]);
bytes[i * 8..i * 8 + 8].copy_from_slice(simd.as_array()); bytes[i * 8..i * 8 + 8].copy_from_slice(simd.as_array());
} }
} }
#[inline] /// Swap the endianness of the given array (unless we're on a big-endian system) in-place depending
pub fn swap_endianness_as_u8<T: SwappableNumber>(data: &[u8]) -> Vec<u8> { /// on the width of the given type.
let length = data.len() / std::mem::size_of::<T>(); fn swap_endianness_from_type<T: SwappableNumber>(items: &mut [u8]) {
let item_width = mem::size_of::<T>();
let mut items = data.to_vec(); let length = items.len() / item_width;
if cfg!(target_endian = "little") { if cfg!(target_endian = "little") {
match std::mem::size_of::<T>() { match item_width {
2 => swap_endianness_16bit(&mut items, length), 2 => swap_endianness_16bit(items, length),
4 => swap_endianness_32bit(&mut items, length), 4 => swap_endianness_32bit(items, length),
8 => swap_endianness_64bit(&mut items, length), 8 => swap_endianness_64bit(items, length),
_ => panic!("unsupported size of type"), _ => panic!("unsupported size of type"),
} }
} }
}
/// Swaps the endianness of the given data and returns it as a `Vec<u8>`.
#[inline]
pub fn swap_endianness_as_u8<T: SwappableNumber>(data: &[u8]) -> Vec<u8> {
let mut items = data.to_vec();
swap_endianness_from_type::<T>(&mut items);
items items
} }
#[inline] #[inline]
pub fn swap_endianness<T: SwappableNumber>(data: &[u8]) -> Vec<T> { pub fn swap_endianness<T: SwappableNumber>(data: &[u8]) -> Vec<T> {
let length = data.len() / std::mem::size_of::<T>(); let width_of_t = mem::size_of::<T>();
let items = swap_endianness_as_u8::<T>(data); let length_of_vec_t = data.len() / width_of_t;
{ // the data must be a multiple of the item width, otherwise it's UB
let ptr = items.as_ptr() as *const T; assert_eq!(data.len() % width_of_t, 0);
std::mem::forget(items);
// SAFETY: The length won't be greater than the length of the original data // have the vec be of T initially so it's aligned
unsafe { Vec::from_raw_parts(ptr as *mut T, length, length) } let mut vec_t = Vec::<T>::with_capacity(length_of_vec_t);
} let mut vec_u8: Vec<u8> = {
let ptr = vec_t.as_mut_ptr() as *mut u8;
mem::forget(vec_t);
// SAFETY: the new capacity is correct since we checked that data.len() is a multiple of width_of_t
unsafe { Vec::from_raw_parts(ptr, 0, data.len()) }
};
vec_u8.extend_from_slice(data);
swap_endianness_from_type::<T>(&mut vec_u8);
// now convert our Vec<u8> back to Vec<T>
let ptr = vec_u8.as_mut_ptr() as *mut T;
mem::forget(vec_u8);
// SAFETY: The length won't be greater than the length of the original data
unsafe { Vec::from_raw_parts(ptr, length_of_vec_t, length_of_vec_t) }
} }
#[cfg(test)] #[cfg(test)]
@ -307,4 +329,15 @@ mod tests {
[8, 7, 6, 5, 4, 3, 2, 1] [8, 7, 6, 5, 4, 3, 2, 1]
); );
} }
#[test]
fn test_swap_endianness_u64_vec() {
assert_eq!(
swap_endianness::<u64>(&[1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1]),
vec![
u64::from_le_bytes([8, 7, 6, 5, 4, 3, 2, 1]),
u64::from_le_bytes([1, 2, 3, 4, 5, 6, 7, 8])
]
);
}
} }