1
0
Fork 0
mirror of https://github.com/azalea-rs/simdnbt.git synced 2025-08-02 07:26:04 +00:00

fix UB in swap_endianness

fixes #3
This commit is contained in:
mat 2024-03-09 18:04:21 -06:00
parent 4594562ef0
commit 73ab006f7c
7 changed files with 115 additions and 60 deletions

View file

@ -273,11 +273,9 @@ impl<'a> NbtTag<'a> {
mod tests {
use std::io::Read;
use byteorder::{WriteBytesExt, BE};
use byteorder::WriteBytesExt;
use flate2::read::GzDecoder;
use crate::common::{INT_ID, LIST_ID, LONG_ID};
use super::*;
#[test]

View file

@ -1,4 +1,4 @@
use std::{io::Cursor, slice};
use std::{io::Cursor, mem, slice};
use crate::{
raw_list::RawList,
@ -171,7 +171,7 @@ pub unsafe fn unchecked_push(data: &mut Vec<u8>, value: u8) {
/// endian! Use [`slice_into_u8_big_endian`] to get big endian (the endianness that's used in NBT).
#[inline]
pub fn slice_into_u8_native_endian<T>(s: &[T]) -> &[u8] {
    // Total size of the slice's contents in bytes (element count * element size).
    let byte_len = mem::size_of_val(s);
    // SAFETY: the pointer and `byte_len` both come from the same live slice, so the byte range
    // lies entirely inside `s`; `u8` has an alignment of 1, and the returned borrow shares the
    // lifetime of `s` through the signature.
    // NOTE(review): this also relies on `T` having no padding bytes, which the signature does
    // not enforce -- confirm every caller passes plain numeric types.
    unsafe { slice::from_raw_parts(s.as_ptr() as *const u8, byte_len) }
}
/// Convert a slice of any type into a Vec<u8>. This will return the data as big endian (the
@ -180,3 +180,28 @@ pub fn slice_into_u8_native_endian<T>(s: &[T]) -> &[u8] {
pub fn slice_into_u8_big_endian<T: SwappableNumber>(s: &[T]) -> Vec<u8> {
    // Borrow the values as their raw native-endian bytes first (no copy is made here)...
    let native_bytes = slice_into_u8_native_endian(s);
    // ...then let the swap helper produce the owned, byte-swapped copy.
    swap_endianness_as_u8::<T>(native_bytes)
}
#[cfg(test)]
mod tests {
    use super::*;

    // The expected bytes below are the little-endian layout, so this test is only compiled on
    // little-endian targets.
    #[cfg(target_endian = "little")]
    #[test]
    fn test_slice_into_u8_native_endian() {
        let shorts = [1u16, 2u16];
        assert_eq!(slice_into_u8_native_endian(&shorts), [1, 0, 2, 0]);

        let ints = [1u32, 2u32];
        assert_eq!(
            slice_into_u8_native_endian(&ints),
            [1, 0, 0, 0, 2, 0, 0, 0]
        );
    }

    // The most significant byte of every element is expected first.
    #[test]
    fn test_slice_into_u8_big_endian() {
        let shorts = [1u16, 2u16];
        assert_eq!(slice_into_u8_big_endian(&shorts), [0, 1, 0, 2]);

        let ints = [1u32, 2u32];
        assert_eq!(
            slice_into_u8_big_endian(&ints),
            [0, 0, 0, 1, 0, 0, 0, 2]
        );
    }
}

View file

@ -29,8 +29,8 @@ fn is_plain_ascii(slice: &[u8]) -> bool {
let mask = u8x16::splat(0b10000000);
let zero = u8x16::splat(0);
let simd = u8x16::from_array(*chunk);
let xor = simd & mask;
if xor != zero {
let and = simd & mask;
if and != zero {
is_plain_ascii = false;
}
}
@ -40,8 +40,8 @@ fn is_plain_ascii(slice: &[u8]) -> bool {
let mask = u8x8::splat(0b10000000);
let zero = u8x8::splat(0);
let simd = u8x8::from_array(*chunk);
let xor = simd & mask;
if xor != zero {
let and = simd & mask;
if and != zero {
is_plain_ascii = false;
}
}
@ -51,8 +51,8 @@ fn is_plain_ascii(slice: &[u8]) -> bool {
let mask = u8x4::splat(0b10000000);
let zero = u8x4::splat(0);
let simd = u8x4::from_array(*chunk);
let xor = simd & mask;
if xor != zero {
let and = simd & mask;
if and != zero {
is_plain_ascii = false;
}
}
@ -66,8 +66,8 @@ fn is_plain_ascii(slice: &[u8]) -> bool {
let mask = u8x32::splat(0b10000000);
let zero = u8x32::splat(0);
let simd = u8x32::from_array(chunk);
let xor = simd & mask;
if xor != zero {
let and = simd & mask;
if and != zero {
is_plain_ascii = false;
}
}

View file

@ -147,7 +147,8 @@ impl NbtList {
write_with_u32_length(data, 4, &slice_into_u8_big_endian(floats));
}
NbtList::Double(doubles) => {
write_with_u32_length(data, 8, &slice_into_u8_big_endian(doubles));
let bytes = slice_into_u8_big_endian(doubles);
write_with_u32_length(data, 8, &bytes);
}
NbtList::ByteArray(byte_arrays) => {
write_u32(data, byte_arrays.len() as u32);

View file

@ -588,11 +588,9 @@ impl From<Nbt> for NbtTag {
mod tests {
use std::io::Read;
use byteorder::{WriteBytesExt, BE};
use byteorder::WriteBytesExt;
use flate2::read::GzDecoder;
use crate::common::{INT_ID, LIST_ID, LONG_ID};
use super::*;
#[test]

View file

@ -1,4 +1,4 @@
use std::marker::PhantomData;
use std::{marker::PhantomData, mem};
use crate::swap_endianness::{swap_endianness, swap_endianness_as_u8, SwappableNumber};
@ -18,7 +18,7 @@ impl<'a, T> RawList<'a, T> {
}
/// Returns how many whole `T`-sized items fit in `self.data`.
pub fn len(&self) -> usize {
    // NOTE(review): presumably `data` is the raw byte buffer, so its length is divided by the
    // width of one item -- confirm against the struct definition.
    self.data.len() / mem::size_of::<T>()
}
pub fn is_empty(&self) -> bool {

View file

@ -1,4 +1,4 @@
use std::simd::prelude::*;
use std::{mem, simd::prelude::*};
/// Marker trait for the number types accepted by the endianness-swapping functions in this
/// module. It declares no methods and is only used as a generic bound.
pub trait SwappableNumber {}
// 16-bit unsigned integers can be swapped.
impl SwappableNumber for u16 {}
@ -251,35 +251,57 @@ fn swap_endianness_64bit(bytes: &mut [u8], num: usize) {
}
}
#[inline]
pub fn swap_endianness_as_u8<T: SwappableNumber>(data: &[u8]) -> Vec<u8> {
let length = data.len() / std::mem::size_of::<T>();
let mut items = data.to_vec();
/// Swap the endianness of the given array (unless we're on a big-endian system) in-place depending
/// on the width of the given type.
fn swap_endianness_from_type<T: SwappableNumber>(items: &mut [u8]) {
let item_width = mem::size_of::<T>();
let length = items.len() / item_width;
if cfg!(target_endian = "little") {
match std::mem::size_of::<T>() {
2 => swap_endianness_16bit(&mut items, length),
4 => swap_endianness_32bit(&mut items, length),
8 => swap_endianness_64bit(&mut items, length),
match item_width {
2 => swap_endianness_16bit(items, length),
4 => swap_endianness_32bit(items, length),
8 => swap_endianness_64bit(items, length),
_ => panic!("unsupported size of type"),
}
}
}
/// Returns a copy of `data` in which the endianness of every `T`-sized item has been swapped
/// by `swap_endianness_from_type`. The input slice itself is left untouched.
#[inline]
pub fn swap_endianness_as_u8<T: SwappableNumber>(data: &[u8]) -> Vec<u8> {
    // Work on an owned copy so the caller's bytes are never modified.
    let mut swapped = Vec::from(data);
    swap_endianness_from_type::<T>(swapped.as_mut_slice());
    swapped
}
/// Copies `data` into a freshly allocated `Vec<T>`, swapping the endianness of every item via
/// `swap_endianness_from_type` (which leaves the bytes unchanged on big-endian targets).
///
/// The buffer is allocated as a `Vec<T>` and stays owned by that vector the whole time, so it
/// is correctly aligned for `T` and is always freed with the layout it was allocated with.
///
/// # Panics
///
/// Panics if `data.len()` is not a multiple of `size_of::<T>()`.
#[inline]
pub fn swap_endianness<T: SwappableNumber>(data: &[u8]) -> Vec<T> {
    let width_of_t = mem::size_of::<T>();
    let length_of_vec_t = data.len() / width_of_t;

    // Every input byte must belong to a complete `T`; otherwise the byte count copied below
    // would not match the number of elements exposed by `set_len`.
    assert_eq!(data.len() % width_of_t, 0);

    // Allocate as `Vec<T>` so the storage has the alignment of `T`. The allocation is never
    // handed to a `Vec` of another element type, which `Vec::from_raw_parts` does not permit.
    let mut vec_t = Vec::<T>::with_capacity(length_of_vec_t);

    // SAFETY: `vec_t` has room for at least `length_of_vec_t` items, which is exactly
    // `data.len()` bytes (checked by the assert above). The destination is a fresh allocation,
    // so it cannot overlap `data`. After the copy all `data.len()` bytes are initialized, so
    // viewing them as `&mut [u8]` is valid; that slice is only used before `vec_t` is touched
    // again.
    let bytes = unsafe {
        let dst = vec_t.as_mut_ptr() as *mut u8;
        std::ptr::copy_nonoverlapping(data.as_ptr(), dst, data.len());
        std::slice::from_raw_parts_mut(dst, data.len())
    };
    swap_endianness_from_type::<T>(bytes);

    // SAFETY: `length_of_vec_t` does not exceed the capacity requested above, and the first
    // `length_of_vec_t` items were fully initialized by the copy.
    // NOTE(review): this assumes every bit pattern is a valid `T`, which holds for primitive
    // numbers but is not enforced by the `SwappableNumber` bound itself.
    unsafe { vec_t.set_len(length_of_vec_t) };
    vec_t
}
#[cfg(test)]
@ -307,4 +329,15 @@ mod tests {
[8, 7, 6, 5, 4, 3, 2, 1]
);
}
#[test]
fn test_swap_endianness_u64_vec() {
    // Two big-endian u64 values stored back to back.
    let input: [u8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1];
    // Reading each 8-byte group as big-endian gives the numbers the swap must produce.
    let expected = vec![
        u64::from_be_bytes([1, 2, 3, 4, 5, 6, 7, 8]),
        u64::from_be_bytes([8, 7, 6, 5, 4, 3, 2, 1]),
    ];
    assert_eq!(swap_endianness::<u64>(&input), expected);
}
}